# Import library

In [36]:

import os, cv2
import numpy as np
import pandas as pd
import mahotas
from skimage.feature import local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load CSV file

In [37]:
csv_path = "train.csv"
df = pd.read_csv(csv_path)


# Add image paths

In [38]:
data_dir = "images"
df['image_path'] = df['image_id'].apply(lambda x: os.path.join(data_dir, x + ".jpg"))



# create Function to feature Extraction

In [39]:
def extract_features(image_path):
    image = cv2.imread(image_path)
    if image is None:
        return None
    image = cv2.resize(image, (128, 128))
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    hist = cv2.calcHist([image], [0,1,2], None, [8,8,8], [0,256]*3).flatten()
    lbp = local_binary_pattern(gray, P=8, R=1, method='uniform')
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0,10), range=(0,9))
    lbp_hist = lbp_hist.astype("float")
    lbp_hist /= (lbp_hist.sum() + 1e-6)
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    return np.hstack([hist, lbp_hist, haralick])


#  EXTRACT FEATURES

In [40]:
features = []
labels = []

for _, row in df.iterrows():
    f = extract_features(row['image_path'])
    if f is not None:
        features.append(f)
        labels.append(row[['healthy', 'multiple_diseases', 'rust', 'scab']].idxmax())

X = np.array(features)
y = np.array(labels)

# ENCODE LABELS

In [None]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)

#  TRAIN-TEST SPLIT

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# MODEL train

In [41]:
svm = SVC(kernel='rbf', C=10)
svm.fit(X_train, y_train)

rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)

gb = GradientBoostingClassifier(n_estimators=100)
gb.fit(X_train, y_train)


In [42]:
target_names = le.classes_.tolist()

for model, name in zip([svm, rf, gb], ['SVM', 'Random Forest', 'Gradient Boosting']):
    y_pred = model.predict(X_test)
    print(f"\n{name} Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred, target_names=target_names))


SVM Accuracy: 0.6137
                   precision    recall  f1-score   support

          healthy       0.69      0.66      0.68       103
multiple_diseases       0.00      0.00      0.00        18
             rust       0.61      0.62      0.61       125
             scab       0.56      0.66      0.61       119

         accuracy                           0.61       365
        macro avg       0.47      0.49      0.47       365
     weighted avg       0.59      0.61      0.60       365


Random Forest Accuracy: 0.8110
                   precision    recall  f1-score   support

          healthy       0.78      0.76      0.77       103
multiple_diseases       0.00      0.00      0.00        18
             rust       0.89      0.97      0.93       125
             scab       0.76      0.82      0.79       119

         accuracy                           0.81       365
        macro avg       0.61      0.64      0.62       365
     weighted avg       0.77      0.81      0.79       3

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [44]:
import pickle
with open("svm_model.pkl", "wb") as f:
    pickle.dump(svm, f)


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting numpy
  Using cached numpy-2.2.6-cp312-cp312-win_amd64.whl.metadata (60 kB)
Downloading opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl (38.9 MB)
   ---------------------------------------- 0.0/38.9 MB ? eta -:--:--
    --------------------------------------- 0.5/38.9 MB 4.2 MB/s eta 0:00:10
   - -------------------------------------- 1.3/38.9 MB 3.9 MB/s eta 0:00:10
   -- ------------------------------------- 2.4/38.9 MB 4.1 MB/s eta 0:00:09
   --- ------------------------------------ 3.1/38.9 MB 4.1 MB/s eta 0:00:09
   --- ------------------------------------ 3.7/38.9 MB 3.8 MB/s eta 0:00:10
   ---- ----------------------------------- 4.7/38.9 MB 4.0 MB/s eta 0:00:09
   ----- ---------------------------------- 5.0/38.9 MB 4.0 MB/s eta 0:00:09
   ----- ---------------------------------- 5.0/38.9 MB 4.0 MB/s eta 0:00:09
   ----- -----------------

  You can safely remove it manually.
  You can safely remove it manually.
ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\dell\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\cv2\\cv2.pyd'
Consider using the `--user` option or check the permissions.

