In [1]:
import os

import joblib
import numpy as np
from joblib import Parallel, delayed
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import load_img, img_to_array
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

In [2]:
# Root folders of the two dataset splits, given relative to the notebook.
# The loading cells below expect one sub-directory per class inside each root.
test_dataset_path = "../dataset/test/"
train_dataset_path = "../dataset/train/"

In [3]:
# Collect every training image path together with its class label; the label
# is the name of the sub-directory of `train_dataset_path` that holds the file.
train_image_paths = []
train_labels = []

# os.listdir() yields entries in arbitrary, filesystem-dependent order, so both
# listings are sorted to make the sample order reproducible across runs.
for class_name in sorted(os.listdir(train_dataset_path)):
    class_path = os.path.join(train_dataset_path, class_name)
    if not os.path.isdir(class_path):
        continue  # ignore stray files sitting next to the class folders
    for image_name in sorted(os.listdir(class_path)):
        train_image_paths.append(os.path.join(class_path, image_name))
        train_labels.append(class_name)

In [4]:
# Collect every test image path together with its class label; the label is
# the name of the sub-directory of `test_dataset_path` that holds the file.
test_image_paths = []
test_labels = []

# os.listdir() yields entries in arbitrary, filesystem-dependent order, so both
# listings are sorted to make the sample order reproducible across runs.
for class_name in sorted(os.listdir(test_dataset_path)):
    class_path = os.path.join(test_dataset_path, class_name)
    if not os.path.isdir(class_path):
        continue  # ignore stray files sitting next to the class folders
    for image_name in sorted(os.listdir(class_path)):
        test_image_paths.append(os.path.join(class_path, image_name))
        test_labels.append(class_name)

In [5]:
# Preprocessing settings shared by the cells below.
n_jobs = 4  # number of joblib workers used to load images in parallel
image_size = (32, 32)  # target size every image is resized to when loaded

In [6]:
def preprocess_images(image_path):
    """Load a single image file and prepare it as a feature array.

    The image is read from ``image_path`` and resized to the notebook-level
    ``image_size``, converted to an array with ``img_to_array`` and finally
    passed through the VGG16 ``preprocess_input`` function.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The array returned by ``preprocess_input`` for this image.
    """
    return preprocess_input(
        img_to_array(load_img(image_path, target_size=image_size))
    )

In [7]:
# Preprocess all training images in parallel worker processes.
#
# tqdm wraps the iterable that feeds joblib, so the bar advances as tasks are
# handed to the workers instead of jumping from 0% to 100% only after every
# image has already been processed. (The bar tracks dispatch, which runs
# slightly ahead of completion.)
with Parallel(n_jobs=n_jobs) as parallel:
    train_images = parallel(
        delayed(preprocess_images)(image_path)
        for image_path in tqdm(
            train_image_paths, desc="Preprocessing Training Images"
        )
    )

Preprocessing Training Images: 100%|██████████| 12000/12000 [00:11<00:00, 1016.08it/s]


In [8]:
# Preprocess all test images in parallel worker processes.
#
# tqdm wraps the iterable that feeds joblib, so the bar advances as tasks are
# handed to the workers instead of jumping from 0% to 100% only after every
# image has already been processed. (The bar tracks dispatch, which runs
# slightly ahead of completion.)
with Parallel(n_jobs=n_jobs) as parallel:
    test_images = parallel(
        delayed(preprocess_images)(image_path)
        for image_path in tqdm(test_image_paths, desc="Preprocessing Test Images")
    )

Preprocessing Test Images: 100%|██████████| 1805/1805 [00:00<00:00, 3279.00it/s]


In [9]:
# Turn the per-image results and the label lists into NumPy arrays, one
# (features, labels) pair per dataset split.
X_train, y_train = np.array(train_images), np.array(train_labels)
X_test, y_test = np.array(test_images), np.array(test_labels)

In [10]:
# Collapse all non-leading axes so each sample becomes one flat feature
# vector: the result is 2-D with one row per image.
X_test_flatten = X_test.reshape(len(X_test), -1)
X_train_flatten = X_train.reshape(len(X_train), -1)

In [11]:
# Learn the class-name -> integer mapping from the training labels.
label_encoder = LabelEncoder().fit(y_train)

# Integer-encoded training labels. NOTE(review): in the cells shown here the
# classifier is fitted on the string labels (`y_train`), and only
# `label_encoder.classes_` is read later on.
y_train_encoded = label_encoder.transform(y_train)

In [12]:
# Support vector classifier with a linear kernel, trained on the flattened
# image vectors and the string class labels.
svm_model = svm.SVC(kernel="linear")

# The bar is only updated after fit() returns, so it goes from 0 to 100% in a
# single step and mainly records how long training took.
with tqdm(total=X_train_flatten.shape[0], desc="Training SVM") as fit_bar:
    svm_model.fit(X_train_flatten, y_train)
    fit_bar.update(len(X_train_flatten))

Training SVM: 100%|██████████| 12000/12000 [02:28<00:00, 80.72it/s]


In [13]:
# Predict a class for every flattened test image.
y_pred = svm_model.predict(X_test_flatten)

# The encoder was fitted on the same string labels the model was trained on,
# so `classes_` holds the class names themselves. Passing them as `labels`
# as well as `target_names` ties each report row to its class explicitly,
# instead of relying on every class happening to appear in the test data.
print(
    classification_report(
        y_test,
        y_pred,
        labels=label_encoder.classes_,
        target_names=label_encoder.classes_,
    )
)

              precision    recall  f1-score   support

   Black Rot       0.67      0.76      0.71       472
        ESCA       0.82      0.75      0.78       480
     Healthy       0.95      0.89      0.92       423
 Leaf Blight       0.90      0.90      0.90       430

    accuracy                           0.82      1805
   macro avg       0.83      0.83      0.83      1805
weighted avg       0.83      0.82      0.82      1805



In [14]:
# Overall fraction of test images classified correctly.
# Arguments follow scikit-learn's (y_true, y_pred) order; accuracy is
# symmetric, so the value is unchanged, but this matches the other metric
# call in this notebook and stays correct if the metric is ever swapped.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.82


In [15]:
# Persist the fitted classifier so it can be reloaded later with joblib.load().
# joblib.dump() accepts a destination path directly and handles opening and
# closing the file itself, so no manual open() is needed.
model_filename = "svm_model.joblib"
joblib.dump(svm_model, model_filename);  # trailing ';' hides the returned path list