#Imports

In [None]:
import cv2
import joblib
import numpy as np
import os
import pandas as pd
import random
import scipy.ndimage
from collections import Counter
from matplotlib import pyplot as plt
from PIL import Image
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import adjusted_rand_score
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences


#SIFT + GMM

In [None]:

zip_path = "/content/archive (4).zip"
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall("/content/")

In [None]:

labels_list = []
valid_images_to_continue_with = []
invalid_images = 0
categories_name = {}

def check_valid_images_all_folders(directory):
    global invalid_images
    for i, folder in enumerate(os.listdir(directory)):
        folder_path = os.path.join(directory, folder)

        if os.path.isdir(folder_path):
            categories_name[folder] = 0
            for filename in os.listdir(folder_path):
                file_path = os.path.join(folder_path, filename)

                try:
                    img = cv2.imread(file_path)
                    if img is None:
                        invalid_images += 1
                    else:
                        valid_images_to_continue_with.append(img)
                        labels_list.append(folder)
                        categories_name[folder] += 1
                except Exception as e:
                    invalid_images += 1

check_valid_images_all_folders('/content/PlantVillage')

print('The number of valid images to use:', len(valid_images_to_continue_with))
print('The number of invalid images:', invalid_images)
print('Number of images in each category:')
for category, count in categories_name.items():
    print(f'{category}: {count}')


The number of valid images to use: 20638
The number of invalid images: 1
Number of images in each category:
Tomato_Septoria_leaf_spot: 1771
Tomato__Tomato_YellowLeaf__Curl_Virus: 3208
Pepper__bell___healthy: 1478
Potato___Early_blight: 1000
Tomato__Target_Spot: 1404
Potato___healthy: 152
Tomato_Early_blight: 1000
Pepper__bell___Bacterial_spot: 997
Tomato_healthy: 1591
Potato___Late_blight: 1000
Tomato_Bacterial_spot: 2127
Tomato__Tomato_mosaic_virus: 373
Tomato_Spider_mites_Two_spotted_spider_mite: 1676
Tomato_Late_blight: 1909
Tomato_Leaf_Mold: 952


In [None]:

keypoints_list=[]
descriptors_list=[]
sift = cv2.SIFT_create(nfeatures=100)


for i in range(len(valid_images_to_continue_with)):
    image = valid_images_to_continue_with[i]
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


    keypoints, descriptors = sift.detectAndCompute(gray_image, None)
    keypoints_list.append(keypoints)
    descriptors_list.append(descriptors)

In [None]:


filtered_descriptors = [desc for desc in descriptors_list if desc is not None]

max_length = 100
padded_descriptors = pad_sequences(filtered_descriptors, maxlen=max_length, padding='post', truncating='post', dtype='float32')

flattened_descriptors = padded_descriptors.reshape((len(padded_descriptors), -1))

X = np.array(flattened_descriptors)

In [None]:

label_mapping = {label: i for i, label in enumerate(set(labels_list))}
labels_list_int = [label_mapping[label] for label in labels_list]

print(label_mapping)

{'Pepper__bell___healthy': 0, 'Potato___healthy': 1, 'Tomato__Target_Spot': 2, 'Tomato_Spider_mites_Two_spotted_spider_mite': 3, 'Tomato__Tomato_YellowLeaf__Curl_Virus': 4, 'Tomato_healthy': 5, 'Potato___Late_blight': 6, 'Pepper__bell___Bacterial_spot': 7, 'Tomato__Tomato_mosaic_virus': 8, 'Tomato_Bacterial_spot': 9, 'Tomato_Late_blight': 10, 'Potato___Early_blight': 11, 'Tomato_Septoria_leaf_spot': 12, 'Tomato_Early_blight': 13, 'Tomato_Leaf_Mold': 14}


In [None]:

X_train, X_test, y_train, y_test = train_test_split(
    X, labels_list_int, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

pca = PCA(n_components=128)
X_train_reduced = pca.fit_transform(X_train_scaled)
X_test_reduced = pca.transform(X_test_scaled)

K = 15

gmm_model = GaussianMixture(n_components=K, random_state=42, max_iter=500)

gmm_model.fit(X_train_reduced)

y_pred_train = gmm_model.predict(X_train_reduced)
y_pred_test = gmm_model.predict(X_test_reduced)

In [None]:
print("Accuracy Score:")
print(accuracy_score(y_test, y_pred_test))

Accuracy Score:
0.10271317829457365


In [None]:

cluster_label_mapping = {}
gmm_train_preds = gmm_model.predict(X_train_reduced)
for cluster in range(K):
    indices = [i for i, c in enumerate(gmm_train_preds) if c == cluster]
    if indices:
        majority_label = Counter(np.array(y_train)[indices]).most_common(1)[0][0]
        cluster_label_mapping[cluster] = majority_label

inv_label_mapping = {v: k for k, v in label_mapping.items()}

cluster_class_mapping = {cluster: inv_label_mapping[majority_label]
                         for cluster, majority_label in cluster_label_mapping.items()}

def predict_class(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    keypoints, descriptors = sift.detectAndCompute(gray_image, None)

    if descriptors is None:
        return "No features found in the image."

    padded_descriptor = pad_sequences([descriptors],
                                      maxlen=max_length,
                                      padding='post',
                                      truncating='post',
                                      dtype='float32')
    flattened_descriptor = padded_descriptor.reshape((1, -1))
    scaled = scaler.transform(flattened_descriptor)
    reduced = pca.transform(scaled)

    predicted_cluster = gmm_model.predict(reduced)[0]
    return cluster_class_mapping.get(predicted_cluster, "Unknown")

new_image = cv2.imread('/content/tomato_bacteria_spot.jpg')
predicted_class = predict_class(new_image)
print("Predicted class:", predicted_class)


error: OpenCV(4.11.0) /io/opencv/modules/imgproc/src/color.cpp:199: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [None]:

joblib.dump(gmm_model, 'gmm_model.pkl')
joblib.dump(pca, 'pca_transform.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [None]:

label_mapping = {
    'Tomato_Leaf_Mold': 0, 'Potato___Early_blight': 1, 'Potato___healthy': 2,
    'Tomato_Bacterial_spot': 3, 'Tomato__Target_Spot': 4, 'Tomato_Septoria_leaf_spot': 5,
    'Tomato__Tomato_YellowLeaf__Curl_Virus': 6, 'Pepper__bell___Bacterial_spot': 7,
    'Pepper__bell___healthy': 8, 'Tomato_Late_blight': 9, 'Potato___Late_blight': 10,
    'Tomato_healthy': 11, 'Tomato_Spider_mites_Two_spotted_spider_mite': 12,
    'Tomato__Tomato_mosaic_virus': 13, 'Tomato_Early_blight': 14
}
inv_label_mapping = {v: k for k, v in label_mapping.items()}

gmm_labels = gmm_model.predict(X_train_reduced)

K = gmm_model.n_components
cluster_label_mapping = {}
for cluster in range(K):
    indices = [i for i, c in enumerate(gmm_labels) if c == cluster]
    if indices:
        majority_label = Counter(np.array(y_train)[indices]).most_common(1)[0][0]
        cluster_label_mapping[cluster] = majority_label

cluster_class_mapping = {
    cluster: inv_label_mapping[label] for cluster, label in cluster_label_mapping.items()
}

joblib.dump(cluster_class_mapping, 'cluster_class_mapping.pkl')
print("✅ cluster_class_mapping.pkl saved.")


✅ cluster_class_mapping.pkl saved.


In [None]:

scaler = joblib.load('scaler.pkl')
pca = joblib.load('pca_transform.pkl')
gmm_model = joblib.load('gmm_model.pkl')
cluster_class_mapping = joblib.load('cluster_class_mapping.pkl')

max_length = 100
sift = cv2.SIFT_create(nfeatures=100)

def predict_class_from_image(image_path):
    image = cv2.imread(image_path)
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    keypoints, descriptors = sift.detectAndCompute(gray_image, None)
    if descriptors is None:
        return "No features found."

    padded_descriptor = pad_sequences([descriptors], maxlen=max_length, padding='post', truncating='post', dtype='float32')
    flat_descriptor = padded_descriptor.reshape((1, -1))
    scaled = scaler.transform(flat_descriptor)
    reduced = pca.transform(scaled)

    predicted_cluster = gmm_model.predict(reduced)[0]
    return cluster_class_mapping.get(predicted_cluster, "Unknown")

predicted = predict_class_from_image('/content/f91b6466-7c71-44ef-86f4-eaf2cdabf8bf___Crnl_L.Mold 7055.JPG')
print("Predicted class:", predicted)


Predicted class: Potato___healthy
