In [1]:
# Packages that may need to be installed to run
#pip install opencv-python

In [2]:
import numpy as np 
import cv2
import os
import pandas as pd
import glob
import matplotlib.pyplot as plt       
%matplotlib inline

from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from skimage.feature import local_binary_pattern
# from skimage.feature import graycomatrix, graycoprops

In [3]:
# Data Cleaning
# Data source cite: https://github.com/gabrieldgf4/PlantVillage-Dataset
# Repost from the plantvillage.org repository that is no longer available

# Setting filters for the images hsv colors
low_hsv = (0,60,0)
high_hsv = (179,255,255)

#cleaning the images
def cleaning_image(images):
    clean_image= []
    for image in images:
        gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(gray_scale, low_hsv,high_hsv)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel=np.ones((8,8),dtype=np.uint8))
        clean_image.append(cv2.bitwise_and(image, image,mask=mask))
    return clean_image

In [4]:
def get_images():
    leaf_images = {}
    clean_leaf_images = {}
    count = 1
    PlantDataset = glob.glob(r'LeavesDataset\*')
    
    for plant in PlantDataset:
        print("Getting",plant.split('\\')[1])
        LeavesDataset = glob.glob(plant+'\*')
        
        for leaves in LeavesDataset:
            leaves_name = leaves.split("\\")[2]
            print("\tGetting",leaves_name,":", end="\t")
            
            leaf_images[leaves_name] = [cv2.imread(file) for file in glob.glob(leaves+'\*.JPG')]
            clean_leaf_images[leaves_name] = cleaning_image(leaf_images[leaves_name])
            print("load",len(leaf_images[leaves_name]),"images")
            
    print("Complete")
    return leaf_images, clean_leaf_images


leaf_images,clean_images = get_images()

Getting PotatoDataset
	Getting potato_early_blight :	load 1000 images
	Getting potato_healthy :	load 152 images
	Getting potato_late_blight :	load 1000 images
Getting TomatoDataset
	Getting Tomato_bacterial_spot :	load 1000 images
	Getting Tomato_healthy :	load 1000 images
	Getting Tomato_leaf_mold :	load 952 images
	Getting Tomato_septoria_leaf_spot :	load 1000 images
Complete


In [5]:
# Image Checker
# cv2.imshow("Image", clean_images["Tomato_septoria_leaf_spot" ][9])
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [6]:
# Feature gathering just color channels code

def feature_gathering_color_Channels(images, label):
    # Features to collect
    red_mean = []
    blue_mean = []
    green_mean = []
    # Target Variables
    labels = [label]*len(images)
    isDiseased = [label.find('healthy') == -1]*len(images)
    
    for image in images:
        red_mean.append(np.mean(image[:,:,0]))
        blue_mean.append(np.mean(image[:,:,1]))
        green_mean.append(np.mean(image[:,:,2]))
        
    return pd.DataFrame({"label":labels, "isDiseased":isDiseased,
                         "RedMean":red_mean, "BlueMean":blue_mean, "GreenMean": green_mean}) #Feature variables

In [7]:
def compute_lbp_histogram(image, num_points=8, radius=1, method="uniform"):
    if len(image.shape) == 3 and image.shape[2] == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    
    # Compute LBP codes
    lbp = local_binary_pattern(gray, num_points, radius, method=method)
    
    # Build the histogram
    n_bins = num_points + 2
    hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
    
    # Normalize
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)
    
    return hist

In [8]:
def feature_gathering_with_lbp(images, label):
    data = []
    
    for image in images:
        # If image is None, skip or fill with NaNs
        if image is None:
            row = {
                "RedMean": np.nan,
                "GreenMean": np.nan,
                "BlueMean": np.nan
            }
            for i in range(10):
                row[f"LBP_{i}"] = np.nan
            row["label"] = label
            data.append(row)
            continue
        
        # Color means (OpenCV = BGR)
        red_mean   = np.mean(image[:, :, 2])
        green_mean = np.mean(image[:, :, 1])
        blue_mean  = np.mean(image[:, :, 0])
        
        # Compute LBP histogram
        lbp_hist = compute_lbp_histogram(image, num_points=8, radius=1, method="uniform")
        
        # Build row
        row = {
            "RedMean": red_mean,
            "GreenMean": green_mean,
            "BlueMean": blue_mean
        }
        
        # Add LBP histogram bins as separate features
        for i, val in enumerate(lbp_hist):
            row[f"LBP_{i}"] = val
        
        row["label"] = label
        row["isDiseased"] = label.find('healthy') == -1
        
        data.append(row)
    
    return pd.DataFrame(data)

In [9]:
# Note: the skimage import is commented out too 

# def feature_gathering_with_glcm(images, label):
#     # Define GLCM parameters
#     properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']
#     angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
#     angle_names = ['0', '45', '90', '135']
    
#     data = []
#     for image in images:
#         if image is None:
#             # If an image failed to load, assign NaN to all features
#             row = {"RedMean": np.nan, "GreenMean": np.nan, "BlueMean": np.nan}
#             for prop in properties:
#                 for ang in angle_names:
#                     row[f"{prop}_{ang}"] = np.nan
#             row["label"] = label
#             data.append(row)
#             continue
        
#         # Extract color features (note: OpenCV uses BGR)
#         red_mean = np.mean(image[:, :, 2])
#         green_mean = np.mean(image[:, :, 1])
#         blue_mean = np.mean(image[:, :, 0])
        
#         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#         glcm = graycomatrix(gray, distances=[5], angles=angles, levels=256, symmetric=True, normed=True)
        
#         # Extract texture features from the GLCM for each property and angle
#         glcm_features = {}
#         for prop in properties:
#             vals = graycoprops(glcm, prop).flatten()  
#             for i, ang in enumerate(angle_names):
#                 glcm_features[f"{prop}_{ang}"] = vals[i]
        
#         # Combine features into single row
#         row = {
#             "RedMean": red_mean,
#             "GreenMean": green_mean,
#             "BlueMean": blue_mean
#         }
#         row.update(glcm_features)
#         row["label"] = label
#         row["isDiseased"] = label.find('healthy') == -1
#         data.append(row)
        
#     return pd.DataFrame(data)

In [10]:
# uncleaned images
uncleaned_leaves_df = pd.DataFrame()

for key in leaf_images.keys():
    print("Collecting features for",key)
    leaf_df = feature_gathering_with_lbp(leaf_images[key],key)
    uncleaned_leaves_df = pd.concat([uncleaned_leaves_df, leaf_df], axis = 0)
    
unclean_X = uncleaned_leaves_df.drop(columns= ["label",'isDiseased'], axis = 1)
unclean_y = uncleaned_leaves_df[["label","isDiseased"]]

Collecting features for potato_early_blight
Collecting features for potato_healthy
Collecting features for potato_late_blight
Collecting features for Tomato_bacterial_spot
Collecting features for Tomato_healthy
Collecting features for Tomato_leaf_mold
Collecting features for Tomato_septoria_leaf_spot


In [11]:
uncleaned_leaves_df.head()

Unnamed: 0,RedMean,GreenMean,BlueMean,LBP_0,LBP_1,LBP_2,LBP_3,LBP_4,LBP_5,LBP_6,LBP_7,LBP_8,LBP_9,label,isDiseased
0,119.126999,121.584213,98.035309,0.111954,0.10173,0.059052,0.069702,0.084457,0.07753,0.066605,0.10318,0.129608,0.196182,potato_early_blight,True
1,143.345734,146.974365,123.919739,0.090393,0.09465,0.055603,0.089172,0.10643,0.103348,0.075806,0.094772,0.115341,0.174484,potato_early_blight,True
2,136.50386,144.921799,128.904846,0.131882,0.109879,0.054016,0.051819,0.060394,0.056381,0.058563,0.109299,0.14415,0.223618,potato_early_blight,True
3,108.44577,128.540268,128.710266,0.104446,0.099991,0.058975,0.07666,0.094559,0.085648,0.070175,0.09819,0.120728,0.190628,potato_early_blight,True
4,136.750015,144.881912,130.400299,0.110397,0.101013,0.056168,0.072495,0.086349,0.079117,0.069305,0.102295,0.126862,0.195999,potato_early_blight,True


In [12]:
# cleaned images
cleaned_leaves_df = pd.DataFrame()

for key in clean_images.keys():
    print("Collecting features for",key)
    leaf_df = feature_gathering_with_lbp(clean_images[key],key)
    cleaned_leaves_df = pd.concat([cleaned_leaves_df, leaf_df], axis = 0)
    
clean_X = cleaned_leaves_df.drop(columns= ["label",'isDiseased'], axis = 1)
clean_y = cleaned_leaves_df[["label",'isDiseased']]

Collecting features for potato_early_blight
Collecting features for potato_healthy
Collecting features for potato_late_blight
Collecting features for Tomato_bacterial_spot
Collecting features for Tomato_healthy
Collecting features for Tomato_leaf_mold
Collecting features for Tomato_septoria_leaf_spot


In [13]:
# SVM Model Maker
Leaf_svm = SVC(kernel='linear', C = 1)
kf = KFold(n_splits= 5, shuffle = True)

In [14]:
# Cross_val_score unclean
scores = cross_val_score(Leaf_svm, unclean_X, unclean_y['isDiseased'], cv=kf)
print("The binary SVM with LBPH and unclean images Cross Val score = ",np.average(np.absolute(scores)))

# Cross_val_score clean
scores = cross_val_score(Leaf_svm, clean_X, clean_y['isDiseased'], cv=kf)
print("The binary SVM with LBPH and cleaned image Cross Val score = ",np.average(np.absolute(scores)))

The binary SVM with LBPH and unclean images Cross Val score =  0.9311935929968718
The binary SVM with LBPH and cleaned image Cross Val score =  0.8109414481545629


In [15]:
# Cross_val_score unclean
scores = cross_val_score(Leaf_svm, unclean_X, unclean_y['label'], cv=kf)
print("The disease classification SVM with LBPH and unclean images  Cross Val score = ",np.average(np.absolute(scores)))

# Cross_val_score clean
scores = cross_val_score(Leaf_svm, clean_X, clean_y['label'], cv=kf)
print("The disease classification SVM with LBPH and cleaned image clean Cross Val score = ",np.average(np.absolute(scores)))

The disease classification SVM with LBPH and unclean images  Cross Val score =  0.6195937218888038
The disease classification SVM with LBPH and cleaned image clean Cross Val score =  0.6243460748378782


In [16]:
# # The GLCM version

# # uncleaned images
# uncleaned_leaves_df = pd.DataFrame()

# for key in leaf_images.keys():
#     print("Collecting features for",key)
#     leaf_df = feature_gathering_with_glcm(leaf_images[key],key)
#     uncleaned_leaves_df = pd.concat([uncleaned_leaves_df, leaf_df], axis = 0)
    
# unclean_X = uncleaned_leaves_df.drop(columns= ["label",'isDiseased'], axis = 1)
# unclean_y = uncleaned_leaves_df[["label","isDiseased"]]

# # cleaned images
# cleaned_leaves_df = pd.DataFrame()

# for key in clean_images.keys():
#     print("Collecting features for",key)
#     leaf_df = feature_gathering_with_glcm(clean_images[key],key)
#     cleaned_leaves_df = pd.concat([cleaned_leaves_df, leaf_df], axis = 0)
    
# clean_X = cleaned_leaves_df.drop(columns= ["label",'isDiseased'], axis = 1)
# clean_y = cleaned_leaves_df[["label",'isDiseased']]

# # Cross_val_score unclean
# scores = cross_val_score(Leaf_svm, unclean_X, unclean_y['isDiseased'], cv=kf)
# print("The binary SVM with GLCM and unclean images Cross Val score = ",np.average(np.absolute(scores)))

# # Cross_val_score clean
# scores = cross_val_score(Leaf_svm, clean_X, clean_y['isDiseased'], cv=kf)
# print("The binary SVM with GLCM and cleaned image Cross Val score = ",np.average(np.absolute(scores)))

# # Cross_val_score unclean
# scores = cross_val_score(Leaf_svm, unclean_X, unclean_y['label'], cv=kf)
# print("The disease classification SVM with GLCM and unclean images Cross Val score = ",np.average(np.absolute(scores)))

# # Cross_val_score clean
# scores = cross_val_score(Leaf_svm, clean_X, clean_y['label'], cv=kf)
# print("The disease classification SVM with GLCM and clean images  Cross Val score = ",np.average(np.absolute(scores)))