In [9]:
# Packages that may need to be installed to run
#!pip install opencv-python

In [1]:
import numpy as np 
import cv2
import os
import pandas as pd
import glob
import matplotlib.pyplot as plt       
%matplotlib inline

from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from skimage.feature import local_binary_pattern
from skimage.feature import graycomatrix, graycoprops

In [4]:
# Data Cleaning
# Data source cite: https://github.com/gabrieldgf4/PlantVillage-Dataset
# Repost from the plantvillage.org repository that is no longer available

# Setting filters for the images hsv colors
low_hsv = (0,60,0)
high_hsv = (179,255,255)

#cleaning the images
def cleaning_image(images):
    clean_image= []
    for image in images:
        gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(gray_scale, low_hsv,high_hsv)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel=np.ones((8,8),dtype=np.uint8))
        clean_image.append(cv2.bitwise_and(image, image,mask=mask))
    return clean_image

In [24]:
jgp_list = ['PotatoDataset','TomatoDataset']
png_list = ['GrapeDataset','PapayaDataset']

# Parameter: of folder names to look into grab images for
# Returns: Dictionary of images {leaf_name : array of leaf image(BGR)}
# note: for now doesn't clean the background
def get_images(jgp_list = [], png_list=[]):
    if len(jgp_list) + len(png_list) == 0:
        return None
    leaf_images = {}
    clean_leaf_images = {}
    count = 1
    PlantDataset = glob.glob(r'LeavesDataset\*')
    
    for plant in PlantDataset:
        plant_name = plant.split('\\')[1]
        print("Getting",plant_name)
        LeavesDataset = glob.glob(plant+'\\*')
        
        for leaves in LeavesDataset:
            leaves_name = leaves.split("\\")[2]
            if plant_name in jgp_list:
                print("\tGetting",leaves_name,":", end="\t")
                leaf_images[leaves_name] = [cv2.imread(file) for file in glob.glob(leaves+'\\*.JPG')]
                print("load",len(leaf_images[leaves_name]),"images")
                # clean_leaf_images[leaves_name] = cleaning_image(leaf_images[leaves_name])
            elif plant_name in png_list:
                print("\tGetting",leaves_name,":", end="\t")
                leaf_images[leaves_name] = [cv2.imread(file) for file in glob.glob(leaves+'\\*.PNG')]
                print("load",len(leaf_images[leaves_name]),"images")
                # clean_leaf_images[leaves_name] = cleaning_image(leaf_images[leaves_name])
            
    print("Complete")
    return leaf_images #, clean_leaf_images

leaf_disease_images = get_images(jgp_list,png_list)

# png_list = []
# jgp_list = ['NutrientDeficiency']

# leaf_deficiency_images = get_images(jgp_list,png_list)

Getting EggplantDataset
Getting GrapeDataset
	Getting grape_black_rot :	load 590 images
	Getting grape_brown_spot :	load 782 images
	Getting grape_healthy :	load 837 images


SystemError: <built-in function cvtColor> returned a result with an exception set

In [13]:
# Image Checker
# cv2.imshow("Image", clean_images["Tomato_septoria_leaf_spot" ][9])
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [14]:
# Feature gathering just color channels code

def feature_gathering_color_Channels(images, label):
    # Features to collect
    red_mean = []
    blue_mean = []
    green_mean = []
    # Target Variables
    labels = [label]*len(images)
    isDiseased = [label.find('healthy') == -1]*len(images)
    
    for image in images:
        red_mean.append(np.mean(image[:,:,0]))
        blue_mean.append(np.mean(image[:,:,1]))
        green_mean.append(np.mean(image[:,:,2]))
        
    return pd.DataFrame({"label":labels, "isDiseased":isDiseased,
                         "RedMean":red_mean, "BlueMean":blue_mean, "GreenMean": green_mean}) #Feature variables

In [10]:
def compute_lbp_histogram(image, num_points=8, radius=1, method="uniform"):
    if len(image.shape) == 3 and image.shape[2] == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    
    # Compute LBP codes
    lbp = local_binary_pattern(gray, num_points, radius, method=method)
    
    # Build the histogram
    n_bins = num_points + 2
    hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
    
    # Normalize
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)
    
    return hist

In [12]:
def feature_gathering_with_lbp(images, label):
    data = []
    
    for image in images:
        # If image is None, skip or fill with NaNs
        if image is None:
            row = {
                "RedMean": np.nan,
                "GreenMean": np.nan,
                "BlueMean": np.nan
            }
            for i in range(10):
                row[f"LBP_{i}"] = np.nan
            row["label"] = label
            data.append(row)
            continue
        
        # Color means (OpenCV = BGR)
        red_mean   = np.mean(image[:, :, 2])
        green_mean = np.mean(image[:, :, 1])
        blue_mean  = np.mean(image[:, :, 0])
        
        # Compute LBP histogram
        lbp_hist = compute_lbp_histogram(image, num_points=8, radius=1, method="uniform")
        
        # Build row
        row = {
            "RedMean": red_mean,
            "GreenMean": green_mean,
            "BlueMean": blue_mean
        }
        
        # Add LBP histogram bins as separate features
        for i, val in enumerate(lbp_hist):
            row[f"LBP_{i}"] = val
        
        row["label"] = label
        row["isDiseased"] = label.find('healthy') == -1
        
        data.append(row)
    
    return pd.DataFrame(data)

In [17]:
# Note: the skimage import is commented out too 

def feature_gathering_with_glcm(images, label):
    # Define GLCM parameters
    properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    angle_names = ['0', '45', '90', '135']
    
    data = []
    for image in images:
        if image is None:
            # If an image failed to load, assign NaN to all features
            row = {"RedMean": np.nan, "GreenMean": np.nan, "BlueMean": np.nan}
            for prop in properties:
                for ang in angle_names:
                    row[f"{prop}_{ang}"] = np.nan
            row["label"] = label
            data.append(row)
            continue
        
        # Extract color features (note: OpenCV uses BGR)
        red_mean = np.mean(image[:, :, 2])
        green_mean = np.mean(image[:, :, 1])
        blue_mean = np.mean(image[:, :, 0])
        
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        glcm = graycomatrix(gray, distances=[5], angles=angles, levels=256, symmetric=True, normed=True)
        
        # Extract texture features from the GLCM for each property and angle
        glcm_features = {}
        for prop in properties:
            vals = graycoprops(glcm, prop).flatten()  
            for i, ang in enumerate(angle_names):
                glcm_features[f"{prop}_{ang}"] = vals[i]
        
        # Combine features into single row
        row = {
            "RedMean": red_mean,
            "GreenMean": green_mean,
            "BlueMean": blue_mean
        }
        row.update(glcm_features)
        row["label"] = label
        data.append(row)
        
    return pd.DataFrame(data)

In [14]:
# uncleaned images
uncleaned_leaves_df = pd.DataFrame()

for key in leaf_disease_images.keys():
    print("Collecting features for",key)
    leaf_df = feature_gathering_with_lbp(leaf_disease_images[key],key)
    uncleaned_leaves_df = pd.concat([uncleaned_leaves_df, leaf_df], axis = 0)
    
unclean_X = uncleaned_leaves_df.drop(columns= ["label",'isDiseased'], axis = 1)
unclean_y = uncleaned_leaves_df[["label","isDiseased"]]

NameError: name 'leaf_images' is not defined

In [19]:
uncleaned_leaves_df.head()

Unnamed: 0,RedMean,GreenMean,BlueMean,LBP_0,LBP_1,LBP_2,LBP_3,LBP_4,LBP_5,LBP_6,LBP_7,LBP_8,LBP_9,label,isDiseased
0,119.126999,121.584213,98.035309,0.111954,0.10173,0.059052,0.069702,0.084457,0.07753,0.066605,0.10318,0.129608,0.196182,potato_early_blight,True
1,143.345734,146.974365,123.919739,0.090393,0.09465,0.055603,0.089172,0.10643,0.103348,0.075806,0.094772,0.115341,0.174484,potato_early_blight,True
2,136.50386,144.921799,128.904846,0.131882,0.109879,0.054016,0.051819,0.060394,0.056381,0.058563,0.109299,0.14415,0.223618,potato_early_blight,True
3,108.44577,128.540268,128.710266,0.104446,0.099991,0.058975,0.07666,0.094559,0.085648,0.070175,0.09819,0.120728,0.190628,potato_early_blight,True
4,136.750015,144.881912,130.400299,0.110397,0.101013,0.056168,0.072495,0.086349,0.079117,0.069305,0.102295,0.126862,0.195999,potato_early_blight,True


In [16]:
# cleaned images
cleaned_leaves_df = pd.DataFrame()

for key in leaf_disease_images.keys():
    print("Collecting features for",key)
    leaf_df = feature_gathering_with_lbp(leaf_disease_images[key],key)
    cleaned_leaves_df = pd.concat([cleaned_leaves_df, leaf_df], axis = 0)
    
clean_X = cleaned_leaves_df.drop(columns= ["label",'isDiseased'], axis = 1)
clean_y = cleaned_leaves_df[["label",'isDiseased']]

Collecting features for grape_black_rot
Collecting features for grape_brown_spot
Collecting features for grape_healthy
Collecting features for grape_mites_disease
Collecting features for papaya_curl
Collecting features for papaya_healthy
Collecting features for papaya_mite_disease
Collecting features for papaya_mosaic
Collecting features for papaya_ringspot
Collecting features for potato_early_blight
Collecting features for potato_healthy
Collecting features for potato_late_blight
Collecting features for Tomato_bacterial_spot
Collecting features for Tomato_healthy
Collecting features for Tomato_leaf_mold
Collecting features for Tomato_septoria_leaf_spot


In [18]:
# SVM Model Maker
Leaf_svm = SVC(kernel='linear', C = 1)
kf = KFold(n_splits= 5, shuffle = True)

In [20]:
# Cross_val_score unclean
# scores = cross_val_score(Leaf_svm, unclean_X, unclean_y['isDiseased'], cv=kf)
# print("The binary SVM with LBPH and unclean images Cross Val score = ",np.average(np.absolute(scores)))

# Cross_val_score clean
scores = cross_val_score(Leaf_svm, clean_X, clean_y['isDiseased'], cv=kf)
print("The binary SVM with LBPH and cleaned image Cross Val score = ",np.average(np.absolute(scores)))

The binary SVM with LBPH and cleaned image Cross Val score =  0.7857414448669202


In [21]:
# Cross_val_score unclean
# scores = cross_val_score(Leaf_svm, unclean_X, unclean_y['label'], cv=kf)
# print("The disease classification SVM with LBPH and unclean images  Cross Val score = ",np.average(np.absolute(scores)))

# Cross_val_score clean
scores = cross_val_score(Leaf_svm, clean_X, clean_y['label'], cv=kf)
print("The disease classification SVM with LBPH and cleaned image clean Cross Val score = ",np.average(np.absolute(scores)))

The disease classification SVM with LBPH and cleaned image clean Cross Val score =  0.5540874524714829


In [None]:
# The GLCM version
# uncleaned images
uncleaned_leaves_df = pd.DataFrame()

for key in leaf_images.keys():
    print("Collecting features for",key)
    leaf_df = feature_gathering_with_glcm(leaf_images[key],key)
    uncleaned_leaves_df = pd.concat([uncleaned_leaves_df, leaf_df], axis = 0)

In [26]:
uncleaned_leaves_df

Unnamed: 0,RedMean,GreenMean,BlueMean,dissimilarity_0,dissimilarity_45,dissimilarity_90,dissimilarity_135,correlation_0,correlation_45,correlation_90,...,contrast_135,ASM_0,ASM_45,ASM_90,ASM_135,energy_0,energy_45,energy_90,energy_135,label
0,119.126999,121.584213,98.035309,28.470960,29.410856,28.176917,29.151109,0.653677,0.631393,0.660071,...,1828.500409,0.000098,0.000097,0.000099,0.000094,0.009875,0.009832,0.009971,0.009678,potato_early_blight
1,143.345734,146.974365,123.919739,24.497292,22.388621,23.533475,27.941673,0.721709,0.776474,0.747603,...,1988.594325,0.000198,0.000204,0.000215,0.000198,0.014072,0.014282,0.014652,0.014054,potato_early_blight
2,136.503860,144.921799,128.904846,28.211622,28.659171,31.013306,33.007811,0.626476,0.620185,0.545294,...,2380.895408,0.000084,0.000082,0.000079,0.000077,0.009192,0.009036,0.008904,0.008771,potato_early_blight
3,108.445770,128.540268,128.710266,22.909425,21.811492,21.944332,24.229356,0.678270,0.717418,0.714073,...,1192.207026,0.000118,0.000116,0.000117,0.000112,0.010850,0.010787,0.010808,0.010565,potato_early_blight
4,136.750015,144.881912,130.400299,24.231480,26.535462,25.963941,24.925595,0.596719,0.523896,0.542374,...,1387.785258,0.000131,0.000123,0.000125,0.000127,0.011460,0.011077,0.011191,0.011269,potato_early_blight
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,105.241379,106.999359,110.331390,36.566328,36.833318,35.021337,35.934870,0.577969,0.573300,0.614788,...,2355.041856,0.000474,0.000469,0.000648,0.000487,0.021778,0.021647,0.025447,0.022071,Tomato_septoria_leaf_spot
996,118.001083,115.806915,124.196777,27.570826,27.499795,26.188200,27.202334,0.450547,0.461293,0.511111,...,1516.297981,0.000140,0.000137,0.000144,0.000139,0.011829,0.011706,0.011996,0.011787,Tomato_septoria_leaf_spot
997,135.722290,124.013641,120.621094,20.895761,22.625126,22.691297,21.801745,0.739899,0.692044,0.687611,...,1275.677249,0.000346,0.000326,0.000333,0.000319,0.018595,0.018061,0.018235,0.017853,Tomato_septoria_leaf_spot
998,127.490173,137.704559,114.370148,24.895730,27.190964,27.096614,26.594199,0.737895,0.691176,0.689666,...,1389.898715,0.000093,0.000081,0.000081,0.000084,0.009652,0.008987,0.008985,0.009141,Tomato_septoria_leaf_spot


In [28]:
unclean_X = uncleaned_leaves_df.drop(columns= ["label"], axis = 1)
unclean_y = uncleaned_leaves_df[["label"]]

In [30]:
# cleaned images
cleaned_leaves_df = pd.DataFrame()

for key in clean_images.keys():
    print("Collecting features for",key)
    leaf_df = feature_gathering_with_glcm(clean_images[key],key)
    cleaned_leaves_df = pd.concat([cleaned_leaves_df, leaf_df], axis = 0)
    

Collecting features for potato_early_blight
Collecting features for potato_healthy
Collecting features for potato_late_blight
Collecting features for Tomato_bacterial_spot
Collecting features for Tomato_healthy
Collecting features for Tomato_leaf_mold
Collecting features for Tomato_septoria_leaf_spot


In [None]:
clean_X = cleaned_leaves_df.drop(columns= ["label"], axis = 1)
clean_y = cleaned_leaves_df[["label"]]

# # Cross_val_score unclean
# scores = cross_val_score(Leaf_svm, unclean_X, unclean_y['isDiseased'], cv=kf)
# print("The binary SVM with GLCM and unclean images Cross Val score = ",np.average(np.absolute(scores)))

# # Cross_val_score clean
# scores = cross_val_score(Leaf_svm, clean_X, clean_y['isDiseased'], cv=kf)
# print("The binary SVM with GLCM and cleaned image Cross Val score = ",np.average(np.absolute(scores)))

# Cross_val_score unclean
scores = cross_val_score(Leaf_svm, unclean_X, unclean_y['label'], cv=kf)
print("The disease classification SVM with GLCM and unclean images Cross Val score = ",np.average(np.absolute(scores)))


In [None]:
# Cross_val_score clean
scores = cross_val_score(Leaf_svm, clean_X, clean_y['label'], cv=kf)
print("The disease classification SVM with GLCM and clean images  Cross Val score = ",np.average(np.absolute(scores)))