In [1]:
# Third-party packages that may need to be installed before running this notebook:
# %pip install opencv-python

In [2]:
import numpy as np 
import cv2
import os
import pandas as pd
import glob
import matplotlib.pyplot as plt       
%matplotlib inline

from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [3]:
# Data cleaning
# Data source: https://github.com/gabrieldgf4/PlantVillage-Dataset
# (a repost of the plantvillage.org repository, which is no longer available)

# Lower / upper HSV bounds handed to cv2.inRange when building the mask.
# NOTE(review): assuming 8-bit HSV (hue 0-179), only the saturation floor of 60
# is actually restrictive here - confirm that is the intent.
low_hsv = (0,60,0)
high_hsv = (179,255,255)


def cleaning_image(images):
    """Return a masked copy of every image in ``images``.

    Each image is converted from BGR to HSV, thresholded against
    ``low_hsv`` / ``high_hsv``, the resulting mask is morphologically
    closed with an 8x8 kernel of ones, and the mask is applied to the
    original BGR image.

    Parameters
    ----------
    images : iterable of BGR images (as accepted by ``cv2.cvtColor``).

    Returns
    -------
    list
        One masked image per input image, in the same order.
    """
    # The structuring element is identical for every image, so build it once.
    closing_kernel = np.ones((8, 8), dtype=np.uint8)

    def _apply_mask(bgr):
        hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
        keep = cv2.inRange(hsv, low_hsv, high_hsv)
        keep = cv2.morphologyEx(keep, cv2.MORPH_CLOSE, kernel=closing_kernel)
        # Pixels outside the mask become 0; the rest keep their BGR values.
        return cv2.bitwise_and(bgr, bgr, mask=keep)

    return [_apply_mask(bgr) for bgr in images]

In [4]:
def get_images(root='LeavesDataset'):
    """Load every leaf image under ``root`` and build a cleaned copy of each.

    The expected layout is ``root/<plant>/<leaf class>/<image>.jpg``.
    Paths are built with ``os.path`` so the loader works with any path
    separator (the previous hard-coded backslashes only worked on Windows),
    and the ``.jpg`` extension is matched case-insensitively.

    Parameters
    ----------
    root : str, optional
        Dataset directory. Defaults to ``'LeavesDataset'``.

    Returns
    -------
    (dict, dict)
        ``leaf_images`` and ``clean_leaf_images``, both keyed by the leaf
        class folder name. The first holds the images as returned by
        ``cv2.imread``; the second holds ``cleaning_image`` applied to them.
    """
    leaf_images = {}
    clean_leaf_images = {}

    # Sorted so that the class order and image order are the same on every run.
    plant_dirs = sorted(
        path for path in glob.glob(os.path.join(root, '*')) if os.path.isdir(path)
    )

    for plant in plant_dirs:
        print("Getting", os.path.basename(plant))
        leaf_dirs = sorted(
            path for path in glob.glob(os.path.join(plant, '*')) if os.path.isdir(path)
        )

        for leaves in leaf_dirs:
            leaves_name = os.path.basename(leaves)
            print("\tGetting", leaves_name, ":", end="\t")

            image_files = sorted(
                path for path in glob.glob(os.path.join(leaves, '*'))
                if os.path.splitext(path)[1].lower() == '.jpg'
            )
            loaded = [cv2.imread(path) for path in image_files]
            # cv2.imread returns None (it does not raise) for unreadable files;
            # drop those so later processing never receives None.
            readable = [image for image in loaded if image is not None]

            leaf_images[leaves_name] = readable
            clean_leaf_images[leaves_name] = cleaning_image(readable)
            print("load", len(readable), "images")

            skipped = len(loaded) - len(readable)
            if skipped:
                print("\t\tskipped", skipped, "unreadable files")

    print("Complete")
    return leaf_images, clean_leaf_images


leaf_images, clean_images = get_images()

Getting PotatoDataset
	Getting potato_early_blight :	load 1000 images
	Getting potato_healthy :	load 152 images
	Getting potato_late_blight :	load 1000 images
Getting TomatoDataset
	Getting Tomato_bacterial_spot :	load 1000 images
	Getting Tomato_healthy :	load 1000 images
	Getting Tomato_leaf_mold :	load 952 images
	Getting Tomato_septoria_leaf_spot :	load 1000 images
Complete


In [5]:
# Image Checker
# cv2.imshow("Image", clean_images["Tomato_septoria_leaf_spot" ][9])
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [6]:
# Display the keys of leaf_images (the leaf class folder names that were loaded).
leaf_images.keys()

dict_keys(['potato_early_blight', 'potato_healthy', 'potato_late_blight', 'Tomato_bacterial_spot', 'Tomato_healthy', 'Tomato_leaf_mold', 'Tomato_septoria_leaf_spot'])

In [7]:
# Feature gathering code

def feature_gathering(images, label):
    """Build a per-image table of mean colour-channel intensities.

    The images in this notebook are loaded with ``cv2.imread``, which
    stores channels in B, G, R order. Channel 0 is therefore blue,
    channel 1 green and channel 2 red; the means are assigned to the
    matching column names accordingly.

    Parameters
    ----------
    images : sequence of arrays indexed as ``image[row, col, channel]``
        with at least three channels in BGR order.
    label : value stored in the ``label`` column for every row.

    Returns
    -------
    pandas.DataFrame
        One row per image with columns ``label``, ``RedMean``,
        ``BlueMean`` and ``GreenMean`` (in that order).
    """
    # TODO: a contrast feature is planned but needs an extra package.
    blue_mean = []
    green_mean = []
    red_mean = []
    for image in images:
        # OpenCV channel order is B, G, R.
        blue_mean.append(np.mean(image[:, :, 0]))
        green_mean.append(np.mean(image[:, :, 1]))
        red_mean.append(np.mean(image[:, :, 2]))

    return pd.DataFrame({
        "label": [label] * len(images),
        "RedMean": red_mean,
        "BlueMean": blue_mean,
        "GreenMean": green_mean,
    })

In [8]:
# Features from the uncleaned (raw) images.
# Collect one frame per class and concatenate once at the end: growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
unclean_frames = []

for key in leaf_images.keys():
    print("Collecting features for",key)
    leaf_df = feature_gathering(leaf_images[key],key)
    unclean_frames.append(leaf_df)

# ignore_index gives every row a unique label instead of restarting at 0 per class.
uncleaned_leaves_df = pd.concat(unclean_frames, axis=0, ignore_index=True)

# Feature matrix (every column except the label) and the target vector.
unclean_X = uncleaned_leaves_df.drop(columns=["label"])
unclean_y = uncleaned_leaves_df["label"]

Collecting features for potato_early_blight
Collecting features for potato_healthy
Collecting features for potato_late_blight
Collecting features for Tomato_bacterial_spot
Collecting features for Tomato_healthy
Collecting features for Tomato_leaf_mold
Collecting features for Tomato_septoria_leaf_spot


In [9]:
# Features from the cleaned (masked) images.
# Collect one frame per class and concatenate once at the end: growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
clean_frames = []

for key in clean_images.keys():
    print("Collecting features for",key)
    leaf_df = feature_gathering(clean_images[key],key)
    clean_frames.append(leaf_df)

# ignore_index gives every row a unique label instead of restarting at 0 per class.
cleaned_leaves_df = pd.concat(clean_frames, axis=0, ignore_index=True)

# Feature matrix (every column except the label) and the target vector.
clean_X = cleaned_leaves_df.drop(columns=["label"])
clean_y = cleaned_leaves_df["label"]

Collecting features for potato_early_blight
Collecting features for potato_healthy
Collecting features for potato_late_blight
Collecting features for Tomato_bacterial_spot
Collecting features for Tomato_healthy
Collecting features for Tomato_leaf_mold
Collecting features for Tomato_septoria_leaf_spot


In [10]:
# SVM classifier shared by the cross-validation cells below:
# RBF kernel with regularisation parameter C = 100.
Leaf_svm = SVC(C=100, kernel="rbf")

In [11]:
# 10-fold cross-validation on the features from the uncleaned images.
# A fixed random_state makes the shuffled folds, and therefore the reported
# score, reproducible across runs.
# NOTE(review): the same seed is used for the cleaned-image run so both runs
# are split identically, assuming both tables have the same number of rows.
kf = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(Leaf_svm, unclean_X, unclean_y, cv=kf)
# The default SVC score is accuracy, which is never negative, so a plain mean is enough.
print("The unclean Cross Val score = ",scores.mean())

The unclean Cross Val score =  0.5935488717769848


In [12]:
# 10-fold cross-validation on the features from the cleaned images.
# A fixed random_state makes the shuffled folds, and therefore the reported
# score, reproducible across runs.
# NOTE(review): the same seed is used for the uncleaned-image run so both runs
# are split identically, assuming both tables have the same number of rows.
kf = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(Leaf_svm, clean_X, clean_y, cv=kf)
# The default SVC score is accuracy, which is never negative, so a plain mean is enough.
print("The clean Cross Val score = ",scores.mean())

The clean Cross Val score =  0.6394191194226073
