In [1]:
# Packages that may need to be installed to run
#pip install opencv-python

In [2]:
import numpy as np 
import cv2
import os
import pandas as pd
import glob
import matplotlib.pyplot as plt       
%matplotlib inline

from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [5]:
# Data Cleaning
# Data source cite: https://github.com/gabrieldgf4/PlantVillage-Dataset
# Repost from the plantvillage.org repository that is no longer available

# Setting filters for the images hsv colors
low_val = (0,60,0)
high_val = (179,255,255)


#cleaning the images
def cleaning_image(images):
    clean_image = []
    for image in images:
        gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(gray_scale, low_val,high_val)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel=np.ones((8,8),dtype=np.uint8))
        clean_image.append(cv2.bitwise_and(image, image,mask=mask))
    return clean_image

# Getting healthy leaf images (#1000)
Toma_healthy = [cv2.imread(file) for file in glob.glob('TomatoDataset\\Tomato_healthy\\*.JPG')]
Toma_healthy_cleaned = cleaning_image(Toma_healthy)

# Getting moldy leaf images (# 952)
Toma_mold  = [cv2.imread(file) for file in glob.glob('TomatoDataset\\Tomato_leaf_mold\\*.JPG')]
Toma_mold_cleaned = cleaning_image(Toma_mold)

# Getting bacterial spot images (# 1000)
Toma_bact_spot  = [cv2.imread(file) for file in glob.glob('TomatoDataset\\Tomato_bacterial_spot\\*.JPG')]
Toma_bact_spot_clean = cleaning_image(Toma_bact_spot)

In [None]:
# Image Checker
# cv2.imshow("Image", Toma_bact_spot_clean[88])
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [6]:
# Feature gathering code
# Takes in image list and label, returns pandas datafram of features
def feature_gathering(images, label):
    red_mean = []
    blue_mean = []
    green_mean = []
    contrast = [] # not used yet. Need to download a package to get this feature
    labels = [label]*len(images)
    for image in images:
        red_mean.append(np.mean(image[:,:,0]))
        blue_mean.append(np.mean(image[:,:,1]))
        green_mean.append(np.mean(image[:,:,2]))
        
    return pd.DataFrame({"label":labels, "RedMean":red_mean, "BlueMean":blue_mean, "GreenMean": green_mean})

In [7]:
# uncleaned images
Toma_healthy_df = feature_gathering(Toma_healthy, 1)
Toma_mold_df = feature_gathering(Toma_mold, 2)
Toma_bact_spot_df = feature_gathering(Toma_bact_spot, 3)

uncleaned_leaves_df = pd.concat([Toma_healthy_df,Toma_mold_df,Toma_bact_spot_df], axis =0)
unclean_X = uncleaned_leaves_df.drop(columns= ["label"], axis = 1)
unclean_y = uncleaned_leaves_df["label"]

In [8]:
# cleaned images
Toma_healthy_df = feature_gathering(Toma_healthy_cleaned, 1)
Toma_mold_df  = feature_gathering(Toma_mold_cleaned, 2)
Toma_bact_spot_df = feature_gathering(Toma_bact_spot, 3)

cleaned_leaves_df = pd.concat([Toma_healthy_df,Toma_mold_df,Toma_bact_spot_df], axis =0)
clean_X = cleaned_leaves_df.drop(columns= ["label"], axis = 1)
clean_y = cleaned_leaves_df["label"]


In [9]:
# SVM Model Maker
Leaf_svm = SVC(kernel='rbf', C = 100,)

In [13]:
# Cross_val_score unclean
kf = KFold(n_splits= 10, shuffle = True)
scores = cross_val_score(Leaf_svm, unclean_X, unclean_y, cv=kf)
print("The unclean Cross Val score = ",np.average(np.absolute(scores)))

The unclean Cross Val score =  0.8065758131012368


In [14]:
# Cross_val_score clean
kf = KFold(n_splits= 10, shuffle = True)
scores = cross_val_score(Leaf_svm, clean_X, clean_y, cv=kf)
print("The clean Cross Val score = ",np.average(np.absolute(scores)))

The clean Cross Val score =  0.9481630783325696
