In [121]:
#imports
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py
import h5py
import glob
import warnings
from numpy import mean
from numpy import std
from numpy import absolute
from pandas import read_csv
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.model_selection import RepeatedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import joblib
import pandas as pd
import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential
from keras.preprocessing import image
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Convolution2D,Dense,MaxPool2D,Activation,Dropout,Flatten
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import load_model
from matplotlib import pyplot as plt
import matplotlib.image as mpimg

#parameters
images_per_class       = 800
fixed_size             = tuple((500, 500))
train_path             = "/Final_Datasets/Diseased_+_Healthy"
h5_train_data          = '/tier1/output/train_data.h5'
h5_train_labels        = '/tier1/output/train_labels.h5'
bins                   = 8
Observe_Image = 'INSERT_IMG_PATH_HERE'

resultsdic = {}
finalres = []

In [122]:
#Image format conversions
def rgb_bgr(image):
    rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return rgb_img
def bgr_hsv(rgb_img):
    hsv_img = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2HSV)
    return hsv_img

In [123]:
# Image Manipulation
def img_segmentation(rgb_img,hsv_img):
    lower_green = np.array([25,0,20])
    upper_green = np.array([100,255,255])
    healthy_mask = cv2.inRange(hsv_img, lower_green, upper_green)
    result = cv2.bitwise_and(rgb_img,rgb_img, mask=healthy_mask)
    lower_brown = np.array([10,0,10])
    upper_brown = np.array([30,255,255])
    disease_mask = cv2.inRange(hsv_img, lower_brown, upper_brown)
    disease_result = cv2.bitwise_and(rgb_img, rgb_img, mask=disease_mask)
    final_mask = healthy_mask + disease_mask
    final_result = cv2.bitwise_and(rgb_img, rgb_img, mask=final_mask)
    return final_result


In [124]:
# Feature Manipulation, one of 3 features to alter.
def fd_hu_moments(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature

def fd_haralick(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    return haralick

def fd_histogram(image, mask=None):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist  = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()

In [127]:
# get the training labels
train_labels = os.listdir(train_path)

# sort the training labels
train_labels.sort()
print(train_labels)

# declare lists
global_features = []
global_features1 = []
labels          = []

['diseased', 'healthy']


In [128]:
# loop over the training data sub-folders to iterate over image.
for training_name in train_labels:
    dir = train_path + "/" + training_name
    print(dir)
    current_label = training_name
    #nested loop
    for x in os.listdir(dir):
        # get the image file name
        file = dir + "/" + str(x)

        # read the image and resize it to a fixed-size
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)
        
        RGB_BGR       = rgb_bgr(image)
        BGR_HSV       = bgr_hsv(RGB_BGR)
        IMG_SEGMENT   = img_segmentation(RGB_BGR,BGR_HSV)

        # Global Fetaure Descriptor
        fv_hu_moments = fd_hu_moments(IMG_SEGMENT)
        fv_haralick   = fd_haralick(IMG_SEGMENT)
        fv_histogram  = fd_histogram(IMG_SEGMENT)
        
        # Update
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])
        labels.append(current_label)
        global_features.append(global_feature)
    print("processed folder: {}".format(current_label))
print("Global Feature Extraction DONE")

## TESTING OBSERVE_IMAGE ###
file1 = Observe_Image
image1 = cv2.imread(file1)
image1 = cv2.resize(image1, fixed_size)

RGB_BGR1       = rgb_bgr(image1)
BGR_HSV1       = bgr_hsv(RGB_BGR1)
IMG_SEGMENT1   = img_segmentation(RGB_BGR1,BGR_HSV1)

fv_hu_moments1 = fd_hu_moments(IMG_SEGMENT1)
fv_haralick1   = fd_haralick(IMG_SEGMENT1)
fv_histogram1  = fd_histogram(IMG_SEGMENT1)

global_feature1 = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])

global_features1.append(global_feature)

C:/Users/johan/OneDrive/Documents/GitHub/DGMD-S-17/Final_Datasets/Diseased_+_Healthy/diseased
processed folder: diseased
C:/Users/johan/OneDrive/Documents/GitHub/DGMD-S-17/Final_Datasets/Diseased_+_Healthy/healthy
processed folder: healthy
Global Feature Extraction DONE


In [129]:
# Vector + Training Sizes
print("[STATUS] feature vector size {}".format(np.array(global_features).shape))\
print("[STATUS] training Labels {}".format(np.array(labels).shape))

[STATUS] feature vector size (1709, 532)


In [131]:
# Encoding Labels
targetNames = np.unique(labels)
le          = LabelEncoder()
target      = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

[STATUS] training labels encoded...


In [132]:
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
rescaled_features1 = scaler.fit_transform(global_features1)
print("[STATUS] feature vector normalized...")

[STATUS] feature vector normalized...


In [133]:
# Check Status
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

[STATUS] target labels: [0 0 0 ... 1 1 1]
[STATUS] target labels shape: (1709,)


#### **Save Feature Vector using HDF5**

In [134]:
print(h5_train_data)
h5f_data = h5py.File(h5_train_data, 'w')
h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))

h5f_label = h5py.File(h5_train_labels, 'w')
h5f_label.create_dataset('dataset_1', data=np.array(target))

h5f_data.close()
h5f_label.close()

C:/Users/johan/OneDrive/Documents/GitHub/DGMD-S-17/tier1/output/train_data.h5


#### **Algorithm Training**
**LR, KNN, SVM**

In [135]:
# training
#-----------------------------------
# TRAINING OUR MODEL
#-----------------------------------

warnings.filterwarnings('ignore')

#--------------------
# tunable-parameters
#--------------------
num_trees = 100
test_size = 0.20
seed      = 9
train_path = "/Final_Datasets/Diseased_+_Healthy"
test_path  = "/tier1/test"
h5_train_data    = '/tier1/output/train_data.h5'
h5_train_labels  = '/tier1/output/train_labels.h5'
accuracyscore    = "accuracy"

# get the training labels
train_labels = os.listdir(train_path)

# sort the training labels
train_labels.sort()

if not os.path.exists(test_path):
    os.makedirs(test_path)

# create all the machine learning models
models = []
models.append(('LR', LogisticRegression(random_state=seed)))
models.append(('KNN', KNeighborsClassifier()))
models.append(('SVM', SVC(random_state=seed)))


# variables to hold the results and names
results = []
names   = []

# import the feature vector and trained labels
h5f_data  = h5py.File(h5_train_data, 'r')
h5f_label = h5py.File(h5_train_labels, 'r')

global_features_string = h5f_data['dataset_1']
global_labels_string   = h5f_label['dataset_1']

global_features = np.array(global_features_string)
global_labels   = np.array(global_labels_string)

h5f_data.close()
h5f_label.close()

# verify the shape of the feature vector and labels
print("[STATUS] features shape: {}".format(global_features.shape))
print("[STATUS] labels shape: {}".format(global_labels.shape))
print("[STATUS] training started...")


[STATUS] features shape: (1709, 532)
[STATUS] labels shape: (1709,)
[STATUS] training started...


In [136]:
# Split Data
(trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal) = train_test_split(np.array(global_features),
                                                                                          np.array(global_labels),
                                                                                          test_size=test_size,
                                                                                          random_state=seed)

print("[STATUS] split train and test data...")
print("Train data  : {}".format(trainDataGlobal.shape))
print("Test data   : {}".format(testDataGlobal.shape)) 

[STATUS] split train and test data...
Train data  : (1367, 532)
Test data   : (342, 532)


In [137]:
trainDataGlobal
print(trainDataGlobal.shape)

(1367, 532)


In [138]:
# 10-fold CV, iterating over the three algorithms.
for name, model in models:
    kfold = KFold(n_splits=10, random_state=seed)
    cv_results = cross_val_score(model, trainDataGlobal, trainLabelsGlobal, cv=kfold, scoring=accuracyscore)
    names.append(name)
    resultsdic[name] = cv_results.mean()
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

LR: 0.917341 (0.043566)
KNN: 0.930523 (0.025313)
SVM: 0.918076 (0.038037)


In [140]:
Keymax = max(zip(resultsdic.values(), resultsdic.keys()))[1]
ik = 0
while ik < len(models) - 1:
    if Keymax == models[ik][0]:
        m = models[ik][1]
        print(Keymax + ' has highest score. Using ' + str(m))
    else:
        pass
    ik += 1

m.fit(trainDataGlobal, trainLabelsGlobal)
y_predict = m.predict(rescaled_features1)

KNN has highest score. Using KNeighborsClassifier()
[1 1]


In [None]:
print(classification_report(testLabelsGlobal,y_predict))

# Final Accuracy Score
from sklearn.metrics import accuracy_score
tier1_accuracy_score = accuracy_score(testLabelsGlobal, y_predict)
print('Overall Accuracy score for Tier 1: ' + str(tier1_accuracy_score))

Overall Accuracy score for Tier 1: 0.9269005847953217


#### **Test-Train Data**
**Split the dataset**

In [158]:
def get_files(directCNN):
  if not os.path.exists(directCNN):
    return 0
  count=0
  for current_path,dirs,files in os.walk(directCNN):
    for dr in dirs:
      count+= len(glob.glob(os.path.join(current_path,dr+"/*")))
  return count
train_dir ="/Final_Datasets/CNN_Dataset"
test_dir = "/tier2/test"


In [160]:
#image count
train_samples =get_files(train_dir)
num_classes=len(glob.glob(train_dir+"/*"))
test_samples=get_files(test_dir)
print(num_classes,"Classes")
print(train_samples,"Train images")
print(test_samples,"Test images")

25 Classes
30247 Train images
0 Test images


#### **Image Augumentation**

In [161]:
train_datagen=ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
    )
test_datagen=ImageDataGenerator(rescale=1./255)

In [162]:
input_shape=(224,224,3)
tgen =train_datagen.flow_from_directory(train_dir,target_size=(224,224),batch_size=32)
test_generator=test_datagen.flow_from_directory(test_dir,shuffle=True,target_size=(224,224),batch_size=32)

Found 30247 images belonging to 25 classes.
Found 0 images belonging to 0 classes.


#### **CNN**

In [163]:
model = Sequential()
model.add(Conv2D(32, (5, 5),input_shape=input_shape,activation='relu',name="conv2d_1"))
model.add(MaxPooling2D(pool_size=(3, 3),name="max_pooling2d_1"))
model.add(Conv2D(32, (3, 3),activation='relu',name="conv2d_2"))
model.add(MaxPooling2D(pool_size=(2, 2),name="max_pooling2d_2"))
model.add(Conv2D(64, (3, 3),activation='relu',name="conv2d_3"))
model.add(MaxPooling2D(pool_size=(2, 2),name="max_pooling2d_3"))   
model.add(Flatten(name="flatten_1"))
model.add(Dense(512,activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(128,activation='relu'))          
model.add(Dense(num_classes,activation='softmax'))
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 220, 220, 32)      2432      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 73, 73, 32)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 71, 71, 32)        9248      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 35, 35, 32)       0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 33, 33, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 16, 16, 64)      

In [164]:
validation_generator = train_datagen.flow_from_directory(
                       test_dir,
                       target_size=(224, 224),
                       batch_size=32)

Found 0 images belonging to 0 classes.


In [165]:
model.compile(optimizer='adam',loss = 'categorical_crossentropy',metrics=['accuracy'])
history1 = model.fit(
    tgen,#egitim verileri
    steps_per_epoch=None,
    epochs=2,
    validation_data=validation_generator,
    validation_steps=None,
    verbose=1,
    callbacks=[ReduceLROnPlateau(monitor='val_loss', factor=0.3,patience=3, min_lr=0.000001)],
    shuffle=True
    )

Epoch 1/2
Epoch 2/2


In [166]:
model.save('/tier2/plant_disease_Cnn.h5')

#### **Testing**

In [167]:

model_cnn=load_model('/tier2/plant_disease_Cnn.h5')

def prepare(imgPATH):
    img = tf.keras.utils.load_img(imgPATH, target_size=(224,224))
    img2 = tf.keras.utils.img_to_array(img)
    img2 = img2/255
    return np.expand_dims(img2, axis=0)

#Looping over images in testing.
for test_file in os.listdir(test_dir):
    #print(test_file)
    classes=list(tgen.class_indices.keys())
    flink = test_dir + '/' + test_file
    cnnresult = model_cnn.predict([prepare(flink)]) #Passes through the model after preparing the image.
    disease = mpimg.imread(flink)
    plt.figure()
    #plt.imshow(disease) #present images as figure

    classresult=np.argmax(cnnresult,axis=1)
    #print(classes[classresult[0]])

print(Observe_Image)
classes=list(tgen.class_indices.keys())
cnnresult = model_cnn.predict([prepare(Observe_Image)]) 
disease = mpimg.imread(Observe_Image)
plt.figure()
plt.imshow(disease)
classresult=np.argmax(cnnresult,axis=1)
print(classes[classresult[0]])
if y_predict[0] == 1:
    if classes[classresult[0]].find('healthy') == -1:
        print('True_Positive')
    else:
        print('False_Negative')
else:
    if classes[classresult[0]].find('healthy') == -1:
        print('False_Positive')
    else:
        print('True_Negative')


PATH


FileNotFoundError: [Errno 2] No such file or directory: 'PATH'

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>