In [None]:
import numpy as np
import glob
import pickle
import seaborn as sns
from keras.layers import Dense
from keras.utils.vis_utils import plot_model
import cv2
import os
import pandas as pd
from os import listdir
from sklearn.preprocessing import LabelBinarizer
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation, Flatten, Dropout, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.preprocessing.image import img_to_array
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import  classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import multilabel_confusion_matrix

In [None]:
# Folder that holds the dataset; listdir() returns the names of its immediate
# entries (presumably one sub-folder per plant/disease class — TODO confirm).
file_path ='../input/plantvillage-apr-8/Plant_leave_diseases_dataset'
plant_directory = listdir(file_path)
# Bare last expression so the notebook displays the listing.
plant_directory

In [None]:
# Collect every path matching file_path/<folder>/<name>.<ext>
# (glob is called without recursive=True).
plantfilepath = glob.glob(file_path + '/**/*.*')
print(len(plantfilepath))

# A file's label is the name of the folder that directly contains it.
labels = [os.path.basename(os.path.dirname(path)) for path in plantfilepath]
print(len(labels))

# Turn both lists into named Series so they become DataFrame columns.
filepath = pd.Series(plantfilepath, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

# Pair each path with its label column-wise, shuffle the rows, and renumber the index.
data = (
    pd.concat([filepath, labels], axis=1)
    .sample(frac=1)
    .reset_index(drop=True)
)

**Image distribution**

In [None]:
# plt.figure(figsize=(16, 8))
# count = data.Label.value_counts()
# print(count)
# sns.set(style="whitegrid")
# sns.barplot(x=count.index, y=count, alpha=0.9)
# plt.title('PlantVillage Image Frequency Distribution (Augmented Data)', fontsize=20)
# plt.ylabel('Number of Images', fontsize=15)
# plt.xlabel('Plant Spicies', fontsize=15)
# plt.xticks(rotation=90)
# plt.show()

In [None]:
# Training hyper-parameters: number of epochs, initial learning rate, batch size.
EPOCHS, INIT_LR, BS = 600, 1e-3, 42

# Input geometry: target image width/height in pixels and number of channels.
width, height, depth = 100, 100, 3
default_image_size = (width, height)

# Placeholder; reassigned to the number of loaded images after loading.
image_size = 0

# Folder that is scanned for plant / disease sub-folders.
directory_root = '../input/plantvillage-apr-8'

**Convert images to array**

In [None]:
def convert_image_to_array(image_dir):
    """Read an image file and return it as an array resized to default_image_size.

    Args:
        image_dir: Path of the image file to read (a file path, despite the name).

    Returns:
        The result of img_to_array on the resized image; an empty NumPy array
        when cv2.imread returns None; None when any exception is raised
        (the error is printed).
    """
    try:
        pixels = cv2.imread(image_dir)
        # imread yields None (rather than raising) when it cannot load the file.
        if pixels is None:
            return np.array([])
        resized = cv2.resize(pixels, default_image_size)
        return img_to_array(resized)
    except Exception as err:
        print(f"Error : {err}")
        return None

**Create a list of arrays for images**

In [None]:
# image_list holds one pixel array per successfully loaded image;
# label_list holds the matching disease-folder name at the same index.
image_list, label_list = [], []
try:
    print("Fetching images ...")
    # Filter with comprehensions instead of calling list.remove() while
    # iterating over the same list, which makes the loop skip the next entry.
    root_dir = [entry for entry in listdir(directory_root) if entry != ".DS_Store"]

    for plant_folder in root_dir:
        plant_disease_folder_list = [
            entry
            for entry in listdir(f"{directory_root}/{plant_folder}")
            if entry != ".DS_Store"
        ]

        for plant_disease_folder in plant_disease_folder_list:
            print(f"Processing {plant_disease_folder} ...")
            plant_disease_image_list = [
                entry
                for entry in listdir(f"{directory_root}/{plant_folder}/{plant_disease_folder}/")
                if entry != ".DS_Store"
            ]

            # Only the first 600 directory entries of each class are considered.
            # The loop variable is not called `image` so it does not shadow the
            # keras.preprocessing.image module imported at the top of the notebook.
            for image_name in plant_disease_image_list[:600]:
                image_directory = f"{directory_root}/{plant_folder}/{plant_disease_folder}/{image_name}"
                if not image_directory.endswith((".jpg", ".JPG")):
                    continue

                image_array = convert_image_to_array(image_directory)
                # convert_image_to_array reports failure with None or an empty
                # array; storing those would make image_list ragged and break
                # the later conversion into a single NumPy array.
                if image_array is None or image_array.size == 0:
                    print(f"Skipping unreadable image {image_directory}")
                    continue

                image_list.append(image_array)
                label_list.append(plant_disease_folder)
    print("Image loading completed")
except Exception as e:
    # Best-effort loading: report the problem and keep whatever was loaded so far.
    print(f"Error : {e}")

**Get the number of processed images**

In [None]:
# Number of entries collected in image_list; the bare expression displays it.
image_size = len(image_list)
image_size

**Transform image labels using scikit-learn's LabelBinarizer**


In [None]:
# Encode the string labels as binary indicator columns.
labelBinarizer = LabelBinarizer()
image_labels = labelBinarizer.fit_transform(label_list)

# Persist the fitted binarizer so predictions can be mapped back to class names.
# The context manager guarantees the file handle is flushed and closed.
with open('label_transform.pkl', 'wb') as label_file:
    pickle.dump(labelBinarizer, label_file)

n_classes = len(labelBinarizer.classes_)

In [None]:
# Display the class names learned by the binarizer.
labelBinarizer.classes_


In [None]:
# Stack the per-image arrays and scale pixel intensities into [0, 1].
# 255.0 is the maximum value of the 8-bit images cv2.imread loads by default;
# a divisor of 223.0 would leave bright pixels above 1.
np_image_list = np.array(image_list, dtype=np.float16) / 255.0

In [None]:
print("Spliting data to train, test, and valid")

# Target fractions of the full dataset for each split.
ratio_train, ratio_val, ratio_test = 0.80, 0.10, 0.10

# Step 1: hold out the test set from the full dataset.
x_remaining, x_test, y_remaining, y_test = train_test_split(
    np_image_list,
    image_labels,
    test_size=ratio_test,
    random_state=42,
)

# Step 2: carve the validation set out of what is left. The validation ratio is
# re-expressed relative to the remaining data so it is still ratio_val of the whole.
ratio_remaining = 1 - ratio_test
ratio_val_adjusted = ratio_val / ratio_remaining
x_train, x_val, y_train, y_val = train_test_split(
    x_remaining,
    y_remaining,
    test_size=ratio_val_adjusted,
    random_state=42,
)

# Report the size of every image split, then of every label split.
for split_name, split_images in (("x_train: ", x_train), ("x_val: ", x_val), ("x_test: ", x_test)):
    print(split_name, len(split_images))

for split_labels in (y_train, y_val, y_test):
    print(len(split_labels))

In [None]:
# fig, axes = plt.subplots(nrows=10, ncols=3, figsize=(10,18), subplot_kw={'xticks':[],'yticks':[]})

# for i, ax in enumerate(axes.flat):
          
#     ax.imshow(plt.imread(data.Filepath[i]))
#     ax.set_title(data.Label[i])
   
    
# plt.tight_layout()
# plt.show()

In [None]:
# Options for the random transformations applied to training images.
augmentation_options = dict(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
aug = ImageDataGenerator(**augmentation_options)

In [None]:
# Pick the input layout and the batch-normalisation axis from the backend's
# configured image data format.
if K.image_data_format() == "channels_first":
    inputShape = (depth, height, width)
    chanDim = 1
else:
    inputShape = (height, width, depth)
    chanDim = -1


def _conv_relu_bn(filters, **conv_kwargs):
    """Return the layers [3x3 'same' Conv2D, ReLU, BatchNormalization] for one conv unit."""
    return [
        Conv2D(filters, (3, 3), padding="same", **conv_kwargs),
        Activation("relu"),
        BatchNormalization(axis=chanDim),
    ]


# Layers are listed (and therefore created) in exactly the order they are stacked.
layer_stack = (
    # Stage 1: one 32-filter conv unit, 3x3 pooling.
    _conv_relu_bn(32, input_shape=inputShape)
    + [MaxPooling2D(pool_size=(3, 3)), Dropout(0.25)]
    # Stage 2: two 64-filter conv units, 2x2 pooling.
    + _conv_relu_bn(64)
    + _conv_relu_bn(64)
    + [MaxPooling2D(pool_size=(2, 2)), Dropout(0.25)]
    # Stage 3: two 128-filter conv units, 2x2 pooling.
    + _conv_relu_bn(128)
    + _conv_relu_bn(128)
    + [MaxPooling2D(pool_size=(2, 2)), Dropout(0.25)]
    # Classifier head: 1024-unit dense layer, then one softmax output per class.
    + [
        Flatten(),
        Dense(1024),
        Activation("relu"),
        BatchNormalization(),
        Dropout(0.5),
        Dense(n_classes),
        Activation("softmax"),
    ]
)

model = Sequential()
for layer in layer_stack:
    model.add(layer)

# Write a diagram of the architecture to model_plot.png.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()


In [None]:
# Adam optimiser with a learning-rate decay spread over the planned epochs.
opt = Adam(learning_rate =INIT_LR, decay=INIT_LR / EPOCHS)

# The network ends in a softmax and each image belongs to exactly one class, so
# the matching loss is categorical cross-entropy. With binary cross-entropy each
# output would be scored as an independent yes/no problem, and the "accuracy"
# metric would resolve to binary accuracy, which overstates performance.
# NOTE(review): assumes more than two classes, so that the binarised labels have
# one column per class — confirm against the dataset.
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

# train the network
print("Training the model...")

In [None]:
# Train on augmented batches. Model.fit accepts generators directly;
# Model.fit_generator is deprecated and absent from newer Keras releases.
history = model.fit(
    aug.flow(x_train, y_train, batch_size=BS),
    validation_data=(x_val, y_val),
    steps_per_epoch=len(x_train) // BS,
    epochs=EPOCHS, verbose=1
    )


# Predict on the held-out test set; the predicted class is the index of the
# highest output probability, mapped back to its class name.
predict_x=model.predict(x_test)
classes_x=np.argmax(predict_x,axis=1)
out_labels = [labelBinarizer.classes_[i] for i in classes_x]

# Recover the true class index (and name) from the binarised test labels.
y_truth=np.argmax(y_test,axis=1)
print(y_truth)
y_truth_ = [labelBinarizer.classes_[i] for i in y_truth]

# Confusion matrices computed from true vs. predicted class names.
cm = multilabel_confusion_matrix(y_truth_, out_labels)
print(cm)

# Per-class precision / recall / F1 on the test set.
print(classification_report(y_truth_, out_labels))

In [None]:
# Mean and maximum of the per-epoch training accuracy.
mean_acc = np.mean(history.history['accuracy'])
print(mean_acc)
max_acc = np.max(history.history['accuracy'])
print(max_acc)

# Per-epoch curves recorded during training.
training_log = history.history
acc, val_acc = training_log['accuracy'], training_log['val_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']
epochs = range(1, len(acc) + 1)

# One entry per figure: (train label, train values, validation label, validation values, title).
panels = (
    ('Training accurarcy', acc, 'Validation accurarcy', val_acc, 'Training and Validation accurarcy'),
    ('Training loss', loss, 'Validation loss', val_loss, 'Training and Validation loss'),
)
for panel_index, (train_label, train_values, val_label, val_values, panel_title) in enumerate(panels):
    # The first panel draws on the current figure; later panels open a new one.
    if panel_index > 0:
        plt.figure()
    plt.plot(epochs, train_values, 'b', label=train_label)
    plt.plot(epochs, val_values, 'r', label=val_label)
    plt.title(panel_title)
    plt.legend()
plt.show()

In [None]:
# Save the trained model twice: once under the extension-less path 'plant_disease'
# (presumably a SavedModel directory on TF 2.x — TODO confirm) and once as an
# HDF5 file.
model.save('plant_disease')
model.save("plant_disease.h5")

In [None]:
print("Calculating model accuracy")
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
scores = model.evaluate(x_test, y_test)
test_loss, test_accuracy = scores[0], scores[1]
print(f"Test Accuracy: {test_accuracy*100}")
print(f"Test Loss: {test_loss}")
print(scores)