#### CSML1020 Course Project - New Plant Diseases Detection
#### Authors (Group 3): Paul Doucet, Jerry Khidaroo

#### Initialization

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# import seaborn as sns
import os

In [2]:
import tensorflow as tf


In [3]:
# Make no GPU visible to TensorFlow, which forces the rest of the notebook
# onto the CPU. Comment this line out to let TensorFlow use the GPU again.
tf.config.set_visible_devices(devices=[], device_type='GPU')

In [4]:
from keras.applications.vgg16 import VGG16
import keras
from sklearn.model_selection import train_test_split
from keras import backend
from keras.layers import Dense
from keras.layers import Flatten
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D
from keras.optimizers import SGD
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.layers import Dropout
from keras.layers.normalization import BatchNormalization

Using TensorFlow backend.


#### Data Importing

In [5]:
# Image data folders (train / validation / test splits of the full dataset),
# given relative to the notebook's working directory.
# For quick experiments on the reduced sample, point these at the matching
# sub-folders of '../NewPlantDiseasesDatasetSample' instead.
dataDirTrain, dataDirValidate, dataDirTest = (
    '../NewPlantDiseasesDataset/train',
    '../NewPlantDiseasesDataset/valid',
    '../NewPlantDiseasesDataset/test',
)

#### Load Train Image Data with Best Augmentation Filters

In [6]:
from keras.preprocessing.image import ImageDataGenerator

# Both generators scale pixel values by 1/255. Only the training generator
# augments: random zoom (zoom_range=0.2) with 'nearest' fill for any pixels
# that fall outside the original image.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.2,
    fill_mode='nearest',
)
valid_datagen = ImageDataGenerator(rescale=1./255)

# Mini-batch size shared by both directory iterators.
batch_size = 48

# Iterate over the image folders, resizing every image to 224x224 and
# producing 'categorical' (one-hot) labels.
training_set = train_datagen.flow_from_directory(
    dataDirTrain,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
)
valid_set = valid_datagen.flow_from_directory(
    dataDirValidate,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
)

# Number of images each iterator discovered.
train_num = training_set.samples
valid_num = valid_set.samples

Found 70295 images belonging to 38 classes.
Found 17572 images belonging to 38 classes.


#### Perform Hyper-Parameter Tuning on Selected Model

In [7]:
# Importing Keras libraries and packages
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers

def create_model_alexnet(activation='softmax', learning_rate=0.01,
                         weights_path='best_weights_9.hdf5', n_frozen_layers=20):
    """Build, partially freeze and compile the AlexNet-style classifier.

    The network is a 24-layer Sequential stack: five convolutions (with max
    pooling and batch normalisation in between), a Flatten layer, three
    Dense/Dropout/BatchNormalization groups (4096, 4096 and 1000 units) and a
    final 38-unit output layer.

    Args:
        activation: Activation of the final 38-unit Dense layer.
        learning_rate: Learning rate handed to the SGD optimizer.
        weights_path: Weight file loaded into the freshly built network
            before freezing. Pass None to keep the random initialisation.
        n_frozen_layers: Number of leading layers marked non-trainable.
            The default of 20 leaves only the last four layers
            (Dense(1000), Dropout, BatchNormalization, Dense(38)) trainable.
            Use 0 to train every layer.

    Returns:
        The compiled Sequential model (SGD with momentum 0.9 and decay 0.005,
        categorical cross-entropy loss, accuracy metric).
    """
    # Initializing the CNN
    classifier = Sequential()

    # Convolution 1 (the only layer that declares the 224x224x3 input) + pooling
    classifier.add(Convolution2D(96, 11, strides=(4, 4), padding='valid', input_shape=(224, 224, 3), activation='relu'))
    classifier.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    classifier.add(BatchNormalization())

    # Convolution 2 + pooling
    classifier.add(Convolution2D(256, 11, strides=(1, 1), padding='valid', activation='relu'))
    classifier.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    classifier.add(BatchNormalization())

    # Convolution 3
    classifier.add(Convolution2D(384, 3, strides=(1, 1), padding='valid', activation='relu'))
    classifier.add(BatchNormalization())

    # Convolution 4
    classifier.add(Convolution2D(384, 3, strides=(1, 1), padding='valid', activation='relu'))
    classifier.add(BatchNormalization())

    # Convolution 5 + pooling
    classifier.add(Convolution2D(256, 3, strides=(1, 1), padding='valid', activation='relu'))
    classifier.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    classifier.add(BatchNormalization())

    # Flattening Step
    classifier.add(Flatten())

    # Fully connected head
    classifier.add(Dense(units=4096, activation='relu'))
    classifier.add(Dropout(0.4))
    classifier.add(BatchNormalization())
    classifier.add(Dense(units=4096, activation='relu'))
    classifier.add(Dropout(0.4))
    classifier.add(BatchNormalization())
    classifier.add(Dense(units=1000, activation='relu'))
    classifier.add(Dropout(0.2))
    classifier.add(BatchNormalization())
    classifier.add(Dense(units=38, activation=activation))

    # Start from previously saved weights unless the caller opts out.
    if weights_path is not None:
        classifier.load_weights(weights_path)

    # Freeze the leading layers; everything after them stays trainable.
    for layer in classifier.layers[:n_frozen_layers]:
        layer.trainable = False

    # Compiling the Model (must happen after the trainable flags are set)
    classifier.compile(optimizer=optimizers.SGD(lr=learning_rate, momentum=0.9, decay=0.005), loss='categorical_crossentropy', metrics=['accuracy'])

    return classifier

In [8]:
from keras.optimizers import Adam
import keras

def create_model_D(activation='softmax', learning_rate=0.01):
    """Build and compile the small three-convolution baseline ("Model D").

    Layer order: conv(32)+pool, dropout(0.2), conv(64)+pool, conv(256)+pool,
    flatten, Dense(128), Dense(38). No input shape is declared on the first
    layer.

    Args:
        activation: Activation of the final 38-unit Dense layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The Sequential model compiled with categorical cross-entropy loss
        and the accuracy metric.
    """
    def conv_pool(filters):
        # 3x3 ReLU convolution with He-uniform init, followed by 2x2 max pooling.
        return [
            Conv2D(filters, (3, 3), activation='relu', kernel_initializer='he_uniform'),
            MaxPooling2D((2, 2)),
        ]

    network = Sequential()

    stack = conv_pool(32) + [Dropout(0.2)] + conv_pool(64) + conv_pool(256)
    stack += [
        Flatten(),
        Dense(128, activation='relu', kernel_initializer='he_uniform'),
        Dense(38, activation=activation),
    ]
    for layer in stack:
        network.add(layer)

    network.compile(
        optimizer=Adam(lr=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [9]:
from keras.optimizers import Adam
import keras

def create_model_01(activation='softmax', learning_rate=0.01):
    """Build and compile a minimal single-convolution classifier.

    Layer order: one 3x3 'same'-padded convolution over 224x224x3 input,
    2x2 max pooling, batch normalisation, flatten, dropout(0.2), batch
    normalisation, then the 38-unit output layer. There is no hidden Dense
    layer between the flattened features and the output.

    Args:
        activation: Activation of the final 38-unit Dense layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The Sequential model compiled with categorical cross-entropy loss
        and the accuracy metric.
    """
    net = Sequential()

    stack = (
        # Feature extractor
        Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(224,224,3)),
        MaxPooling2D((2, 2), strides=(2, 2), padding='valid'),
        BatchNormalization(),
        # Classifier head
        Flatten(),
        Dropout(0.2),
        BatchNormalization(),
        Dense(38, activation=activation),
    )
    for layer in stack:
        net.add(layer)

    net.compile(
        optimizer=Adam(lr=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [10]:
from keras.optimizers import Adam
from keras.applications.vgg16 import VGG16
import keras

def create_model_vgg16(activation='softmax', learning_rate=0.01):
    """Build and compile a classifier on top of a frozen VGG16 base.

    The VGG16 network is created without its top classifier
    (include_top=False) for 224x224x3 input and marked non-trainable; its
    output is flattened and fed to a single 38-unit Dense layer.

    Args:
        activation: Activation of the final 38-unit Dense layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The Sequential model compiled with categorical cross-entropy loss
        and the accuracy metric.
    """
    # Frozen convolutional backbone: only the new head can be trained.
    backbone = VGG16(include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False

    model = keras.models.Sequential()
    for layer in (backbone, Flatten(), Dense(38, activation=activation)):
        model.add(layer)

    model.compile(
        optimizer=Adam(lr=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [11]:
import keras
from keras.callbacks import ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold, cross_val_score
from keras import regularizers

# Seed for RandomizedSearchCV's candidate sampling, so that re-running this
# cell evaluates the same hyper-parameter combinations.
SEARCH_SEED = 42

# Checkpoint for the Alexnet model.
# NOTE(review): this targets the same file that create_model_alexnet() loads
# its starting weights from. No validation data is passed to fit() below, so
# 'val_acc' is presumably never logged and nothing is actually saved --
# confirm before relying on (or "fixing") this checkpoint.
weightpath = "best_weights_9.hdf5"
checkpoint = ModelCheckpoint(weightpath, monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=True, mode='max')
callbacks_list = [checkpoint]

# Model labels and their builder functions, paired by position.
model_ids = ['Alexnet','Vgg16','Model_D']
fn_names = [create_model_alexnet, create_model_vgg16, create_model_D]

# The search runs on a single mini-batch drawn from the training iterator
# (batch_size images), split into 3 folds -- scores are therefore computed
# on very few images and should be read as a rough comparison only.
X_train, y_train = next(training_set)

# Define the parameters to try out
params = {'activation': ["softmax"],
          'batch_size': [32, 48, 64],
          'epochs': [3, 5, 10],
          'learning_rate': [0.01, 0.001, 0.0001]}

for fn_name, model_id in zip(fn_names, model_ids):
    # Wrap the Keras builder so scikit-learn can clone and fit it
    model = KerasClassifier(build_fn = fn_name, verbose = 0)

    # Randomized search over `params`; seeded so the sampled candidates are reproducible
    random_search = RandomizedSearchCV(model, param_distributions = params, cv = KFold(3), random_state = SEARCH_SEED, verbose=0)

    # Only the Alexnet fit receives the checkpoint callback
    fit_kwargs = {'callbacks': callbacks_list} if model_id == 'Alexnet' else {}
    random_search_results = random_search.fit(X_train, y_train, **fit_kwargs)

    # Print best score and parameters
    print(model_id, " - Best Score: ", random_search_results.best_score_, "and Best Params: ", random_search_results.best_params_)

Alexnet  - Best Score:  0.9791666666666666 and Best Params:  {'learning_rate': 0.01, 'epochs': 3, 'batch_size': 64, 'activation': 'softmax'}
Vgg16  - Best Score:  0.10416666666666667 and Best Params:  {'learning_rate': 0.001, 'epochs': 10, 'batch_size': 32, 'activation': 'softmax'}
Model_D  - Best Score:  0.041666666666666664 and Best Params:  {'learning_rate': 0.01, 'epochs': 5, 'batch_size': 64, 'activation': 'softmax'}


In [12]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from keras.optimizers import Adam
from keras import regularizers

# Model labels and their builder functions, paired by position.
model_ids = ['Alexnet','Vgg16','Model_D']
fn_names = [create_model_alexnet, create_model_vgg16, create_model_D]

# Hyper-parameter grid: every combination is evaluated.
params = {
    'activation': ["softmax"],
    'batch_size': [32, 48, 64],
    'epochs': [3, 5, 10],
    'learning_rate': [0.01, 0.001, 0.0001],
}

# Draw the next mini-batch from the training iterator to search on.
X_train, y_train = next(training_set)

for model_id, fn_name in zip(model_ids, fn_names):
    # Wrap the Keras builder so scikit-learn can clone and fit it.
    model = KerasClassifier(build_fn=fn_name, verbose=0)

    # Exhaustive search over `params` with 3-fold cross-validation.
    grid_search = GridSearchCV(model, params, cv=KFold(3), verbose=1)

    # Only the Alexnet fit receives the checkpoint callbacks
    # (`callbacks_list` comes from the randomized-search cell).
    grid_search_results = grid_search.fit(
        X_train,
        y_train,
        **({'callbacks': callbacks_list} if model_id == 'Alexnet' else {})
    )

    # Report the winning configuration for this model.
    print(model_id, " - Best Score: ", grid_search_results.best_score_, "and Best Params: ", grid_search_results.best_params_)

Fitting 3 folds for each of 27 candidates, totalling 81 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  81 out of  81 | elapsed:  5.2min finished
Alexnet  - Best Score:  0.9583333333333334 and Best Params:  {'activation': 'softmax', 'batch_size': 32, 'epochs': 3, 'learning_rate': 0.01}
Fitting 3 folds for each of 27 candidates, totalling 81 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
