
# Arguments

In [33]:
batch_size = 32
dataset_path_plantvillage = '../datasets/plantvillage'
dataset_path_self = '../datasets/self'
dataset_path_validation = '../datasets/validation'

dataset_paths = [dataset_path_plantvillage, dataset_path_self, dataset_path_validation]

model_save_path = 'model-warmup.h5'

checkpoint_path = 'checkpoints-warmup.hdf5'

input_width = 224
input_height = 224
input_depth = 3

# --------------------------------------------------
num_of_epochs = 25
start_epoch = 0
# --------------------------------------------------

# Print class names

In [34]:
# Get classes
import os
import re

class_names = []
class_paths_plant_village = []
class_paths_plant_self = []
class_paths_plant_validation = []

for path in dataset_paths:
    classes = os.listdir(path)
    for i in classes:
        if(re.search('___', i)):
            if(i not in class_names):
                class_names.append(i)
            if(re.search('plantvillage', path)):
                class_paths_plant_village.append(path + '/' + i)
            elif(re.search('self', path)):
                class_paths_plant_self.append(path + '/' + i)
            else:
                class_paths_plant_validation.append(path + '/' + i)

print(class_names)   
print()
print('PlantVillage')
print(class_paths_plant_village) 
print(len(class_paths_plant_village))
print()
print('Self')
print(class_paths_plant_self) 
print(len(class_paths_plant_self))
print()
print('Validation')
print(class_paths_plant_validation) 
print(len(class_paths_plant_validation))

['___Early_blight', '___Late_blight', '___Target_Spot', '___healthy', '___Septoria_leaf_spot', '___Mosaic_Virus', '___Yellow_Leaf_Curl_Virus', '___Two-spotted_spider_mite', '___Leaf_Mold', '___Bacterial_spot', '___Appids', '___Leaf_miner', '___Curly_top_virus']

PlantVillage
['../datasets/plantvillage/___Early_blight', '../datasets/plantvillage/___Late_blight', '../datasets/plantvillage/___Target_Spot', '../datasets/plantvillage/___healthy', '../datasets/plantvillage/___Septoria_leaf_spot', '../datasets/plantvillage/___Mosaic_Virus', '../datasets/plantvillage/___Yellow_Leaf_Curl_Virus', '../datasets/plantvillage/___Two-spotted_spider_mite', '../datasets/plantvillage/___Leaf_Mold', '../datasets/plantvillage/___Bacterial_spot']
10

Self
['../datasets/self/___Early_blight', '../datasets/self/___Appids', '../datasets/self/___Leaf_miner', '../datasets/self/___Curly_top_virus']
4

Validation
['../datasets/validation/___Early_blight', '../datasets/validation/___Appids', '../datasets/validatio

# Select training classes

early blight in plantVillage dataset replaced by self one

In [35]:
class_paths_training = ['../datasets/self/___Early_blight', '../datasets/self/___Appids', '../datasets/self/___Leaf_miner', '../datasets/self/___Curly_top_virus']
class_names = class_paths_training

# Load dataset

In [None]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.preprocessing.image   import ImageDataGenerator
from keras.optimizers import RMSprop
from keras.layers import Input
from keras.models import Model
from keras .applications import VGG16
from keras.optimizers import SGD
from keras.models import Model
from imutils import paths
import numpy as np
import os

import sys
sys.path.append('..')

from utils.preprocessors.aspect_aware_preprocessor import AspectAwarePreprocessor
from utils.preprocessors.image_to_array_preprocessor import ImageToArrayPreprocessor
from utils.io.simple_dataset_loader import SimpleDatasetLoader

# construct the image generator for data augmentation
aug = ImageDataGenerator(   rotation_range=30,
                            width_shift_range=0.1,
                            height_shift_range=0.1,
                            shear_range=0.2,
                            zoom_range=0.2,
                            horizontal_flip=True,
                            fill_mode='nearest')

def load_datasets(path_list):
    # Load image paths
    image_paths = []
    print("[INFO] loading images...")
    for path in path_list:
        image_paths.extend(list(paths.list_images(path)))
        
    # Get unique classnames

    class_names = [pt.split(os.path.sep)[-2] for pt in image_paths]
    class_names = [str(x) for x in np.unique(class_names)]

    # Initial image preprocessing
    aap = AspectAwarePreprocessor(input_width, input_height)
    iap= ImageToArrayPreprocessor()

    #Load image data and perform image data preprocessing
    sdl = SimpleDatasetLoader(preprocessors=[aap,iap])
    (data,labels)  = sdl.load(image_paths,verbose=500)
    data = data.astype("float") / 255.0


    # train test split
    (train_x,test_x,train_y,test_y) = train_test_split(data,labels,test_size=0.25,random_state=42)

    # convert the labels from integers to vectors
    train_y = LabelBinarizer().fit_transform(train_y)
    test_y = LabelBinarizer().fit_transform(test_y)
    
    return (train_x,test_x,train_y,test_y, class_names)

(train_x,test_x,train_y,test_y, class_names) = load_datasets(class_paths_training)




# Load model

In [36]:
from keras.applications import VGG16
from keras.utils.vis_utils import plot_model
from keras.layers import Input

base_model = VGG16(weights='imagenet',include_top=False,
                  input_tensor=Input(shape = (input_width,input_height, input_depth)))

# plot_save_path = 'diagram-base-resnet50.png'
# plot_model(base_model, to_file=plot_save_path, show_shapes=True)

# Create custom head

In [37]:
from keras.layers.core import Dropout, Flatten, Dense
from keras.layers import GlobalAveragePooling2D
from keras.models import Sequential

model = Sequential()
model.add(base_model)

model.add(Dense(1024, activation='relu', name='fc1'))
model.add(Dropout(0.5))
model.add(GlobalAveragePooling2D())
# Add a softmaxc layer
model.add(Dense(len(class_paths_training),activation='softmax'))

In [27]:
from keras.layers import Dropout
from keras.models import Model

layers = [l for l in model.layers]


print(layers[0].name)

x = layers[0].output

# Add until layer layer 15
for i in range(1, 15):
    x = layers[i](x)

# Add the two dropout layers
x = layers[16](x)
x = Dropout(0.5)(x)
x = layers[17](x)
x = Dropout(0.5)(x)

# Add the rest of the layers
for i in range(18, len(layers)):
    x = layers[i](x)

new_model = Model(input=layers[0].input, output=x)

# iterate for all layers in the network and print its' index value
for (i,layer) in enumerate(new_model.layers):
    print("[INFO] {:5}\t{:30}{}".format(i, layer.name, layer.__class__.__name__))

input_3
[INFO]     0	input_3                       InputLayer
[INFO]     1	block1_conv1                  Conv2D
[INFO]     2	block1_conv2                  Conv2D
[INFO]     3	block1_pool                   MaxPooling2D
[INFO]     4	block2_conv1                  Conv2D
[INFO]     5	block2_conv2                  Conv2D
[INFO]     6	block2_pool                   MaxPooling2D
[INFO]     7	block3_conv1                  Conv2D
[INFO]     8	block3_conv2                  Conv2D
[INFO]     9	block3_conv3                  Conv2D
[INFO]    10	block3_pool                   MaxPooling2D
[INFO]    11	block4_conv1                  Conv2D
[INFO]    12	block4_conv2                  Conv2D
[INFO]    13	block4_conv3                  Conv2D
[INFO]    14	block4_pool                   MaxPooling2D
[INFO]    15	block5_conv2                  Conv2D
[INFO]    16	dropout_7                     Dropout
[INFO]    17	block5_conv3                  Conv2D
[INFO]    18	dropout_8                     Dropout
[INFO]    19

# Freeze base model

In [None]:
# traverse all layers and freeze the weight of the corresponding layer
for layer in base_model.layers:
    layer.trainable = False

# Compile model

In [None]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# Since we only train the new fully connected layer, we do a few iterations

In [None]:
print(model.metrics_names)

# Checkpoints

In [None]:
from keras.callbacks import ModelCheckpoint


checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', mode='min', 
save_best_only=True, verbose=1)

callbacks = [checkpoint]



In [None]:
#load checkpoints if existing

import os

if(os.path.exists(checkpoint_path)):
    model.load_weights(checkpoint_path)

# Training monitor

In [None]:
from utils.callbacks.training_monitor import TrainingMonitor
import pathlib
import json
import os

fig_path = "plot"
json_path = "values.json"
values_path = 'values.json'

callbacks.append(TrainingMonitor(fig_path, json_path, start_epoch))

# Warm up head

In [None]:
model.fit_generator(
    aug.flow(train_x,train_y, batch_size = batch_size),
             validation_data = (test_x,test_y),
             epochs=num_of_epochs,
             steps_per_epoch = len(train_x) //32,
             verbose = 1,
             callbacks=callbacks)

model.save(model_save_path)

# Evaluate with 20% test set from same dataset

In [None]:
from sklearn.metrics import classification_report

print("[INFO] evaluating with test set...")
predictions = model.predict(test_x,batch_size=batch_size)
print(classification_report(test_y.argmax(axis =1),
                            predictions.argmax(axis =1),target_names=class_names))

# Evaluate with validation images

In [None]:
class_paths_validation = ['../datasets/validation/___Early_blight', '../datasets/validation/___Appids', '../datasets/validation/___Leaf_miner', '../datasets/self/___Curly_top_virus']

In [None]:
(train_x,test_x,train_y,test_y, class_names) = load_datasets(class_paths_validation)

from sklearn.metrics import classification_report

print("[INFO] evaluating with validation set...")
predictions = model.predict(test_x,batch_size=batch_size)

print(classification_report(test_y.argmax(axis =1),
                            predictions.argmax(axis =1),
                            target_names=class_names, 
                            digits=4))