
# Arguments

In [5]:
# Mini-batch size used for augmentation batches and for prediction.
batch_size = 32

# Root folders of the three image collections.
dataset_path_plantvillage = '../datasets/plantvillage'
dataset_path_self = '../datasets/self'
dataset_path_validation = '../datasets/validation'
dataset_paths = [
    dataset_path_plantvillage,
    dataset_path_self,
    dataset_path_validation,
]

# Output artefacts of the warm-up run.
model_save_path = 'model-warmup.h5'
checkpoint_path = 'checkpoints-warmup.hdf5'

# Network input geometry (pixels, pixels, colour channels).
input_width, input_height, input_depth = 224, 224, 3

# --------------------------------------------------
# Training schedule.
num_of_epochs, start_epoch = 50, 0
# --------------------------------------------------
# --------------------------------------------------

# Print class names

In [6]:
# Get classes
import os
import re

# Union of class folder names across all datasets (first-seen order),
# plus the full class folder paths found in each individual dataset.
all_class_names = []
class_paths_plant_village = []
class_paths_plant_self = []
class_paths_plant_validation = []

for dataset_dir in dataset_paths:
    # The target list depends only on the dataset folder, so pick it once.
    if 'plantvillage' in dataset_dir:
        dataset_class_paths = class_paths_plant_village
    elif 'self' in dataset_dir:
        dataset_class_paths = class_paths_plant_self
    else:
        dataset_class_paths = class_paths_plant_validation

    for entry in os.listdir(dataset_dir):
        # Only entries whose name contains '___' are treated as class folders.
        if '___' not in entry:
            continue
        if entry not in all_class_names:
            all_class_names.append(entry)
        dataset_class_paths.append('{}/{}'.format(dataset_dir, entry))

# Report: every class name first, then each dataset's paths and their count.
print(all_class_names)
for title, found_paths in (('PlantVillage', class_paths_plant_village),
                           ('Self', class_paths_plant_self),
                           ('Validation', class_paths_plant_validation)):
    print()
    print(title)
    print(found_paths)
    print(len(found_paths))

['___Yellow_Leaf_Curl_Virus', '___Late_blight', '___Early_blight', '___Two-spotted_spider_mite', '___Septoria_leaf_spot', '___Bacterial_spot', '___healthy', '___Target_Spot', '___Mosaic_Virus', '___Leaf_Mold', '___Leaf_miner', '___Curly_top_virus', '___Appids']

PlantVillage
['../datasets/plantvillage/___Yellow_Leaf_Curl_Virus', '../datasets/plantvillage/___Late_blight', '../datasets/plantvillage/___Early_blight', '../datasets/plantvillage/___Two-spotted_spider_mite', '../datasets/plantvillage/___Septoria_leaf_spot', '../datasets/plantvillage/___Bacterial_spot', '../datasets/plantvillage/___healthy', '../datasets/plantvillage/___Target_Spot', '../datasets/plantvillage/___Mosaic_Virus', '../datasets/plantvillage/___Leaf_Mold']
10

Self
['../datasets/self/___Leaf_miner', '../datasets/self/___Early_blight', '../datasets/self/___Curly_top_virus', '../datasets/self/___Appids']
4

Validation
['../datasets/validation/___Leaf_miner', '../datasets/validation/___Yellow_Leaf_Curl_Virus', '../data

# Select training classes

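Intended setup: the Early blight class from the PlantVillage dataset is replaced by the self-collected one. Note that the list below still points at the PlantVillage `___Early_blight` folder.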

In [7]:
# Class folders (all taken from the PlantVillage dataset) used to train the head.
class_paths_training = [
    '../datasets/plantvillage/' + class_dir
    for class_dir in (
        '___Early_blight',
        '___Late_blight',
        '___healthy',
        '___Mosaic_Virus',
        '___Yellow_Leaf_Curl_Virus',
        '___Leaf_Mold',
    )
]

# Load dataset

In [8]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.preprocessing.image   import ImageDataGenerator
from keras.optimizers import RMSprop
from keras.layers import Input
from keras.models import Model
from keras .applications import VGG16
from keras.optimizers import SGD
from keras.models import Model
from imutils import paths
import numpy as np
import os

import sys
sys.path.append('..')

from utils.preprocessors.aspect_aware_preprocessor import AspectAwarePreprocessor
from utils.preprocessors.image_to_array_preprocessor import ImageToArrayPreprocessor
from utils.io.simple_dataset_loader import SimpleDatasetLoader

# Image generator that yields randomly perturbed copies of the images it is
# given: rotations, shifts, shear, zoom and horizontal flips, with
# nearest-pixel filling for the uncovered borders.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
aug = ImageDataGenerator(**augmentation_options)

def load_datasets(path_list, test_size=0.25, random_state=42):
    """Load all images under the given class folders and split them.

    Args:
        path_list: iterable of directories; every image found below each
            directory is loaded.
        test_size: fraction of the samples held out for the test split
            (forwarded to ``train_test_split``).
        random_state: seed forwarded to ``train_test_split`` so the split
            is reproducible.

    Returns:
        Tuple ``(train_x, test_x, train_y, test_y, class_names)`` where the
        ``*_x`` arrays hold pixel data scaled by 1/255, the ``*_y`` arrays
        are the binarized labels, and ``class_names`` is the sorted list of
        unique parent-folder names of the loaded images.
    """
    # Load image paths
    image_paths = []
    print("[INFO] loading images...")
    for path in path_list:
        image_paths.extend(list(paths.list_images(path)))

    # Get unique classnames: the parent folder of each image is its class;
    # np.unique returns them sorted.
    class_names = [pt.split(os.path.sep)[-2] for pt in image_paths]
    class_names = [str(x) for x in np.unique(class_names)]

    print(len(class_names))

    # Initial image preprocessing
    aap = AspectAwarePreprocessor(input_width, input_height)
    iap = ImageToArrayPreprocessor()

    # Load image data and perform image data preprocessing
    # (labels are presumably the class folder name of each image --
    # TODO confirm against SimpleDatasetLoader).
    sdl = SimpleDatasetLoader(preprocessors=[aap, iap])
    (data, labels) = sdl.load(image_paths, verbose=500)
    # NOTE(review): the ImageNet ResNet50 weights used later in this notebook
    # are normally paired with keras.applications.resnet50.preprocess_input
    # rather than plain [0, 1] scaling -- confirm this scaling is intended.
    data = data.astype("float") / 255.0

    # train test split
    (train_x, test_x, train_y, test_y) = train_test_split(
        data, labels, test_size=test_size, random_state=random_state)

    # Convert the labels to one-hot vectors. The binarizer is fitted ONCE on
    # the full label set so both splits share the same column order, even if
    # a class happens to be absent from one of them. Fitting a separate
    # binarizer per split could silently produce mismatched encodings.
    label_binarizer = LabelBinarizer().fit(labels)
    train_y = label_binarizer.transform(train_y)
    test_y = label_binarizer.transform(test_y)

    return (train_x, test_x, train_y, test_y, class_names)

# Load the selected training classes; the split stays in module-level
# variables that the training and evaluation cells read.
train_x, test_x, train_y, test_y, class_names = load_datasets(class_paths_training)



[INFO] loading images...
6
[INFO]: Processed 500/11182
[INFO]: Processed 1000/11182
[INFO]: Processed 1500/11182
[INFO]: Processed 2000/11182
[INFO]: Processed 2500/11182
[INFO]: Processed 3000/11182
[INFO]: Processed 3500/11182
[INFO]: Processed 4000/11182
[INFO]: Processed 4500/11182
[INFO]: Processed 5000/11182
[INFO]: Processed 5500/11182
[INFO]: Processed 6000/11182
[INFO]: Processed 6500/11182
[INFO]: Processed 7000/11182
[INFO]: Processed 7500/11182
[INFO]: Processed 8000/11182
[INFO]: Processed 8500/11182
[INFO]: Processed 9000/11182
[INFO]: Processed 9500/11182
[INFO]: Processed 10000/11182
[INFO]: Processed 10500/11182
[INFO]: Processed 11000/11182



# Load model

In [9]:
from keras.applications import ResNet50
from keras.utils.vis_utils import plot_model
from keras.layers import Input

# ImageNet-pretrained ResNet50 without its classification top.
# With Keras' default channels_last layout the input shape is
# (height, width, channels); the original passed width before height, which
# only worked because both are 224.
base_model = ResNet50(weights='imagenet', include_top=False,
                      input_tensor=Input(shape=(input_height, input_width, input_depth)))

# plot_save_path = 'diagram-base-resnet50.png'
# plot_model(base_model, to_file=plot_save_path, show_shapes=True)



# Custom FC layer

In [10]:
from keras.layers.core import Dropout
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers import GlobalAveragePooling2D

# Collapse the base network's feature maps with global average pooling ...
pooled_features = GlobalAveragePooling2D()(base_model.output)
# ... and add a softmax layer with one unit per training class.
head_model = Dense(len(class_paths_training), activation='softmax')(pooled_features)

# Attach custom head to model

In [11]:
from keras.models import Model


# Full network: the base model's input feeding through to the new head's output.
model = Model(
    inputs=base_model.input,
    outputs=head_model,
)

# plot_save_path = 'diagram-head-attached-resnet50.png'
# plot_model(model, to_file=plot_save_path, show_shapes=True)

# Freeze base model

In [12]:
# Mark every layer of the pretrained base as non-trainable so that only the
# newly attached head is updated during warm-up.
for base_layer in base_model.layers:
    base_layer.trainable = False

# Compile model

In [13]:
# Only the new fully connected head is trained here, so a short run suffices.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [14]:
# Show the names of the values the compiled model reports (loss and metrics).
print(model.metrics_names)

['loss', 'accuracy']


# Checkpoints

In [15]:
from keras.callbacks import ModelCheckpoint


# Save to `checkpoint_path` only when the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Callback list handed to training; later cells append to it.
callbacks = [checkpoint]



In [16]:
#load checkpoints if existing

import os

# Resume from a previous run's checkpoint when one is present on disk.
checkpoint_available = os.path.exists(checkpoint_path)
if checkpoint_available:
    model.load_weights(checkpoint_path)

# Training monitor

In [17]:
from utils.callbacks.training_monitor import TrainingMonitor
import pathlib
import json
import os

# Locations handed to the training monitor
# (presumably the plot output and the JSON history file -- TODO confirm
# against TrainingMonitor).
fig_path = "plot"
json_path = "values.json"
values_path = 'values.json'

# Keep this cell idempotent: drop any monitor registered by an earlier run of
# the cell before adding a fresh one, so re-running it does not stack
# duplicate TrainingMonitor callbacks.
callbacks = [cb for cb in callbacks if not isinstance(cb, TrainingMonitor)]
callbacks.append(TrainingMonitor(fig_path, json_path, start_epoch))

# Warm up head

In [18]:
# Train on augmented batches and validate on the held-out split.
# - steps_per_epoch is derived from `batch_size` (it was hard-coded to 32),
#   so it stays correct if the batch size is changed in the arguments cell.
# - initial_epoch follows `start_epoch`, the same value given to the
#   training monitor, so a resumed run continues its epoch numbering.
model.fit_generator(
    aug.flow(train_x, train_y, batch_size=batch_size),
    validation_data=(test_x, test_y),
    epochs=num_of_epochs,
    initial_epoch=start_epoch,
    steps_per_epoch=len(train_x) // batch_size,
    verbose=1,
    callbacks=callbacks)

# Persist the model as it stands after the last epoch.
model.save(model_save_path)

Epoch 1/50

Epoch 00001: val_loss improved from inf to 2.29609, saving model to checkpoints-warmup.hdf5
Epoch 2/50

Epoch 00002: val_loss improved from 2.29609 to 2.11446, saving model to checkpoints-warmup.hdf5
Epoch 3/50

Epoch 00003: val_loss improved from 2.11446 to 1.90447, saving model to checkpoints-warmup.hdf5
Epoch 4/50

Epoch 00004: val_loss did not improve from 1.90447
Epoch 5/50

Epoch 00005: val_loss did not improve from 1.90447
Epoch 6/50

Epoch 00006: val_loss did not improve from 1.90447
Epoch 7/50

Epoch 00007: val_loss did not improve from 1.90447
Epoch 8/50

Epoch 00008: val_loss did not improve from 1.90447
Epoch 9/50

Epoch 00009: val_loss did not improve from 1.90447
Epoch 10/50

Epoch 00010: val_loss did not improve from 1.90447
Epoch 11/50

Epoch 00011: val_loss did not improve from 1.90447
Epoch 12/50

Epoch 00012: val_loss did not improve from 1.90447
Epoch 13/50

Epoch 00013: val_loss did not improve from 1.90447
Epoch 14/50

Epoch 00014: val_loss did not imp

# Evaluate with the 25% test split held out from the same dataset

In [19]:
from sklearn.metrics import classification_report

print("[INFO] evaluating with test set...")
predictions = model.predict(test_x, batch_size=batch_size)

# Reduce one-hot targets and predicted probabilities to class indices.
true_class_idx = test_y.argmax(axis=1)
predicted_class_idx = predictions.argmax(axis=1)
print(classification_report(true_class_idx, predicted_class_idx,
                            target_names=class_names))

[INFO] evaluating with test set...
                           precision    recall  f1-score   support

          ___Early_blight       0.00      0.00      0.00       249
           ___Late_blight       0.17      1.00      0.29       468
             ___Leaf_Mold       0.00      0.00      0.00       244
          ___Mosaic_Virus       0.00      0.00      0.00        95
___Yellow_Leaf_Curl_Virus       0.00      0.00      0.00      1326
               ___healthy       0.00      0.00      0.00       414

              avg / total       0.03      0.17      0.05      2796



  'precision', 'predicted', average, warn_for)


# Evaluate with validation images

In [20]:
# Validation folders for the same six classes the model was trained on.
class_paths_validation = [
    '../datasets/validation/' + class_dir
    for class_dir in (
        '___Early_blight',
        '___Late_blight',
        '___healthy',
        '___Mosaic_Virus',
        '___Yellow_Leaf_Curl_Virus',
        '___Leaf_Mold',
    )
]

In [21]:
# Load the validation images into dedicated names. Reusing train_x / test_x /
# train_y / test_y / class_names here would overwrite the training split, so
# re-running the training or test-evaluation cells afterwards would silently
# operate on validation data.
# NOTE: load_datasets still applies its train/test split, so the report below
# covers only the held-out portion (25% by default) of the validation images.
(val_train_x, val_test_x, val_train_y, val_test_y, val_class_names) = load_datasets(class_paths_validation)

from sklearn.metrics import classification_report

print("[INFO] evaluating with validation set...")
predictions = model.predict(val_test_x, batch_size=batch_size)

print(classification_report(val_test_y.argmax(axis=1),
                            predictions.argmax(axis=1),
                            target_names=val_class_names,
                            digits=4))

[INFO] loading images...
6
[INFO] evaluating with validation set...
                           precision    recall  f1-score   support

          ___Early_blight     0.0000    0.0000    0.0000        12
           ___Late_blight     0.4235    1.0000    0.5950        36
             ___Leaf_Mold     0.0000    0.0000    0.0000        25
          ___Mosaic_Virus     0.0000    0.0000    0.0000         6
___Yellow_Leaf_Curl_Virus     0.0000    0.0000    0.0000         3
               ___healthy     0.0000    0.0000    0.0000         3

              avg / total     0.1794    0.4235    0.2520        85

