This is the second part of the notebook we used to build the model submitted for the final evaluation. Kaggle only allows runs that last less than 9 hours, so we had to split the training into two parts; otherwise we would have exceeded the maximum run time.

In [None]:
# Importing libraries
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from PIL import Image
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Short aliases for the Keras namespaces used in the rest of the notebook
tfk = tf.keras
tfkl = tfk.layers
print(tf.__version__)

In [None]:
# Fix every random number generator to the same seed for reproducibility
seed = 42

# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes - confirm.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python, NumPy and TensorFlow (both the TF2 and the compat.v1 entry points)
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed, tf.compat.v1.set_random_seed):
    seed_rng(seed)

In [None]:
# Class weights are needed to counter the problem of class imbalance:
# each class gets the weight total_images / (num_classes * images_in_class),
# so that under-represented classes contribute more to the loss.

# NOTE: the position of a label in this list is used as its class index below.
labels = ['Apple','Blueberry','Cherry','Corn','Grape','Orange','Peach','Pepper','Potato','Raspberry','Soybean','Squash','Strawberry','Tomato']

# Number of training images found in each class folder, in `labels` order
num_of_images_training = [
    len(os.listdir('../input/full-dataset/Dataset/training' + '/' + label))
    for label in labels
]

# An empty class folder would otherwise surface as an opaque ZeroDivisionError
empty_classes = [label for label, count in zip(labels, num_of_images_training) if count == 0]
if empty_classes:
    raise ValueError('No training images found for: ' + ', '.join(empty_classes))

tot_num_images = sum(num_of_images_training)

# The number of classes is taken from `labels` instead of being hard-coded
weights = [
    (1 / count) * tot_num_images / len(labels)
    for count in num_of_images_training
]

print(weights)

# Mapping class index -> weight that we will use for the training
class_weights = dict(enumerate(weights))

for i, weight in enumerate(weights):
    print(str(i) + ': {:.3f}'.format(weight))

In [None]:
# Data augmentation

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import preprocess_input

# NOTE(review): `preprocess_input` is imported but not passed to the generator
# below; presumably the loaded model takes care of input preprocessing - confirm.

# Random transformations applied on the fly to every training image
augmentation_settings = dict(
    rotation_range=30,
    width_shift_range=50,
    height_shift_range=50,
    zoom_range=0.3,  # To better recognize both veins (zoom in) and edges (zoom out) of the leaves
    horizontal_flip=True,
    vertical_flip=True,  # To recognize leaves when they're upside down
    fill_mode='constant',  # To avoid multiple leaves in the same image
)

train_data_gen_data_aug = ImageDataGenerator(**augmentation_settings)

print("IDG OK")

In [None]:
training_dir = '../input/full-dataset/Dataset/training'

# Stream shuffled, augmented batches of 64 RGB images resized to 256x256.
# With classes=None the class list is inferred from the sub-directories of
# `training_dir`, and labels are one-hot encoded (class_mode='categorical').
train_gen_data_aug = train_data_gen_data_aug.flow_from_directory(
    directory=training_dir,
    target_size=(256, 256),
    color_mode='rgb',
    classes=None,
    class_mode='categorical',
    batch_size=64,
    shuffle=True,
    seed=seed,
)

# `class_weights` is keyed by the position of each class in `labels`, so the
# generator must have assigned exactly the same index to every class folder;
# otherwise the weights would silently be applied to the wrong classes.
expected_class_indices = {name: index for index, name in enumerate(labels)}
assert train_gen_data_aug.class_indices == expected_class_indices, (
    'Class indices inferred from the training directory do not match `labels`: '
    + str(train_gen_data_aug.class_indices)
)

In [None]:
# Resume from the saved model whose training was left halfway
saved_model_dir = '../input/output/SubmissionModel'
model = tfk.models.load_model(saved_model_dir)

In [None]:
# NOTE(review): compiling again installs a fresh Adam optimizer, so any
# optimizer state restored by load_model is presumably discarded - confirm
# that this is intended when resuming the training.
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(1e-5),
    metrics=['accuracy'],  # `metrics` is documented as a list, not a bare string
)

In [None]:
# Utility function to create folders and callbacks for training
from datetime import datetime

def create_folders_and_callbacks(model_name, monitor='loss'):
    """Create a timestamped experiment folder and the callbacks for training.

    The folder layout is ``./working1/<model_name>_<timestamp>/ckpts``; the
    directories are created if they do not exist yet.

    Args:
        model_name: Prefix of the experiment folder name.
        monitor: Name of the logged quantity used by the checkpoint to decide
            which epoch is the best one. It defaults to the training loss
            because Keras' own default, 'val_loss', is only logged when
            validation data is passed to ``fit``; without it the checkpoint
            would skip every save.

    Returns:
        A list containing a single ``ModelCheckpoint`` callback.
    """
    exps_dir = './working1'

    now = datetime.now().strftime('%b%d_%H-%M-%S')
    exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

    # Model checkpoint (lets us save the model we have just trained)
    ckpt_dir = os.path.join(exp_dir, 'ckpts')
    # Creates the experiments root and the experiment folder as well
    os.makedirs(ckpt_dir, exist_ok=True)

    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
        monitor=monitor,
        save_weights_only=False,  # True to save only weights
        save_best_only=True,  # True to save only the best epoch
    )

    return [ckpt_callback]

In [None]:
callbacks = create_folders_and_callbacks(model_name='model')

# Second half of the second step of the training.
# `batch_size` is deliberately not passed to fit(): the generator already
# yields batches of the size it was configured with.
history = model.fit(
    x=train_gen_data_aug,
    epochs=50,
    callbacks=callbacks,
    class_weight=class_weights,  # Class weights computed by us
).history

In [None]:
# Persist the model as it is after the last training epoch
final_model_dir = './working2'
model.save(final_model_dir)

In [None]:
# Plot the training curves. Only training metrics are drawn: fit() is called
# without validation data, so `history` holds no validation entries.
loss_fig, loss_ax = plt.subplots(figsize=(15, 5))
loss_ax.plot(history['loss'], label='Training', alpha=.8, color='#ff7f0e')
loss_ax.legend(loc='upper left')
# The model is compiled with CategoricalCrossentropy, not a binary loss
loss_ax.set_title('Categorical Crossentropy')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.grid(alpha=.3)

acc_fig, acc_ax = plt.subplots(figsize=(15, 5))
acc_ax.plot(history['accuracy'], label='Training', alpha=.8, color='#ff7f0e')
acc_ax.legend(loc='upper left')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.grid(alpha=.3)

plt.show()