### Import libraries

In [None]:
import os
import random

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Short aliases for the Keras namespaces used in the rest of the notebook
tfk = tf.keras
tfkl = tfk.layers
print(tf.__version__)

### Seed for reproducibility

In [None]:
# Single seed shared by every random number generator used in the notebook
seed = 42

# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here presumably only affects processes launched afterwards - confirm if hashing matters
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python, NumPy and TensorFlow (TF2 API plus the v1 compatibility entry point)
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed, tf.compat.v1.set_random_seed):
  _seed_fn(seed)

### Load dataset

In [None]:
# Dataset folders: the train/validation split already exists on disk, one folder each
validation_dir = '../input/train-val/output/output/val'
training_dir = '../input/train-val/output/output/train'      # TODO: "entered twice, needs fixing" - presumably the repeated 'output/output' path segment; confirm

In [None]:
# Plot one example image per class on a two-column grid
labels = ['Apple','Blueberry','Cherry','Corn','Grape','Orange','Peach','Pepper','Potato','Raspberry','Soybean','Squash','Strawberry','Tomato']

num_col = 2
num_row = -(-len(labels) // num_col)   # ceiling division: an odd number of classes still fits
# squeeze=False keeps `axes` two-dimensional even when the grid has a single row
fig, axes = plt.subplots(num_row, num_col, figsize=(2*num_row,15*num_col), squeeze=False)
flat_axes = axes.ravel()               # row-major, i.e. cell i is axes[i // num_col, i % num_col]
for ax, label in zip(flat_axes, labels):
  class_dir = os.path.join(training_dir, label)
  # The directory listing order is not guaranteed; sorting makes the chosen sample repeatable
  class_imgs = sorted(next(os.walk(class_dir))[2])
  # Context manager so the file handle is released once the pixels are copied into an array
  with Image.open(os.path.join(class_dir, class_imgs[0])) as img:
    ax.imshow(np.array(img))
  ax.set_title(label)
# Blank out any grid cell left without an image
for ax in flat_axes[len(labels):]:
  ax.axis('off')
plt.tight_layout()
plt.show()

### Data Loader

In [None]:
# Build the data generators. The train/validation split already exists on disk
# (training_dir / validation_dir), so each generator simply reads its own folder.

batch_size = 256
img_size = (256, 256)

# Training images are randomly augmented on the fly.
# No rescaling is applied: pixel values are passed on in the [0, 255] range.
datagen = ImageDataGenerator(
    height_shift_range=50,
    width_shift_range=50,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='reflect',
    )

# Validation images must NOT be augmented: random shifts/zooms/flips would make the
# validation metrics noisy and different on every epoch, which also disturbs any
# callback that monitors them (e.g. early stopping on val_accuracy).
valid_datagen = ImageDataGenerator()

train_generator = datagen.flow_from_directory(
    training_dir,
    target_size=img_size,
    color_mode='rgb',
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
    seed=seed,          # reproducible shuffling and augmentation
    )

validation_generator = valid_datagen.flow_from_directory(
    validation_dir,
    target_size=img_size,
    color_mode='rgb',
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,      # fixed order, so predictions can be matched with .classes
    )


In [None]:
# Inspect the label encoding produced by the generator (expected to follow lexicographical order)
print("Assigned labels", train_generator.class_indices, sep="\n")
print()
print("Target classes", train_generator.classes, sep="\n")

In [None]:
def get_next_batch(generator):
  """Fetch the next batch from `generator`, describe it and show its first image.

  Prints the shapes of the image and target arrays of the batch, then the
  one-hot target, the class index and the class name of the first sample, and
  finally draws that sample with matplotlib.

  Args:
    generator: iterator yielding `(images, targets)` batches. When it exposes
      a `class_indices` mapping (class name -> index), the class name is
      resolved through it, so the printed name always agrees with the
      generator's own encoding; otherwise the module-level `labels` list is
      used.

  Returns:
    The batch obtained from `next(generator)`, unchanged.
  """
  batch = next(generator)
  images, targets = batch[0], batch[1]

  print("(Input) image shape:", images.shape)
  print("Target shape:", targets.shape)

  # Visualize only the first sample
  image = images[0]
  target = targets[0]
  target_idx = int(np.argmax(target))

  class_indices = getattr(generator, 'class_indices', None)
  if class_indices:
    # Invert name -> index into index -> name
    index_to_name = {index: name for name, index in class_indices.items()}
    class_name = index_to_name[target_idx]
  else:
    class_name = labels[target_idx]

  print()
  print("Categorical label:", target)
  print("Label:", target_idx)
  print("Class name:", class_name)
  plt.figure(figsize=(6, 4))
  # Cast to uint8 so matplotlib interprets the values as 0-255 intensities
  plt.imshow(np.uint8(image))

  return batch

In [None]:
# Pull one batch from the training generator and display the info of its first sample;
# the returned batch itself is discarded
_ = get_next_batch(train_generator)

### Models metadata

In [None]:
input_shape = (256, 256, 3)       # images are 256x256 RGB
epochs = 200                      # TODO: 200 was chosen because the exercise sessions used it - double-check

### VGG19 model

In [None]:
# Download the VGG19 convolutional base pre-trained on ImageNet, then print and plot it
supernet = tfk.applications.VGG19(
    include_top=False,    # drop the classifier and keep only the feature extraction part
    weights="imagenet",
    input_shape=input_shape   # shared constant from the metadata cell instead of a repeated literal
)
supernet.summary()
tfk.utils.plot_model(supernet)

In [None]:
# Transfer learning: keep the pre-trained VGG19 frozen and train only a new classification head
supernet.trainable = False      # the optimizer will not update the VGG19 parameters

inputs = tfk.Input(shape=input_shape)
# The target size equals the input size, so this layer does not change the resolution
x = tfkl.Resizing(input_shape[0], input_shape[1], interpolation="bicubic")(inputs)
# The generators deliver raw [0, 255] RGB pixels, while the ImageNet weights expect the
# VGG preprocessing. Doing it inside the model keeps it attached to the saved model,
# so inference code can keep feeding raw images.
x = tfk.applications.vgg19.preprocess_input(x)
x = supernet(x)

# --- beginning of the part that is trained ---
x = tfkl.GlobalAveragePooling2D(name='GlobalPooling')(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
x = tfkl.Dense(
    64,
    kernel_regularizer=regularizers.l2(0.01),
    activation='relu',
    kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
x = tfkl.Dense(
    32,
    kernel_regularizer=regularizers.l2(0.01),
    activation='relu',
    kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
outputs = tfkl.Dense(
    len(labels),          # one output unit per class
    activation='softmax',
    kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)
# --- end of the part that is trained - VGG19 stays fixed ---

# Connect input and output through the Model class
tl_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Compile the model (Adam with its default learning rate)
tl_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(), metrics='accuracy')
tl_model.summary()
tfk.utils.plot_model(tl_model)

In [None]:
# Per-class loss weights, listed in class-index order (index 0 first).
# NOTE(review): the values are hard-coded; presumably they compensate for class
# imbalance - confirm how they were derived before reusing them on another split.
_weights_by_class = [
    1.2709710218607015,
    2.9515938606847696,
    2.0644095788604457,
    1.0692899914456802,
    0.8383635144198525,
    0.7114399544678429,
    1.352813852813853,
    1.676727028839705,
    1.8601190476190477,
    4.201680672268908,
    0.7647598654022637,
    2.0644095788604457,
    1.9201228878648233,
    0.22561140691273351,
]
# Keras expects a {class_index: weight} mapping
d_class_weights = dict(enumerate(_weights_by_class))
d_class_weights

In [None]:
# Train the new classification head (the VGG19 base is frozen at this stage).
# `batch_size` is intentionally not passed: the generator already yields whole batches
# and Keras documents that it must not be specified for generator inputs.
tl_history = tl_model.fit(
    x = train_generator,
    class_weight = d_class_weights,
    epochs = epochs,     # upper bound; early stopping usually ends training sooner
    validation_data = validation_generator,
    # Stop after 10 epochs without a val_accuracy improvement and roll back to the best weights
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=10, restore_best_weights=True)]
).history

In [None]:
# Plot the transfer-learning curves: one figure per metric, training (dashed) vs validation
for metric, title, train_alpha in (('loss', 'Categorical Crossentropy', .3),
                                   ('accuracy', 'Accuracy', .8)):
  plt.figure(figsize=(15,5))
  plt.plot(tl_history[metric], label='Training', alpha=train_alpha, color='#ff7f0e', linestyle='--')
  plt.plot(tl_history['val_' + metric], label='Validation', alpha=.8, color='#ff7f0e')
  plt.legend(loc='upper left')
  plt.title(title)
  plt.grid(alpha=.3)

plt.show()

In [None]:
# Save the transfer-learning model to the 'TransferLearningModel' path
# (training used EarlyStopping with restore_best_weights=True)
tl_model.save('TransferLearningModel')
del tl_model  # drop the reference to avoid filling the memory

In [None]:
# Re-load the model saved after transfer learning; it is the starting point for fine-tuning
ft_model = tfk.models.load_model('TransferLearningModel')
ft_model.summary()

In [None]:
# Make the whole VGG19 sub-model trainable, then list every layer with its flag
vgg = ft_model.get_layer('vgg19')
vgg.trainable = True
for i, layer in enumerate(vgg.layers):
  print(i, layer.name, layer.trainable)

In [None]:
# Freeze the first 18 layers of VGG19 (indices 0-17); only the remaining ones are fine-tuned
vgg = ft_model.get_layer('vgg19')
for layer in vgg.layers[:18]:
  layer.trainable = False
for i, layer in enumerate(vgg.layers):
  print(i, layer.name, layer.trainable)
ft_model.summary()

# Pooling layers past the cut also stay flagged as trainable, which is harmless
# because they have no parameters

In [None]:
# Re-compile after changing the trainable flags; Adam is given a learning rate of 1e-4 here,
# whereas the transfer-learning step used Adam with its default settings
ft_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(1e-4), metrics='accuracy')

In [None]:
# Fine-tune the model (head plus the unfrozen VGG19 layers).
# `batch_size` is intentionally not passed: the generator already yields whole batches
# and Keras documents that it must not be specified for generator inputs.
ft_history = ft_model.fit(
    x = train_generator,
    class_weight = d_class_weights,
    epochs = epochs,     # upper bound; early stopping usually ends training sooner
    validation_data = validation_generator,
    # Stop after 10 epochs without a val_accuracy improvement and roll back to the best weights
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=10, restore_best_weights=True)]
).history

In [None]:
# Compare transfer learning and fine-tuning: one figure per metric,
# training curves dashed and faint, validation curves solid and labelled
runs = (
    (tl_history, 'Transfer Learning', '#4D61E2'),
    (ft_history, 'Fine Tuning', '#2ABC3D'),
)
for metric, title in (('loss', 'Categorical Crossentropy'), ('accuracy', 'Accuracy')):
  plt.figure(figsize=(15,5))
  for history, run_name, colour in runs:
    plt.plot(history[metric], alpha=.3, color=colour, linestyle='--')
    plt.plot(history['val_' + metric], label=run_name, alpha=.8, color=colour)
  plt.legend(loc='upper left')
  plt.title(title)
  plt.grid(alpha=.3)

plt.show()

In [None]:
# Save the fine-tuned model to the 'FineTuningModel' path, then drop the reference to it
ft_model.save('FineTuningModel')
del ft_model