In [None]:
! pip install gdown

In [None]:
!gdown --id 11iZ3AZ1OrUU4TimBlFVneV0e7-_HrWgu

In [None]:
!unzip dataset.zip

In [None]:
# Root folder of the image dataset; later cells read one sub-folder per class from it
# (presumably created by extracting dataset.zip - confirm against the archive layout).
dataset_dir = 'training'

In [None]:
import tensorflow as tf
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers
from tensorflow.keras import layers

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and later
# releases dropped the old 'seaborn' alias, so use whichever name this installation provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

# Short aliases used throughout the notebook
tfk = tf.keras
tfkl = tf.keras.layers
from tensorflow.keras.layers import Dropout, Dense, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras import applications
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from sklearn.utils import class_weight

# Record the TensorFlow version the notebook was run with
print(tf.__version__)


In [None]:
# Fix every source of randomness used by the notebook to a single seed
seed = 42

os.environ['PYTHONHASHSEED'] = str(seed)
for _seed_fn in (random.seed,
                 np.random.seed,
                 tf.random.set_seed,
                 tf.compat.v1.set_random_seed):
    _seed_fn(seed)

In [None]:
# Plot one example image per class
labels = ['Apple','Blueberry','Cherry','Corn','Grape','Orange','Peach','Pepper','Potato','Raspberry','Soybean','Squash','Strawberry','Tomato']

# Two columns; ceiling division keeps the grid large enough for an odd number of classes
num_col = 2
num_row = max(1, -(-len(labels) // num_col))
# squeeze=False always returns a 2-D array of axes, even for a single row
fig, axes = plt.subplots(num_row, num_col, figsize=(2*num_row,15*num_col), squeeze=False)
flat_axes = axes.ravel()

for ax, label in zip(flat_axes, labels):
    # Files directly inside the class folder, sorted so the chosen example is deterministic
    class_imgs = sorted(next(os.walk('{}/{}/'.format(dataset_dir, label)))[2])
    ax.set_title('{}'.format(label))
    if not class_imgs:
        # Nothing to show for an empty class folder
        ax.axis('off')
        continue
    # The context manager closes the file handle once the pixels are copied into an array
    with Image.open('{}/{}/{}'.format(dataset_dir, label, class_imgs[0])) as img:
        ax.imshow(np.array(img))

# Hide grid cells that have no class assigned
for ax in flat_axes[len(labels):]:
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Training and preprocessing configuration

# Optimisation schedule
epochs = 60
batch_size = 256

# Fraction of the images held out for validation
valid_split = 0.2

# Network input: shape tuple and the Xception-specific preprocessing function
input_shape = (256, 256, 3)
preprocessing_funct = applications.xception.preprocess_input

In [None]:
# Training generator: random augmentation followed by the backbone's preprocessing function
train_data_gen = ImageDataGenerator(rotation_range=30,
                                    height_shift_range=50,
                                    width_shift_range=50,
                                    zoom_range=0.3,
                                    horizontal_flip=True,
                                    vertical_flip=True,
                                    fill_mode='reflect',
                                    shear_range=0.2,
                                    preprocessing_function=preprocessing_funct,
                                    validation_split=valid_split)

# Validation generator: same preprocessing and split, but no augmentation
valid_data_gen = ImageDataGenerator(preprocessing_function=preprocessing_funct,
                                    validation_split=valid_split)

# Arguments shared by both flows. The image size is taken from `input_shape`
# so the generators cannot drift from the model input.
flow_kwargs = dict(directory=dataset_dir,
                   target_size=input_shape[:2],
                   color_mode='rgb',
                   classes=None, # can be set to labels
                   class_mode='categorical',
                   batch_size=batch_size,
                   seed=seed)

# Obtain the data generators with the 'ImageDataGenerator.flow_from_directory' method
train_gen = train_data_gen.flow_from_directory(shuffle=True,
                                               subset='training',
                                               **flow_kwargs)
# Validation data is not shuffled, so predictions stay aligned with `valid_gen.classes`
valid_gen = valid_data_gen.flow_from_directory(shuffle=False,
                                               subset='validation',
                                               **flow_kwargs)


In [None]:
# Plot of the data distribution of training dataset
# `class_indices` maps class name -> integer id; order the names by id so each bar is
# labelled with the class the generator actually assigned to that id.
class_names = sorted(train_gen.class_indices, key=train_gen.class_indices.get)
# bincount keeps a zero entry for a class with no training image, so names and counts align
counts = np.bincount(train_gen.classes, minlength=len(class_names))

plt.figure(figsize=(15, 4))
sns.barplot(x=class_names, y=counts)
plt.title('Images occurrency per class (Training set) ');
plt.show();



In [None]:
# Since we have an unbalanced dataset, we compute the class weights

# Class ids that actually occur in the training subset
present_classes = np.unique(train_gen.classes)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                     classes=present_classes,
                                                     y=train_gen.classes)

# Map each class id to its weight. Pairing by id (not by position) stays correct
# even if the ids present are not the contiguous range 0..k-1.
class_weights = {int(cls): float(w) for cls, w in zip(present_classes, balanced_weights)}





In [None]:
# Utility function to create folders and callbacks for training
from datetime import datetime


def create_folders_and_callbacks(model_name):
    """Create a timestamped experiment folder and the Keras callbacks used for training.

    The folder layout is
    ``transfer_learning_experiments/<model_name>_<timestamp>/{ckpts,tb_logs}``.

    Args:
        model_name: Prefix of the experiment folder name.

    Returns:
        A list with, in order, a ModelCheckpoint, a TensorBoard and an
        EarlyStopping callback.
    """
    exps_dir = 'transfer_learning_experiments'
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

    # exist_ok=True creates missing parents and tolerates existing folders in one call
    ckpt_dir = os.path.join(exp_dir, 'ckpts')
    tb_dir = os.path.join(exp_dir, 'tb_logs')
    os.makedirs(ckpt_dir, exist_ok=True)
    os.makedirs(tb_dir, exist_ok=True)

    callbacks = []

    # Model checkpoint
    # ----------------
    # Saves the full model after every epoch, overwriting the same path.
    # NOTE(review): newer Keras releases may require a '.keras'/'.h5' suffix for
    # full-model checkpoints - confirm against the installed version.
    ckpt_callback = ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
                                    save_weights_only=False, # True to save only weights
                                    save_best_only=False) # True to save only the best epoch
    callbacks.append(ckpt_callback)

    # Visualize Learning on Tensorboard
    # ---------------------------------
    # By default shows losses and metrics for both training and validation
    tb_callback = TensorBoard(log_dir=tb_dir,
                              profile_batch=0,
                              histogram_freq=1)  # if > 0 (epochs) shows weights histograms
    callbacks.append(tb_callback)

    # Early Stopping
    # --------------
    # Stop after 7 epochs without val_loss improvement and roll back to the best weights
    es_callback = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
    callbacks.append(es_callback)

    return callbacks

# ***Build model***
We apply transfer learning starting from the well-known Xception network. In a first trial we trained only the fully connected part; then we also fine-tuned the last layers of Xception.

# Xception Network

In [None]:
# Xception convolutional backbone with ImageNet weights and without its classification head
xception_01 = applications.Xception(weights='imagenet',
                                    include_top=False,
                                    input_shape=input_shape)

xception_01.summary()

In [None]:
# He-normal initializer used for the ReLU dense layers of the classification head
initializer = tfk.initializers.HeNormal()

# The backbone the classification head is stacked on
base_model = xception_01

def set_trainable_layers(num_layers, model):
    """Make only the last `num_layers` layers of `model` trainable and freeze the rest.

    Args:
        num_layers: Number of trailing layers to leave trainable. 0 freezes every
            layer; a value greater than or equal to the number of layers makes
            all of them trainable.
        model: Object exposing a `layers` sequence whose items have a
            `trainable` attribute.

    Raises:
        ValueError: If `num_layers` is negative.
    """
    if num_layers < 0:
        raise ValueError('num_layers must be >= 0, got {}'.format(num_layers))

    layers = model.layers
    # Index of the first trainable layer. Computing it explicitly avoids the
    # `[-0:]` slice, which selects the whole list instead of nothing.
    first_trainable = max(len(layers) - num_layers, 0)

    for idx, layer in enumerate(layers):
        layer.trainable = idx >= first_trainable
    

    

# Fine-tune only the last 30 layers of the backbone; all earlier layers stay frozen
set_trainable_layers(30, base_model)

# Classification head: global pooling followed by three regularised dense layers
# with dropout, ending in a 14-way softmax.
model = tf.keras.models.Sequential([
                base_model,
                GlobalAveragePooling2D(),
                Dense(512, activation='relu',kernel_initializer=initializer, kernel_regularizer=regularizers.l1_l2(l1=5e-4, l2=5e-4)),
                Dropout(rate = 0.5),
                Dense(512, activation='relu',kernel_initializer=initializer, kernel_regularizer=regularizers.l1_l2(l1=5e-4, l2=5e-4)),
                Dropout(rate = 0.5),
                Dense(256, activation='relu',kernel_initializer=initializer, kernel_regularizer=regularizers.l1_l2(l1=5e-4, l2=5e-4)),
                Dropout(rate = 0.3),
                Dense(14, activation='softmax',kernel_initializer='glorot_uniform')])


# defining optimizer
adamopt = Adam(learning_rate=3e-4)

# model compilation
# `metrics` is passed as a list: that is the documented form, and newer Keras versions
# reject a bare string. The history keys stay 'accuracy' / 'val_accuracy'.
model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=adamopt, metrics=['accuracy'])
model.summary()



In [None]:
# Prepare the experiment folders and the training callbacks
callbacks = create_folders_and_callbacks('xception_model_finetuning')

# Train with class weighting to compensate for the unbalanced dataset
history = model.fit(
    x=train_gen,
    epochs=epochs,
    validation_data=valid_gen,
    class_weight=class_weights,
    callbacks=callbacks,
)

In [None]:
# Save the model as it stands after training. It holds the best-epoch weights only if
# EarlyStopping restored them (restore_best_weights=True).
final_model_path = "transfer_learning_experiments/xception_model_finetuning"
model.save(final_model_path)

In [None]:
# Summarize the training history: one figure for accuracy, one for loss,
# each showing the training and validation curves
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [None]:
# Predict the validation set with the CNN. `valid_gen` is created with shuffle=False,
# so the prediction order matches `valid_gen.classes`.
pred_probs = model.predict(valid_gen)
predictions = np.argmax(pred_probs, axis=-1)  # most probable class id per image
y_true = valid_gen.classes

# Compute the confusion matrix (each row normalised over the true class)
cm = confusion_matrix(y_true, predictions, normalize='true')

# Compute the classification metrics (macro average: every class counts equally)
accuracy = accuracy_score(y_true, predictions)
precision = precision_score(y_true, predictions, average='macro')
recall = recall_score(y_true, predictions, average='macro')
f1 = f1_score(y_true, predictions, average='macro')

# Built-in round() works for both NumPy scalars and plain Python floats,
# whereas the `.round()` method exists only on NumPy scalars.
print('Accuracy:', round(accuracy, 4))
print('Precision:', round(precision, 4))
print('Recall:', round(recall, 4))
print('F1:', round(f1, 4))

In [None]:
# Render the normalised confusion matrix with the class names on both axes
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                display_labels=labels)

fig, ax = plt.subplots(figsize=(24,24))
# Hide the tick marks but keep the tick labels
ax.tick_params(axis='both', which='both', length=0)
# Switch the style sheet's grid off explicitly. The former `plt.grid(b=None)` relied on
# toggling and on the `b` keyword, which Matplotlib renamed to `visible` and later removed.
ax.grid(False)
disp.plot(ax=ax)