# ANNDL Homework 1 - Best Model
**Team:** MercyMain

**Team Members:**
- Azimi Arya
- Belotti Ottavia
- Izzo Riccardo


The best model consists of an ensemble of two models, both based on _Xception_. 
The first one is a straightforward transfer-learning model, while the second exploits the CutOut method for data augmentation and a periodic scheduling of the learning rate. 
The scheduler keeps the learning rate constant for 5 epochs, then performs an exponential decay for the following 10 epochs, and finally goes back to the initial learning rate and restarts the cycle.

In [None]:
import numpy as np
import pandas as pd
import keras.layers
import tensorflow as tf
import os
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from datetime import datetime

# Transfer learning with Xception
from tensorflow.keras.applications import Xception

# Short aliases for the Keras namespaces used throughout the notebook
tfk = tf.keras
tfkl = tfk.layers
tfk.__version__

# Random seed for reproducibility: the same value is fed to Python's hash
# seed and to the `random`, NumPy and TensorFlow generators.
seed = 42

os.environ['PYTHONHASHSEED'] = str(seed)
for _set_seed in (random.seed,
                  np.random.seed,
                  tf.random.set_seed,
                  tf.compat.v1.set_random_seed):
    _set_seed(seed)

# Method for checkpoints save and callbacks 

def create_folders_and_callbacks(model_name):
    """Create a timestamped experiment folder and build the training callbacks.

    The folder ``./Challenge_CheckPoints/<model_name>_<timestamp>`` is created
    together with its ``ckpts`` and ``tb_logs`` sub-folders.

    Args:
        model_name: Prefix of the experiment folder name.

    Returns:
        A list holding, in this order, a ``ModelCheckpoint``, a ``TensorBoard``
        and an ``EarlyStopping`` callback.
    """
    exps_dir = './Challenge_CheckPoints'
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

    ckpt_dir = os.path.join(exp_dir, 'ckpts')
    tb_dir = os.path.join(exp_dir, 'tb_logs')

    # makedirs creates the missing parents too, and exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    for folder in (ckpt_dir, tb_dir):
        os.makedirs(folder, exist_ok=True)

    callbacks = []

    # Model checkpoint: full model is saved, only when the monitored metric improves
    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp'),
                                                       save_weights_only=False,
                                                       save_best_only=True)
    callbacks.append(ckpt_callback)

    # Visualize Learning on Tensorboard
    tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                                 profile_batch=0,
                                                 histogram_freq=1)
    callbacks.append(tb_callback)

    # Early Stopping on the validation loss, rolling back to the best weights
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                   patience=20,
                                                   restore_best_weights=True)
    callbacks.append(es_callback)

    return callbacks

## Data Preprocessing and Data Augmentation

In [None]:
# Cutout preprocessing function
def eraser(input_img, *, p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3,
           v_l=0, v_h=255, pixel_level=False):
    """Apply Cutout (random erasing) to an image, in place.

    With probability ``p`` a random rectangle of the image is overwritten,
    either with a single random value or with per-pixel random values.

    Args:
        input_img: 3-D array laid out as (height, width, channels). It is
            modified in place.
        p: Probability of erasing a rectangle.
        s_l: Minimum erased area, as a fraction of the image area.
        s_h: Maximum erased area, as a fraction of the image area.
        r_1: Minimum height/width ratio of the rectangle.
        r_2: Maximum height/width ratio of the rectangle.
        v_l: Lower bound of the fill value.
        v_h: Upper bound of the fill value.
        pixel_level: If True every erased pixel gets its own random value,
            otherwise the whole rectangle shares one value.

    Returns:
        The same ``input_img`` object, possibly with a rectangle erased.
    """
    img_h, img_w, img_c = input_img.shape

    # Leave the image untouched with probability 1 - p
    if np.random.rand() > p:
        return input_img

    # Rejection sampling: draw rectangles until one fits inside the image
    while True:
        area = np.random.uniform(s_l, s_h) * img_h * img_w
        ratio = np.random.uniform(r_1, r_2)
        w = int(np.sqrt(area / ratio))
        h = int(np.sqrt(area * ratio))
        left = np.random.randint(0, img_w)
        top = np.random.randint(0, img_h)

        if left + w <= img_w and top + h <= img_h:
            break

    if pixel_level:
        fill = np.random.uniform(v_l, v_h, (h, w, img_c))
    else:
        fill = np.random.uniform(v_l, v_h)

    input_img[top:top + h, left:left + w, :] = fill

    return input_img



In [None]:
# Add noise preprocessing function
# Custom function for gaussian noise 
def add_noise(img):
    """Add zero-mean Gaussian noise to an image and clip it to [0, 255].

    The standard deviation is drawn uniformly in [0, 25) on every call.

    Args:
        img: Float array holding the image; it is modified in place.

    Returns:
        The same ``img`` object with the noise added and the values clipped.
    """
    VARIABILITY = 25
    deviation = VARIABILITY * random.random()
    noise = np.random.normal(0, deviation, img.shape)
    img += noise
    # np.clip returns a new array unless `out` is given: clip in place so the
    # returned image really stays inside the valid pixel range.
    np.clip(img, 0., 255., out=img)
    return img

In [None]:
# Dataset location and loading parameters
training_dir = "/kaggle/input/dataset-homework-1/training_data_final/"

# Splitting seed needed to avoid overlap in training and validation set
# NOTE(review): split_seed is not referenced in the visible code; both
# iterators below are seeded with `seed` instead - confirm which is intended.
split_seed = 123
batch_size = 32

# First model: geometric and photometric augmentation plus Gaussian noise
train_data_gen_V1 = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.1,
    rotation_range=180,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=(0.3, 1.7),
    channel_shift_range=0.5,
    fill_mode='reflect',
    preprocessing_function=add_noise,
)

# Validation images are only rescaled; same validation_split as the training generator
valid_data_gen_V1 = ImageDataGenerator(validation_split=0.1, rescale=1./255)

# Options shared by the training and the validation iterators
flow_options_V1 = dict(
    directory=training_dir,
    target_size=(299, 299),
    color_mode='rgb',
    classes=None,
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

# Data load from directory and split in validation and training
training_set_V1 = train_data_gen_V1.flow_from_directory(subset='training', **flow_options_V1)
validation_set_V1 = valid_data_gen_V1.flow_from_directory(subset='validation', **flow_options_V1)

from collections import Counter

# Inverse-frequency class weights: this weighting tells the model to "pay more
# attention" to samples from an under-represented class when passed to the
# model's fit function (e.g. Species1 and Species6).
counter_V1 = Counter(training_set_V1.classes)

# Number of classes taken from the data instead of a hard-coded 8
n_classes_V1 = len(counter_V1)

weights_V1 = {
    class_index: training_set_V1.samples / (n_classes_V1 * n_images)
    for class_index, n_images in counter_V1.items()
}

In [None]:
# Second model: same augmentation ranges as the first one, but with
# 'nearest' filling and the CutOut `eraser` as preprocessing function
train_data_gen_V2 = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.1,
    rotation_range=180,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=(0.3, 1.7),
    channel_shift_range=0.5,
    fill_mode='nearest',
    preprocessing_function=eraser,
)

# Validation images are only rescaled; same validation_split as the training generator
valid_data_gen_V2 = ImageDataGenerator(validation_split=0.1, rescale=1./255)

# Options shared by the training and the validation iterators
flow_options_V2 = dict(
    directory=training_dir,
    target_size=(299, 299),
    color_mode='rgb',
    classes=None,
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

# Data load from directory and split in validation and training
training_set_V2 = train_data_gen_V2.flow_from_directory(subset='training', **flow_options_V2)
validation_set_V2 = valid_data_gen_V2.flow_from_directory(subset='validation', **flow_options_V2)

from collections import Counter

# Inverse-frequency class weights, so that under-represented classes weigh
# more in the loss when passed to the model's fit function.
counter_V2 = Counter(training_set_V2.classes)

# Number of classes taken from the data instead of a hard-coded 8
n_classes_V2 = len(counter_V2)

weights_V2 = {
    class_index: training_set_V2.samples / (n_classes_V2 * n_images)
    for class_index, n_images in counter_V2.items()
}

## Learning Rate Scheduler

In [None]:
def _cyclic_exponential_lr(epoch, lr, base_rate, hold_epochs=5, cycle_length=15, decay=0.01):
    """Shared cyclic schedule: hold ``base_rate``, then decay exponentially.

    Within every cycle of ``cycle_length`` epochs the first ``hold_epochs``
    epochs use ``base_rate``; in the remaining ones the current rate is
    multiplied by ``exp(-decay)``.
    """
    import math

    if epoch % cycle_length < hold_epochs:
        return base_rate
    # A plain Python float is returned (not a TensorFlow tensor), which is
    # what the LearningRateScheduler callback expects from its schedule.
    return float(lr) * math.exp(-decay)


def scheduler_fit(epoch, lr):
    """Learning-rate schedule for the transfer-learning phase (base rate 1e-3).

    Args:
        epoch: Index of the epoch about to start.
        lr: Current learning rate.

    Returns:
        The learning rate to use for this epoch, as a float.
    """
    return _cyclic_exponential_lr(epoch, lr, base_rate=1e-3)


def scheduler_tuning(epoch, lr):
    """Learning-rate schedule for the fine-tuning phase (base rate 1e-5).

    Args:
        epoch: Index of the epoch about to start.
        lr: Current learning rate.

    Returns:
        The learning rate to use for this epoch, as a float.
    """
    return _cyclic_exponential_lr(epoch, lr, base_rate=1e-5)

## First Model: Transfer Learning with Xception

In [None]:
# Training budget and network input size
input_shape = (299, 299, 3)
epochs = 250

# Xception pretrained on ImageNet, imported without the FC part
base_model = Xception(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
    input_tensor=None,
    pooling=None,
)

# Freeze every layer in the net: only the custom head is trained at first
base_model.trainable = False

def build_model(input_shape):
    """Build and compile the first classifier on top of the global ``base_model``.

    The head is: global average pooling, Dense(256, gelu), Dropout(0.3),
    Dense(128, gelu), Dropout(0.3), Dense(8, softmax).

    Args:
        input_shape: Kept for interface compatibility; the model input is
            taken from ``base_model.input``.

    Returns:
        The compiled Keras model (categorical cross-entropy, Adam, accuracy).
    """
    # Use the Xception feature extraction part
    x = base_model.output

    global_average = tfkl.GlobalAveragePooling2D()(x)
    # No `input_dim` here: the layer is wired through the functional API, and
    # the image shape tuple is not a valid input dimension for a Dense layer.
    dense1 = tfkl.Dense(units=256, activation='gelu', kernel_initializer='he_uniform')(global_average)
    dp1 = tfkl.Dropout(0.3)(dense1)
    dense2 = tfkl.Dense(units=128, activation='gelu', kernel_initializer='he_uniform')(dp1)
    dp2 = tfkl.Dropout(0.3)(dense2)
    output_layer = tfkl.Dense(units=8, activation='softmax', kernel_initializer='he_uniform')(dp2)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=base_model.input, outputs=output_layer, name='model')

    # Compile the model
    model.compile(loss=tfk.losses.CategoricalCrossentropy(),
                  optimizer=tfk.optimizers.Adam(),
                  metrics=['accuracy'])

    # Return the model
    return model


model = build_model(input_shape)
model.summary()


# ----------------------
# Transfer learning part: only the custom head is trainable

# Experiment folders and callbacks for this phase
locals_callbacks = create_folders_and_callbacks(model_name='transfer_Xception_0')

# Train the model
history = model.fit(
    training_set_V1,
    validation_data=validation_set_V1,
    epochs=epochs,
    class_weight=weights_V1,
    callbacks=locals_callbacks,
).history

# ----------------------
# Fine tuning part: every layer becomes trainable

model.trainable = True

# Recompile the model with lower learning rate
model.compile(
    optimizer=tfk.optimizers.Adam(1e-4),
    loss=tfk.losses.CategoricalCrossentropy(),
    metrics='accuracy',
)

# Experiment folders and callbacks for this phase
locals_callbacks1 = create_folders_and_callbacks(model_name='transfer_Xception_1')

# Retrain the model
history = model.fit(
    training_set_V1,
    validation_data=validation_set_V1,
    epochs=epochs,
    class_weight=weights_V1,
    callbacks=locals_callbacks1,
).history

# Save model
# NOTE(review): confirm this is the intended output folder on Kaggle.
model.save("/kaggle/output/working/Transfer_Xception_Save")

## Second Model: Transfer Learning with Xception, CutOut and Learning Rate scheduler

In [None]:
# Training budget and network input size
input_shape = (299, 299, 3)
epochs = 200

# Xception pretrained on ImageNet, imported without the FC part
base_model = Xception(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
    input_tensor=None,
    pooling=None,
)

# Freeze every layer in the net: only the custom head is trained at first
base_model.trainable = False

def build_model(input_shape):
    """Build and compile the second classifier on top of the global ``base_model``.

    The head is: global average pooling, Dense(256, gelu), BatchNorm,
    Dropout(0.3), Dense(128, gelu), BatchNorm, Dropout(0.3), Dense(8, softmax).

    Args:
        input_shape: Kept for interface compatibility; the model input is
            taken from ``base_model.input``.

    Returns:
        The compiled Keras model (categorical cross-entropy, Adam, accuracy).
    """
    # Use the Xception feature extraction part
    x = base_model.output

    # Build a custom FC part
    global_average = tfkl.GlobalAveragePooling2D()(x)

    dense1 = tfkl.Dense(units=256, activation='gelu', kernel_initializer='he_uniform')(global_average)
    bn5 = tfkl.BatchNormalization()(dense1)
    dp3 = tfkl.Dropout(0.3)(bn5)

    dense2 = tfkl.Dense(units=128, activation='gelu', kernel_initializer='he_uniform')(dp3)
    # The second normalisation must follow dense2: feeding it dense1 left
    # bn5, dp3 and dense2 disconnected from the output.
    # NOTE: this changes the graph, so weights saved from the previous
    # (shorter) architecture no longer match this model.
    bn6 = tfkl.BatchNormalization()(dense2)
    dp4 = tfkl.Dropout(0.3)(bn6)

    output_layer = tfkl.Dense(units=8, activation='softmax', kernel_initializer='he_uniform', name='Output')(dp4)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=base_model.input, outputs=output_layer, name='model')

    # Compile the model
    model.compile(loss=tfk.losses.CategoricalCrossentropy(),
                  optimizer=tfk.optimizers.Adam(),
                  metrics=['accuracy'])

    # Return the model
    return model


model = build_model(input_shape)
model.summary()


# ---------------------------
# Transfer learning part: only the custom head is trainable

# Create folders and callbacks and fit
locals_callbacks = create_folders_and_callbacks(model_name='transfer_Xception_0_V2')
lr_callback = tf.keras.callbacks.LearningRateScheduler(scheduler_fit)

# Train the model
history = model.fit(
    training_set_V2,
    epochs=epochs,
    validation_data=validation_set_V2,
    # locals_callbacks is already a list: concatenate instead of nesting it,
    # so that fit() receives a flat list of callbacks.
    callbacks=locals_callbacks + [lr_callback],
    class_weight=weights_V2,
).history

# ---------------------------
# Fine tuning part

# Unfreeze all the layers in the Xception
model.trainable = True

# Recompile the model; the learning rate is driven by the scheduler below
model.compile(loss=tfk.losses.CategoricalCrossentropy(),
              optimizer=tfk.optimizers.Adam(),
              metrics='accuracy')

# Callbacks
locals_callbacks1 = create_folders_and_callbacks(model_name='transfer_Xception_1_V2')
# Scheduler with lower initial learning rate for tuning
lr_callback = tf.keras.callbacks.LearningRateScheduler(scheduler_tuning)

# Retrain the model
history1 = model.fit(
    training_set_V2,
    epochs=epochs,
    validation_data=validation_set_V2,
    callbacks=locals_callbacks1 + [lr_callback],
    class_weight=weights_V2,
).history

# Save model
# NOTE(review): confirm this is the intended output folder on Kaggle.
model.save("/kaggle/output/working/Transfer_Xception_V2_Save")

## Ensemble
Once the models have been trained, the ensemble is done at prediction time as follows, for each input:
1. Get the class predictions/probabilities from both models
2. Compute the average of the scores between the 2 classification output vectors, class-wise
3. Consider the new averaged vector as the classification output, hence extract the predicted class for the image by choosing the most likely one (i.e. _argmax_)

The code snippet below reproduces the procedure implemented in the `Model.py` file placed in the submission folder.

In [None]:
# Model.py
class model:
    """Ensemble of two saved classifiers, averaged at prediction time."""

    def __init__(self, path):
        """Load both models from the ``SubmissionModel`` folder under ``path``."""
        self.model_0 = tf.keras.models.load_model(os.path.join(path, 'SubmissionModel', 'Xception_0'))
        self.model_1 = tf.keras.models.load_model(os.path.join(path, 'SubmissionModel', 'Xception_lr_sched_cutout'))

    def predict(self, X):
        """Predict the class index of every image in ``X``.

        Args:
            X: Batch of images - presumably (batch, height, width, 3) with
                values in 0-255; TODO confirm against the caller.

        Returns:
            A tensor with the index of the highest averaged score per image.
        """
        # Same preprocessing as in training: rescale, then resize to 299x299
        X = X / 255.

        X = tf.image.resize(
            X,
            (299, 299),
            method='nearest',
            preserve_aspect_ratio=False,
            antialias=False
        )

        out_0 = self.model_0.predict(X)
        out_1 = self.model_1.predict(X)

        # Class-wise average of the two score matrices, computed in one
        # vectorised step instead of an element-by-element Python loop
        out_avg = (np.asarray(out_0) + np.asarray(out_1)) / 2

        # Get best class prediction for each image
        out = tf.argmax(out_avg, axis=-1)

        return out