# AN2DL - First Challenge

## Initial Operations

### Import the libraries

In [None]:
import os
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Short aliases for the Keras namespaces used by the rest of the notebook
tfk = tf.keras
tfkl = tf.keras.layers

### Set random seed for reproducibility

In [None]:
# Single seed value reused by every random number generator in the notebook
seed = 69

# Python: hash seed environment variable and the `random` module
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)

# NumPy global generator
np.random.seed(seed)

# TensorFlow global seed, set through both the TF2 API and the TF1 compat API
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

### Metadata

In [None]:
# Dataset folders, given relative to the current working directory.
# Both are read with `flow_from_directory`, and the training folder is also
# listed per label to compute class weights, so each is expected to hold one
# sub-folder per class.
training_dir = './GitHub/LeafClassification-AN2DL/Data/dataset_no_corrupted/training'
validation_dir = './GitHub/LeafClassification-AN2DL/Data/dataset_no_corrupted/validation'

In [None]:
# Shape of the network input tensor; matches the 256x256 RGB images
# produced by the data generators.
input_shape = (256, 256, 3)

## Model - Transfer Learning from InceptionResNetV2

### Data Generators

In [None]:
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input

# Settings shared by both iterators, defined once so they cannot drift apart
image_size = input_shape[:2]  # spatial size of the network input
batch_size = 128

# Training images: random geometric and brightness augmentation, followed by
# the InceptionResNetV2 preprocessing function.
train_data_gen = ImageDataGenerator(
    rotation_range=90,
    height_shift_range=100,
    width_shift_range=100,
    zoom_range=0.5,
    horizontal_flip=True,
    vertical_flip=True,
    shear_range=0.25,
    fill_mode='reflect',
    brightness_range=[0.5, 1.5],
    preprocessing_function=preprocess_input,
)
# Validation images: no augmentation, only the network-specific preprocessing
val_data_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_gen = train_data_gen.flow_from_directory(
    directory=training_dir,
    target_size=image_size,
    color_mode='rgb',
    classes=None,  # infer the classes from the sub-folder names
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)
# The validation iterator is NOT shuffled: the metrics computed during `fit`
# do not depend on batch order, while a fixed order keeps predictions aligned
# with `validation_gen.classes` / `validation_gen.filenames` for later analysis.
validation_gen = val_data_gen.flow_from_directory(
    directory=validation_dir,
    target_size=image_size,
    color_mode='rgb',
    classes=None,
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=False,
    seed=seed,
)

Found 16167 images belonging to 14 classes.
Found 1540 images belonging to 14 classes.


### Class Weight (for Unbalanced Classes)

In [None]:
# Count the training samples of every class.
# Only sub-directories are treated as classes and they are visited in sorted
# order, so index i below refers to the i-th class folder alphabetically and a
# stray file in `training_dir` cannot shift the indices.
classes = dict()
for label in sorted(os.listdir(training_dir)):
    label_dir = os.path.join(training_dir, label)
    if os.path.isdir(label_dir):
        classes[label] = len(os.listdir(label_dir))

empty_classes = [label for label, samples_number in classes.items() if samples_number == 0]
if empty_classes:
    # A class without samples has no finite weight; fail with a clear message
    raise ValueError("Classes without training samples: " + ", ".join(empty_classes))

total = sum(classes.values())
num_classes = len(classes)

# Weight of class i is total / (num_classes * samples_i): rarer classes get a
# proportionally larger weight, and a perfectly balanced dataset gives 1.0.
class_weight = {
    i: (1 / samples_number) * total / num_classes
    for i, samples_number in enumerate(classes.values())
}

### Model Definition

#### Supernet (InceptionResNetV2)

In [None]:
# Download the supernet: InceptionResNetV2 with ImageNet weights and without
# its classification head, built for the notebook-wide input shape.
supernet = tfk.applications.inception_resnet_v2.InceptionResNetV2(
    include_top=False,
    weights="imagenet",
    input_shape=input_shape
)

# Fine tuning: the first `frozen_layers` layers keep their pretrained weights,
# every layer after them stays trainable.
frozen_layers = 460
supernet.trainable = True
for layer in supernet.layers[:frozen_layers]:
    layer.trainable = False

2021-11-26 08:22:08.443463: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-26 08:22:08.533731: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-26 08:22:08.534471: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-26 08:22:08.535698: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


#### Additional Dense Layers

In [None]:
# Build the model: the supernet followed by a seeded Dropout/Dense head.

def _seeded_dropout(rate):
    """Return a Dropout layer with the given rate, seeded with `seed`."""
    return tfkl.Dropout(rate, seed=seed)


def _seeded_dense(units, activation):
    """Return a Dense layer whose kernel initializer is GlorotUniform seeded with `seed`."""
    return tfkl.Dense(
        units,
        activation=activation,
        kernel_initializer=tfk.initializers.GlorotUniform(seed))


# Feature extraction: input -> supernet -> flattened feature vector
input_layer = tfk.Input(shape=input_shape)
features_extractor = supernet(input_layer)
flattening = tfkl.Flatten(name='Flattening')(features_extractor)
dropuout_flattening = _seeded_dropout(0.2)(flattening)

# Three hidden ReLU layers (512 -> 480 -> 192), each followed by dropout
dense1 = _seeded_dense(512, 'relu')(dropuout_flattening)
dropout_dense1 = _seeded_dropout(0.3)(dense1)
dense2 = _seeded_dense(480, 'relu')(dropout_dense1)
dropout_dense2 = _seeded_dropout(0.2)(dense2)
dense3 = _seeded_dense(192, 'relu')(dropout_dense2)
dropout_dense3 = _seeded_dropout(0.15)(dense3)

# Softmax output with one unit per class (14)
output_layer = _seeded_dense(14, 'softmax')(dropout_dense3)

model = tfk.Model(
    inputs=input_layer,
    outputs=output_layer,
    name='InceptionResNetV2_SameLR')

### Model Training

#### Callbacks

In [None]:
def callbacks(dir, checkpoint=False, monitor="val_accuracy"):
    """Build the list of Keras callbacks for one training run.

    Args:
        dir: Root folder of the run. The "Checkpoint" and "TensorBoard"
            sub-folders are created inside it when needed.
        checkpoint: When true, a ModelCheckpoint callback is added that saves
            the full model (not only the weights) to "Checkpoint/Model.hdf5",
            keeping only the best epoch according to `monitor`.
        monitor: Metric watched by the checkpoint. Defaults to the validation
            accuracy, so that "best" is judged on held-out data; pass
            "accuracy" to rank epochs by training accuracy instead.

    Returns:
        A list with the checkpoint callback first (if requested) followed by
        the TensorBoard callback.
    """
    callback_list = []

    if checkpoint:
        # Checkpoints
        checkpoints_dir = os.path.join(dir, "Checkpoint")
        os.makedirs(checkpoints_dir, exist_ok=True)
        callback_list.append(
            tfk.callbacks.ModelCheckpoint(
                filepath=os.path.join(checkpoints_dir, "Model.hdf5"),
                monitor=monitor,
                save_weights_only=False,
                save_best_only=True,
            )
        )

    # Tensorboard
    tensorboard_dir = os.path.join(dir, "TensorBoard")
    os.makedirs(tensorboard_dir, exist_ok=True)
    callback_list.append(
        tfk.callbacks.TensorBoard(
            log_dir=tensorboard_dir,
            profile_batch=0,   # profiling disabled
            histogram_freq=1,  # weight histograms every epoch
        )
    )

    return callback_list

#### First Run (Bigger Learning Rate)

In [None]:
# First training phase: categorical cross-entropy with Adam at lr = 1e-4
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(learning_rate=1e-4),
    metrics='accuracy',
)

# 15 epochs with class weighting; this run only logs to TensorBoard
first_run_callbacks = callbacks('First')
first_run = model.fit(
    train_gen,
    validation_data=validation_gen,
    epochs=15,
    class_weight=class_weight,
    callbacks=first_run_callbacks,
)
history = first_run.history

2021-11-26 08:22:21.376268: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/15


2021-11-26 08:22:36.972212: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005




2021-11-26 08:29:04.140406: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 3397386240 exceeds 10% of free system memory.


Epoch 2/15


2021-11-26 08:35:03.148293: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 3397386240 exceeds 10% of free system memory.


Epoch 3/15


2021-11-26 08:41:26.077251: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 3397386240 exceeds 10% of free system memory.


Epoch 4/15


2021-11-26 08:47:26.622153: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 3397386240 exceeds 10% of free system memory.


Epoch 5/15


2021-11-26 08:53:47.416105: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 3397386240 exceeds 10% of free system memory.


Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


#### Second Run (Smaller Learning Rate and Bigger Epsilon)

In [None]:
# Second training phase: recompile with a smaller learning rate (1e-5) and a
# larger Adam epsilon (1e-4), then continue training the same model.
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(learning_rate=1e-5, epsilon=1e-4),
    metrics='accuracy',
)

# 20 more epochs; this run also writes a model checkpoint under 'Second'
second_run_callbacks = callbacks('Second', checkpoint=True)
second_run = model.fit(
    train_gen,
    validation_data=validation_gen,
    epochs=20,
    class_weight=class_weight,
    callbacks=second_run_callbacks,
)
history = second_run.history

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
