 **CNN and Data Augmentation - *miscusi* Team**


Import libraries

In [None]:
#Import some useful libraries 

import os
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score #to evaluate the performance of your algorithm
from sklearn.metrics import confusion_matrix

# Short aliases for the Keras namespace and its layers sub-module
tfk = tf.keras
tfkl = tfk.layers

Load Images

In [None]:
#We load and unzip the images
from google.colab import drive
drive.mount('/gdrive')

%cd /gdrive/MyDrive/challenge1DL
!unzip dataset.zip

# !unzip '/gdrive/MyDrive/challenge_1/dataset.zip'

In [None]:
# Fix the seed of every random number generator used in the notebook
# (Python, NumPy, TensorFlow) so that repeated runs are comparable.
seed = 42

# Exported for completeness: the hash seed of the interpreter that is already
# running was chosen at start-up and is not changed by this assignment.
os.environ['PYTHONHASHSEED'] = str(seed)

for _seed_fn in (random.seed,
                 np.random.seed,
                 tf.random.set_seed,
                 tf.compat.v1.set_random_seed):
    _seed_fn(seed)

In [None]:
# Image data generators + split between TRAINING SET and VALIDATION SET
from tensorflow.keras.preprocessing.image import ImageDataGenerator

cwd = os.getcwd()
dataset_dir = os.path.join(cwd, 'training')

# Settings shared by both generators. They are defined once so that the two
# pipelines cannot drift apart: in particular both ImageDataGenerator objects
# must use the same validation_split for 'training' and 'validation' to be
# complementary subsets of the same directory.
IMG_SIZE = (256, 256)
BATCH_SIZE = 8
VALIDATION_SPLIT = 0.2

flow_kwargs = dict(directory=dataset_dir,
                   target_size=IMG_SIZE,
                   color_mode='rgb',
                   classes=None,              # infer the classes from the sub-folder names
                   class_mode='categorical',  # one-hot labels, as required by the softmax output
                   batch_size=BATCH_SIZE,
                   seed=seed)

# TRAINING SET --> rescaling + Data Augmentation
train_data_gen = ImageDataGenerator(rescale=1/255.,
                                    rotation_range=30,
                                    height_shift_range=50,   # values >= 1 are interpreted as pixels
                                    width_shift_range=50,
                                    zoom_range=0.3,
                                    horizontal_flip=True,
                                    vertical_flip=True,
                                    fill_mode='reflect',
                                    validation_split=VALIDATION_SPLIT)

train_generator = train_data_gen.flow_from_directory(shuffle=True,
                                                     subset='training',
                                                     **flow_kwargs)

# VALIDATION SET (to simulate the test set) --> rescaling only, NO Data Augmentation
validation_data_gen = ImageDataGenerator(rescale=1/255.,
                                         validation_split=VALIDATION_SPLIT)

# shuffle=False keeps the batches in directory order, so that the output of
# model.predict(validation_generator) lines up with validation_generator.classes
# (needed for a confusion matrix). Epoch-level validation metrics do not depend
# on the batch order.
validation_generator = validation_data_gen.flow_from_directory(shuffle=False,
                                                               subset='validation',
                                                               **flow_kwargs)

# Both subsets must agree on the label -> index mapping.
assert train_generator.class_indices == validation_generator.class_indices, \
    "training and validation generators disagree on the class indices"

Found 14189 images belonging to 14 classes.
Found 3539 images belonging to 14 classes.


Construction of the *Convolutional Neural Network*

In [None]:
# The CNN is assembled iteratively: each pass of the loop appends one
# CONV + ReLU + POOLING block to the Sequential model.
start_f = 8        # filters of the first convolution; doubled for every following block
num_classes = 14   # number of units of the softmax output layer
depth = 7          # number of CONV + ReLU + POOLING blocks

model = tf.keras.Sequential()

# The input shape is declared once, up front. The following layers infer their
# input shape from the layer before them, so no placeholder shape has to be
# passed to the later convolutions.
model.add(tf.keras.Input(shape=(256, 256, 3)))

# Local counter, so that the configuration value start_f is left untouched.
n_filters = start_f
for _ in range(depth):
    # Convolutional layer ('same' padding keeps the spatial size)
    model.add(tf.keras.layers.Conv2D(filters=n_filters,
                                     kernel_size=(3, 3),
                                     strides=(1, 1),
                                     padding='same'))

    # Activation layer
    model.add(tf.keras.layers.ReLU())

    # Max Pooling 2x2 (halves height and width)
    model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))

    n_filters *= 2

# FC part: two hidden dense layers (512 and 128 neurons respectively), each
# preceded by a dropout layer, followed by the softmax classifier.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(units=512,
                                activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(units=128,
                                activation='relu'))
model.add(tf.keras.layers.Dense(units=num_classes, activation='softmax'))

model.summary()

Network Training

In [None]:
# A weighted version of categorical_crossentropy.
# This allows us to address the problem of having unbalanced classes.

import numpy as np
from keras import backend as K
def weighted_categorical_crossentropy(weights):
    """Build a class-weighted categorical cross-entropy loss.

    Each class term of the cross-entropy is multiplied by the weight of that
    class, so that errors on some classes can count more than errors on others.

    Args:
        weights: array-like of shape (C,), where C is the number of classes.
            ``weights[c]`` multiplies the contribution of class ``c``.

    Returns:
        A function ``loss(y_true, y_pred)`` that can be passed to
        ``model.compile``. ``y_true`` holds one-hot labels and ``y_pred`` the
        predicted class probabilities, both with the classes on the last axis.
        The returned tensor contains one loss value per sample.

    Usage:
        weights = np.array([0.5, 2, 10])  # class 1 at 0.5, class 2 twice the normal weight, class 3 10x
        loss = weighted_categorical_crossentropy(weights)
        model.compile(loss=loss, optimizer='adam')
    """
    # The weights are fixed for the whole training, hence a constant. Only
    # TensorFlow ops are used, so the loss does not depend on the standalone
    # `keras` backend module while the model itself is built with tf.keras.
    class_weights = tf.constant(np.asarray(weights, dtype=np.float32))

    # The inner function keeps the name `loss`: it is the name under which the
    # function is attached to the compiled model.
    def loss(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        # Bring labels and weights to the dtype of the predictions.
        y_true = tf.cast(y_true, y_pred.dtype)
        w = tf.cast(class_weights, y_pred.dtype)

        # scale predictions so that the class probabilities of each sample sum to 1
        y_pred = y_pred / tf.reduce_sum(y_pred, axis=-1, keepdims=True)
        # clip to prevent NaNs and Infs in the logarithm
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)

        # weighted cross-entropy, summed over the class axis
        weighted_log_likelihood = y_true * tf.math.log(y_pred) * w
        return -tf.reduce_sum(weighted_log_likelihood, axis=-1)

    return loss

In [None]:
# Per-class loss weights, one entry per class, in class-index order.
weights = np.array([
    0.2672, 0.5653, 0.4528, 0.2189, 0.1810,
    0.1510, 0.2702, 0.3450, 0.3687, 1,
    0.1633, 0.4599, 0.3922, 0.04637,
])

In [None]:
# Training configuration: optimizer, loss and monitored metrics.

lr = 1e-4
metrics = ['accuracy']

# Class-weighted cross-entropy.
# (unweighted alternative: loss = tf.keras.losses.CategoricalCrossentropy())
loss = weighted_categorical_crossentropy(weights)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

model.compile(loss=loss,
              optimizer=optimizer,
              metrics=metrics)

In [None]:
# Callbacks handed to model.fit. Early stopping is switched on/off by the flag.
early_stop = True

callbacks = []
if early_stop:
    # Stop once val_loss has not improved for 10 epochs and roll the model
    # back to the weights of the best epoch.
    es_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    )
    callbacks.append(es_callback)

In [None]:
# Fit the model. The returned History object is kept: history.history holds
# the per-epoch values of the loss and of the metrics (training and
# validation), which are needed to plot the learning curves afterwards.
history = model.fit(x=train_generator,
                    epochs=100,
                    validation_data=validation_generator,
                    callbacks=callbacks)

In [None]:
# Persist the trained network; the relative path is resolved against the current working directory.
model.save(filepath='SubmissionModel')

INFO:tensorflow:Assets written to: SubmissionModel/assets
