 **CNN and Data Augmentation - *miscusi* Team**


Import libraries

In [None]:
#Import some useful libraries 

import os
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score #to evaluate the performance of your algorithm
from sklearn.metrics import confusion_matrix

# Short aliases for the Keras API and for its layers namespace
tfk = tf.keras
tfkl = tfk.layers

Load Images

In [None]:
#We load and unzip the images
from google.colab import drive
drive.mount('/gdrive')

%cd /gdrive/MyDrive/TL
!unzip dataset.zip

# !unzip '/gdrive/MyDrive/challenge_1/dataset.zip'

Random seed

In [None]:
# Fix one seed for every source of randomness used in the notebook,
# so that runs are as repeatable as possible.
seed = 42

# Hash seed exported through the environment
os.environ['PYTHONHASHSEED'] = str(seed)

# Python, NumPy and TensorFlow (v2 and compat.v1 API) generators
for _set_seed in (random.seed,
                  np.random.seed,
                  tf.random.set_seed,
                  tf.compat.v1.set_random_seed):
    _set_seed(seed)

In [None]:
# Image data generators + split between TRAINING SET and VALIDATION SET

from tensorflow.keras.preprocessing.image import ImageDataGenerator

cwd = os.getcwd()
dataset_dir = os.path.join(cwd, 'training')

# Settings shared by the two generators. They are defined once so that the
# training and validation pipelines cannot drift apart: in particular both
# ImageDataGenerator objects must use the same validation_split, otherwise
# the 'training' and 'validation' subsets would no longer be complementary.
img_size = (256, 256)     # every image is resized to this size
batch_size = 8
validation_split = 0.2    # fraction of the images held out for validation

flow_kwargs = dict(directory=dataset_dir,
                   target_size=img_size,
                   color_mode='rgb',
                   classes=None,              # class list taken from the sub-folders of dataset_dir
                   class_mode='categorical',  # one-hot encoded labels (matches the softmax output)
                   batch_size=batch_size,
                   seed=seed)

# Data Augmentation on the TRAINING SET

train_data_gen = ImageDataGenerator(rescale=1/255.,           # pixel values scaled to [0, 1]
                                    rotation_range=30,
                                    height_shift_range=50,
                                    width_shift_range=50,
                                    zoom_range=0.3,
                                    horizontal_flip=True,
                                    vertical_flip=True,
                                    fill_mode='reflect',
                                    validation_split=validation_split)

train_generator = train_data_gen.flow_from_directory(shuffle=True,
                                                     subset='training',
                                                     **flow_kwargs)

# Validation Set (to simulate the test set ...) --> NO Data Augmentation,
# only the same rescaling that is applied to the training images.

validation_data_gen = ImageDataGenerator(rescale=1/255.,
                                         validation_split=validation_split)

# shuffle=False: validation loss/accuracy do not depend on the sample order,
# and a fixed order keeps model.predict(validation_generator) aligned with
# validation_generator.classes / .filenames (needed e.g. for a confusion matrix).
validation_generator = validation_data_gen.flow_from_directory(shuffle=False,
                                                               subset='validation',
                                                               **flow_kwargs)

Found 14189 images belonging to 14 classes.
Found 3539 images belonging to 14 classes.


Construction of the *Convolutional Neural Network*

In [None]:
# We build our CNN iteratively: a for loop creates `depth` blocks
# (CONV + ReLU + POOLING) and stacks them one after the other.
# The number of filters starts from `start_f` and doubles at each block.
start_f = 8
# Must match the number of classes found by the data generators
num_classes = 14
# Number of blocks, each one CONV + ReLU + POOLING
depth = 7
# Must match the target_size / color_mode used by the data generators
input_shape = (256, 256, 3)

model = tf.keras.Sequential()

# Declare the input once, explicitly, instead of passing an `input_shape`
# argument (a real one for the first block, a dummy one for the others)
# to every convolutional layer.
model.add(tf.keras.Input(shape=input_shape))

for i in range(depth):
  # Convolutional layer: 'same' padding keeps the spatial size unchanged.
  # Filters: start_f, 2*start_f, 4*start_f, ... (start_f itself is not modified)
  model.add(tf.keras.layers.Conv2D(filters=start_f * 2 ** i,
                                   kernel_size=(3,3),
                                   strides=(1,1),
                                   padding='same'))

  # Activation layer
  model.add(tf.keras.layers.ReLU())

  # Max Pooling 2x2: halves height and width
  # (256 -> 128 -> ... -> 2 after the 7 blocks)
  model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2)))

# FC layer part: 2 dense layers (512 and 128 neurons respectively),
# each one preceded by a dropout layer, + the softmax classification layer
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(units=512,
                                activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(units=128,
                                activation='relu'))
model.add(tf.keras.layers.Dense(units=num_classes, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 256, 256, 8)       224       
                                                                 
 re_lu (ReLU)                (None, 256, 256, 8)       0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 8)      0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 128, 128, 16)      1168      
                                                                 
 re_lu_1 (ReLU)              (None, 128, 128, 16)      0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 64, 64, 16)       0         
 2D)                                                  

Network Training

In [None]:
# Training configuration: learning rate, loss, optimizer and metrics.
# (This cell only compiles the model; the training itself happens in model.fit.)

# Learning rate of the optimizer
lr = 1e-4

# Categorical cross-entropy loss and Adam optimizer
loss = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Metrics reported during training and validation
metrics = ['accuracy']

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# Callbacks passed to model.fit
# -----------------------------
# Set early_stop to False to train without Early Stopping.
early_stop = True

callbacks = []
if early_stop:
    # Watch the validation loss with a patience of 10 epochs;
    # restore_best_weights=True asks Keras to keep the best weights seen.
    es_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    )
    callbacks += [es_callback]

In [None]:
# Fitting the model.
# The History object returned by fit is kept in `history`: history.history
# holds the per-epoch loss and metric values (training and validation), which
# would otherwise be lost once training is over.
# epochs=100 is only an upper bound: the callbacks in `callbacks` can end the
# training earlier.
history = model.fit(x=train_generator,
                    epochs=100,
                    validation_data=validation_generator,
                    callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100


<keras.callbacks.History at 0x7fc63981ea10>

In [None]:
# Save the trained model as 'SubmissionModel', a path relative to the current folder
model.save(filepath='SubmissionModel')

INFO:tensorflow:Assets written to: SubmissionModel/assets
