# Building your own model with Keras

In [None]:
from tensorflow.python.client import device_lib

# Print the local compute devices that TensorFlow reports, as a quick
# sanity check of the runtime before any model is built.
local_devices = device_lib.list_local_devices()
print(local_devices)

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.backend import clear_session
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, ReLU, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.regularizers import l2
# Reset the Keras backend session so the model built below starts from a
# clean state (matters when this notebook is re-run in the same kernel).
clear_session()

In [None]:
# Root folder of the dataset; leave empty to resolve 'train' and 'test'
# relative to the current working directory.
base_path = ''

# Placeholder for a separate validation folder (left empty here).
validation_folders_path = ''

# The train and test folders both live directly under base_path.
train_folders_path, test_folders_path = (
    os.path.join(base_path, split_name) for split_name in ('train', 'test')
)

In [None]:
# Class labels are the names of the sub-directories of the training folder.
# Only directories are kept (stray files such as '.DS_Store' would otherwise
# inflate len(classlabels), which sizes the softmax layer), and the names are
# sorted so the order does not depend on the filesystem's listing order.
# NOTE(review): this is intended to mirror how flow_from_directory infers its
# classes - compare with training_set.class_indices to confirm.
classlabels = sorted(
    entry
    for entry in os.listdir(train_folders_path)
    if os.path.isdir(os.path.join(train_folders_path, entry))
)
print(classlabels)

In [None]:
# Square input images: width and height (in pixels) share the same value.
img_width = img_height = 224

## Building the model

In [None]:
model = Sequential()

# Step 1 - Convolutional layers
# First convolutional block: declares the input shape and is the only block
# without batch normalisation.
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(img_width, img_height, 3)))
model.add(ReLU())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second to sixth convolutional blocks, each Conv -> BatchNorm -> ReLU -> MaxPool.
# More blocks can be appended by extending the tuple, with the number of
# Conv2D filters GENERALLY increasing by powers of 2.
for n_filters in (64, 128, 256, 512, 1024):
    model.add(Conv2D(n_filters, (3, 3), padding='same'))
    model.add(BatchNormalization())
    model.add(ReLU())
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Step 2 - Global Average Pooling, followed by
# Step 3 - Full connection (one hidden dense layer, then one softmax unit
# per class label).
for head_layer in (
    GlobalAveragePooling2D(),
    Dense(units=1024),
    BatchNormalization(),
    ReLU(),
    Dropout(0.3),
    Dense(units=len(classlabels), activation='softmax'),
):
    model.add(head_layer)

# Compiling the CNN
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

## Generate the train and validation sets

In [None]:
from keras.preprocessing.image import ImageDataGenerator

batch_size = 32
val_fraction = 0.2  # share of the images under train_folders_path held out for validation

# Training images are rescaled to [0, 1] and randomly augmented.
datagen = ImageDataGenerator(rescale=1./255,
                             shear_range=0.2,
                             rotation_range=30,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             validation_split=val_fraction)

# Validation images are only rescaled: applying random augmentation to them
# would make the validation metrics (which drive the callbacks) noisy and
# not comparable from epoch to epoch.
# NOTE(review): this relies on both generators using the same
# validation_split on the same directory so that the 'training' and
# 'validation' subsets stay disjoint - confirm against the installed Keras.
val_datagen = ImageDataGenerator(rescale=1./255,
                                 validation_split=val_fraction)

print('Making training data generator...')
training_set = datagen.flow_from_directory(train_folders_path,
                                           target_size=(img_width, img_height),
                                           batch_size=batch_size,
                                           class_mode='categorical',
                                           subset='training')

print('Making validation data generator...')
# shuffle=False keeps the validation batches in a fixed order.
cv_set = val_datagen.flow_from_directory(train_folders_path,
                                         target_size=(img_width, img_height),
                                         batch_size=batch_size,
                                         class_mode='categorical',
                                         subset='validation',
                                         shuffle=False)

## Setting up the callbacks

In [None]:
# All three callbacks watch the same metric.
monitored_metric = 'val_accuracy'
checkpoint_path = os.path.join(base_path, 'CNN Model')

# Early stopping on the monitored metric, with a patience of 20 epochs.
ES = EarlyStopping(patience=20, monitor=monitored_metric)

# Checkpoint the full model (not just the weights), keeping only the best
# one according to the monitored metric.
MC = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor=monitored_metric,
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Learning-rate reduction on plateau: factor 0.2, patience 5 epochs,
# floor of 1e-8.
LR = ReduceLROnPlateau(
    monitor=monitored_metric,
    mode='max',
    factor=0.2,
    patience=5,
    min_lr=1e-8,
    verbose=1,
)

## Training the model

In [None]:
# Train for up to 100 epochs using the ES / MC / LR callbacks.
# len(generator) is the number of batches needed to cover the data once, so
# no batch size is hard-coded here and no extra step is run when the sample
# count is an exact multiple of the batch size.
history = model.fit_generator(
    training_set,
    steps_per_epoch=len(training_set),
    epochs=100,
    validation_data=cv_set,
    validation_steps=len(cv_set),
    callbacks=[ES, MC, LR],
)

In [None]:
# Per-epoch metric lists recorded during training, keyed by metric name.
recorded = history.history

acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

In [None]:
# Two stacked panels: accuracy on top, loss underneath.
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

# Accuracy panel: keep the automatic lower bound, cap the upper bound at 1.
acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

# Loss panel: fixed y-range of [0, 1].
loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 1.0])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')

plt.show()

After error analysis, if we see that the model is overfitting, we can add L2 regularizers to the hidden layers using the following format:

For example, the 2nd Conv Layer: 
<br>
model.add(Conv2D(64, (3, 3),__kernel_regularizer=l2(0.01)__, padding='same'))

Other possible solutions can include:<br>
Increase the dropout rate, e.g. Dropout(0.3) -> Dropout(0.5)<br>
Decrease the number of hidden layers


In the case of a vanishing gradient issue, we can use __LeakyReLU__ instead of __ReLU__ activation functions.