In [None]:
import numpy as np
## Progress bar
from tqdm.auto import tqdm
import pylab as plt
import copy
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, callbacks, regularizers
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.preprocessing.image import ImageDataGenerator
!pip install keras-tuner -q
import keras_tuner



[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.3/167.3 KB[0m [31m734.9 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Environment check: report whether the notebook is running inside Google Colab.
running_on_colab = 'google.colab' in str(get_ipython())
if not running_on_colab:
    banner = "==="*15
    print(banner, " WARNING ", banner)
    print("For DSA4212 assignment 1, code needs to be run on a Google Colab with a single GPU")
    print(banner, " WARNING ", banner)
else:
    print("All ok -- code is running on a Google Colab")



All ok -- code is running on a Google Colab


In [None]:
# mount the Google Drive
# (mounted under /content/drive; the next cell changes into a folder below it)
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# go to the data folder -- you may need to change this location
# (the .npz files in the next cell are loaded with paths relative to this folder)
%cd /content/drive/MyDrive/DSA4212/assignment_1/

/content/drive/MyDrive/DSA4212/assignment_1


In [None]:
# Names of the ten classes.
class_names = [
    "fish", "dog", "device", "chainsaw", "church",
    "horn", "truck", "petrol", "golf", "parachute",
]

# Open both archives from the current directory -- be patient, this takes a few secs.
data_train_all = np.load("assignment_1_train.npz")
data_test = np.load("assignment_1_test.npz")

# Images: cast to float32 and divide by 255 so pixel intensities are in [0,1].
X_train_all = data_train_all["img"].astype(np.float32) / 255.
X_test = data_test["img"].astype(np.float32) / 255.

# Labels: cast to Python-int dtype.
Y_train_all = data_train_all["label"].astype(int)
Y_test = data_test["label"].astype(int)

# Height, width and channel count come from the training array's shape
# (first axis is the number of images).
_, H, W, C = X_train_all.shape
print(f"Img Shape: {H}x{W}x{C}")

Img Shape: 128x128x3


In [None]:
# Seed NumPy's global RNG so the shuffles below -- and therefore the
# train/validation split that is cut from the shuffled training data --
# are identical on every Restart & Run All.
SEED = 4212
np.random.seed(SEED)

# shuffle the train set (same permutation for images and labels)
indices_shuffled = np.random.permutation(len(X_train_all))
X_train_all = X_train_all[indices_shuffled]
Y_train_all = Y_train_all[indices_shuffled]

# shuffle the test set (same permutation for images and labels)
indices_shuffled = np.random.permutation(len(X_test))
X_test = X_test[indices_shuffled]
Y_test = Y_test[indices_shuffled]

In [None]:
# Split the training data 80/20 into a training part and a validation part.
len_train_all = len(X_train_all)
len_train = int(0.8 * len_train_all)
len_val = len_train_all - len_train
len_test = len(X_test)
len_tets = len_test  # original (misspelled) name, kept so existing references keep working

# First 80% -> training
X_train = X_train_all[:len_train]
Y_train = Y_train_all[:len_train]

# Remaining 20% -> validation
X_val = X_train_all[len_train:]
Y_val = Y_train_all[len_train:]

# Sanity check: the two parts cover the data exactly once.
assert len(X_train) == len_train and len(X_val) == len_val

In [None]:
# Show the shape of the training images array as the cell output.
X_train.shape

(7436, 128, 128, 3)

In [None]:
BATCH_SIZE = 32  # batch size used by all three iterators in this cell

# Un-augmented training images
default_gen = keras.preprocessing.image.ImageDataGenerator().flow(
    X_train, Y_train, batch_size=BATCH_SIZE)

# Un-augmented validation images
val_generator = keras.preprocessing.image.ImageDataGenerator().flow(
    X_val, Y_val, batch_size=BATCH_SIZE)

# Image augmentation settings; images fed through this generator are randomly
# rotated, zoomed, shifted, sheared and flipped.  validation_split lets
# train_datagen.flow(..., subset=...) be used later in the notebook.
augmentation_settings = dict(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2,
)
train_datagen = keras.preprocessing.image.ImageDataGenerator(**augmentation_settings)

# Augmented training images
train_generator = train_datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE)

## List of base models ##
Testing out a few baseline models, inspired by architectures that we will develop further:
1. Basic CNN model referencing lecture code. : 53.5% validation accuracy 
2. LeNet model : 9% validation accuracy
3. AlexNet : 46% validation accuracy
4. VGG16 : 59.6% validation accuracy

Since they achieve the two highest validation accuracies, we will look further into the VGG16 and the basic CNN model.

In [None]:
# Basic CNN model (baseline referencing the lecture code)
BCCN = models.Sequential()
BCCN.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.Conv2D(48, (3, 3), activation='relu'))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.Conv2D(64, (3, 3), activation='relu'))
BCCN.add(layers.Flatten())
BCCN.add(layers.Dense(128, activation='relu'))
# Output layer: softmax over the 10 classes, i.e. the model emits probabilities.
BCCN.add(layers.Dense(10, activation="softmax"))

# from_logits=False because the output layer already applies softmax;
# from_logits=True is only right for a final layer without activation.
BCCN.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

BCCN.fit(default_gen, validation_data=val_generator, epochs=10)

# NOTE: evaluated on the validation split (X_val, Y_val); the figure printed
# as "Test accuracy" below is therefore a validation accuracy.
test_loss, test_acc = BCCN.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', test_acc*100, "%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
59/59 - 0s - loss: 3.3220 - accuracy: 0.5355 - 462ms/epoch - 8ms/step

Test accuracy: 53.54838967323303 %


In [None]:
# Base LeNet-style model, declared as a single layer list.
BLN = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)),
    layers.AveragePooling2D((2,2)),
    layers.Activation('sigmoid'),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.AveragePooling2D((2,2)),
    layers.Activation('sigmoid'),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.Flatten(),
    layers.Dense(84, activation='relu'),
    # No activation on the last layer: it outputs raw logits, which is what
    # from_logits=True in the loss below expects.
    layers.Dense(10),
])

lenet_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
BLN.compile(optimizer='adam', loss=lenet_loss, metrics=['accuracy'])

BLN.fit(default_gen, validation_data=val_generator, epochs=10)

# Scored on the validation split (X_val, Y_val).
test_loss, test_acc = BLN.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', test_acc*100, "%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
59/59 - 0s - loss: 2.3022 - accuracy: 0.0919 - 467ms/epoch - 8ms/step

Test accuracy: 9.193548560142517 %


In [None]:
# Base AlexNet-style model: five conv layers with batch normalisation,
# three max-pooling steps, then two 4096-unit dense layers with dropout.
AlexNet = keras.models.Sequential([
    keras.layers.Conv2D(filters=32, kernel_size=(3,3), strides=(2,2), activation='relu', input_shape=(128,128,3)),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=64, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=128, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Flatten(),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.5),
    # Output layer: softmax over the 10 classes (probabilities, not logits).
    keras.layers.Dense(10, activation='softmax')
])

# from_logits=False because the output layer already applies softmax.
AlexNet.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

AlexNet.fit(default_gen, validation_data=val_generator, epochs=10)

# NOTE: evaluated on the validation split (X_val, Y_val); the figure printed
# as "Test accuracy" below is therefore a validation accuracy.
test_loss, test_acc = AlexNet.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', test_acc*100, "%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
59/59 - 0s - loss: 1.7864 - accuracy: 0.4645 - 407ms/epoch - 7ms/step

Test accuracy: 46.45161330699921 %


In [None]:
## VGG 16 (VGG-style baseline: Conv -> MaxPool -> BatchNorm stages + softmax classifier)

features = [64,128]
VGG = keras.models.Sequential()

# first stage (carries the input shape)
VGG.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(128,128,3)))
VGG.add(layers.MaxPooling2D(2,2))
VGG.add(layers.BatchNormalization())

# one further stage per entry in `features`
for n_filters in features:
  VGG.add(layers.Conv2D(n_filters,(3,3),activation='relu'))  # convolution layer
  VGG.add(layers.MaxPooling2D(2,2))                          # pooling layer
  VGG.add(layers.BatchNormalization())

VGG.add(layers.Flatten())
# output layer: softmax over the 10 classes (probabilities, not logits)
VGG.add(layers.Dense(10,activation='softmax'))

opt = keras.optimizers.Adamax(learning_rate=0.0001)

# from_logits=False because the output layer already applies softmax.
VGG.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

#VGG.summary()
VGG.fit(default_gen, validation_data=val_generator, epochs=10)

# NOTE: evaluated on the validation split; the printed "Test accuracy" is a
# validation accuracy.
val_loss, val_acc = VGG.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', val_acc*100, "%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
59/59 - 1s - loss: 1.3307 - accuracy: 0.5968 - 526ms/epoch - 9ms/step

Test accuracy: 59.67742204666138 %


## VGG16 model ##

## Initialisation stage ##

- Will be using the VGG architecture as a reference
- Constant learning rate of 0.001
- Adam as optimiser
- 3 layers for now with features, [32,48,64]
- Activation function will be sigmoid


These initial model settings are from the settings that yielded the highest validation accuracy.

In [None]:
## VGG 16 (initialisation stage: sigmoid activations, Adam with constant learning rate)

features = [48,64]
VGG = keras.models.Sequential()

# first stage (carries the input shape)
VGG.add(layers.Conv2D(32,(3,3),activation='sigmoid',input_shape=(128,128,3)))
VGG.add(layers.MaxPooling2D(2,2))
VGG.add(layers.BatchNormalization())

# one further stage per entry in `features`
for n_filters in features:
  VGG.add(layers.Conv2D(n_filters,(3,3),activation='sigmoid'))  # convolution layer
  VGG.add(layers.MaxPooling2D(2,2))                             # pooling layer
  VGG.add(layers.BatchNormalization())

VGG.add(layers.Flatten())
# output layer: softmax over the 10 classes (probabilities, not logits)
VGG.add(layers.Dense(10,activation='softmax'))


opt = keras.optimizers.Adam(learning_rate=0.001)

# from_logits=False because the output layer already applies softmax.
VGG.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

#VGG.summary()
VGG.fit(default_gen, validation_data=val_generator, epochs=10)

# NOTE: evaluated on the validation split; the printed "Test accuracy" is a
# validation accuracy.
val_loss, val_acc = VGG.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', val_acc*100, "%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
59/59 - 1s - loss: 2.2397 - accuracy: 0.5468 - 524ms/epoch - 9ms/step

Test accuracy: 54.67742085456848 %


## Overfit ##
- Play around with the parameters to maximise training accuracy and at the same time increase validation accuracy.
- Overfitted model specifications
1. Activation function: Relu
2. Kernel size : (3,3)
3. Features : [32,48,64,128]
4. Optimiser: Adam
5. Lr_schedule : Exponential 
6. Initial LR : 0.0001

- Training accuracy by 10th epoch : 0.9833
- Validation accuracy : 0.6339
- Training time: 1m 32s


In [None]:
## VGG 16 (overfit stage)
# Experiment log (validation accuracy after each change):
# Changed optimiser to adam :57%
# Changed max_pooling window (3,3):54%
# Changed activation function to relu: 59%
# Change features to [32,48,64,128]: 63%
# Change to a changing learning rate: 65.91%
# Change kernel size to (4,4): 63.9%
# Change kernel size to (2,2): 65.69%
# Adding one more dense layer: 64.999%


features = [48,64,128,156]
VGG = keras.models.Sequential()

# first stage (carries the input shape)
VGG.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(128,128,3)))
VGG.add(layers.MaxPooling2D(2,2))
VGG.add(layers.BatchNormalization())

# one further stage per entry in `features`
for n_filters in features:
  VGG.add(layers.Conv2D(n_filters,(3,3),activation='relu'))  # convolution layer
  VGG.add(layers.MaxPooling2D(2,2))                          # pooling layer
  VGG.add(layers.BatchNormalization())

VGG.add(layers.Flatten())
# output layer: softmax over the 10 classes (probabilities, not logits)
VGG.add(layers.Dense(10,activation='softmax'))

initial_learning_rate = 0.001

# NOTE(review): with 7436 training images and batch size 32 there are about
# 233 steps per epoch, i.e. ~2330 steps in 10 epochs -- far fewer than
# decay_steps=100000.  With staircase=True the rate therefore appears to stay
# at its initial value for the whole run; confirm whether a smaller
# decay_steps was intended.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

opt = keras.optimizers.Adam(learning_rate=lr_schedule)

# from_logits=False because the output layer already applies softmax.
VGG.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

#VGG.summary()
VGG.fit(default_gen, validation_data=val_generator, epochs=10)

# NOTE: evaluated on the validation split; the printed "Test accuracy" is a
# validation accuracy.
val_loss, val_acc = VGG.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', val_acc*100, "%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
59/59 - 0s - loss: 1.3644 - accuracy: 0.6672 - 483ms/epoch - 8ms/step

Test accuracy: 66.72043204307556 %


## Regularization ##
- Implementing restrictions and reducing capacity to increase validation accuracy at the expense of training accuracy
- Decreasing batch size
- Add dropout layer to each iteration
- Add dropout layer in input stage
- Early stopping
- L2 and L1 regularisation

Final model specifications
1. Dropout layer at input stage (0.25)
2. L2 regularisation at final dense layer (0.01)
3. Early stopping (patience = 5, min_delta = 0.1)
4. No batch size specification

- Training accuracy by 10th epoch : 0.9627 (a lot of room left to increase validation accuracy)
- Training time : 1m28s
- Validation accuracy: 68.226%

In [None]:
## VGG 16 (regularisation stage)
# Experiment log (validation accuracy for each setting):
## Using dropout layer per iteration
# Add dropout layer(0.5) at each iteration: 60.5%
# Add dropout layer(0.1) at each iteration: 67.688%
# Add dropout layer(0.2) at each iteration: 69.95%
# Add dropout layer(0.3) at each iteration: 68.33%
# Add dropout layer(0.25) at each iteration: 71.77% , training accuracy: 0.757

## Using dropout layer for first layer(0.25):68.27% , training accuracy: 0.9662

## Decreasing batch size(Doesnt seem to do any good)
# Batch_size 256 : 15%
# Batch_size 750 : 13%


## Early stopping
# Patience(5), min_delta(0.001) : 64.46% 56s
# Patience(5), min_delta(0.01): 65.6989% 1m30s
# Patience(5), min_delta(0.1): 65.86% 57s

## L2 regularisation --> Will use this for grid search later
# (0.001) 67.58%

## L1 regularisation --> Will use this for grid search later
# (0.001) 67.15%


features = [48,64,128,156]
VGG = keras.models.Sequential()

# first stage (carries the input shape), followed by the only active dropout layer
VGG.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(128,128,3)))
VGG.add(layers.MaxPooling2D(2,2))
VGG.add(layers.BatchNormalization())
VGG.add(layers.Dropout(0.25))

# one further stage per entry in `features`
for n_filters in features:
  VGG.add(layers.Conv2D(n_filters,(3,3),activation='relu'))  # convolution layer
  VGG.add(layers.MaxPooling2D(2,2))                          # pooling layer
  VGG.add(layers.BatchNormalization())
#  VGG.add(layers.Dropout(0.25))   # per-stage dropout variant from the log above

VGG.add(layers.Flatten())
# output layer: softmax over the 10 classes, with an L2 penalty on its kernel
VGG.add(layers.Dense(10,activation='softmax',kernel_regularizer=regularizers.l2(0.001)))

early_stopping = callbacks.EarlyStopping(
    min_delta=0.1, # minimum amount of change to count as an improvement
    patience=5, # how many epochs to wait before stopping
    restore_best_weights=True,
)

initial_learning_rate = 0.001

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

opt = keras.optimizers.Adam(learning_rate=lr_schedule)

# from_logits=False because the output layer already applies softmax.
VGG.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

#VGG.summary()
VGG.fit(default_gen, validation_data=val_generator, epochs=10, callbacks=[early_stopping])

# NOTE: evaluated on the validation split; the printed "Test accuracy" is a
# validation accuracy.
val_loss, val_acc = VGG.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', val_acc*100, "%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
59/59 - 1s - loss: 1.3755 - accuracy: 0.6516 - 547ms/epoch - 9ms/step

Test accuracy: 65.16128778457642 %


## Tuning parameters ##
- The three hyperparameters that will require tuning will be lambda, dropout rate and early stopping.
- This stage is about finding the hyperparameter values that return the best validation accuracy.
- Done with the help of the keras tuner package.
- Random Search : best validation 0.66
- Bayesian Optimisation : best validation 0.61
- Final model will use hyper parameters from random search

In [None]:
def VGGT(hp):
  """Build and compile the tunable VGG-style CNN used by keras-tuner.

  Two hyperparameters are registered on `hp`:
    - "dpr":   dropout rate after the first conv stage (0.1 to 0.9, step 0.05)
    - "lamb2": L2 penalty on the output layer's kernel (1e-5 to 1e-1)

  Args:
    hp: hyperparameter container; only its `Float` method is used here.

  Returns:
    A compiled Sequential model taking 128x128x3 inputs and producing a
    softmax over 10 classes.
  """
  dpr = hp.Float("dpr", min_value=0.1, max_value=0.9, step=0.05)
  lamb2 = hp.Float("lamb2", min_value=1e-5, max_value=1e-1)
  features = [48,64,128,156]
  model = keras.models.Sequential()

  # first stage (carries the input shape), followed by the tuned dropout
  model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(128,128,3)))
  model.add(layers.MaxPooling2D(2,2))
  model.add(layers.BatchNormalization())
  model.add(layers.Dropout(dpr))

  # one further Conv -> MaxPool -> BatchNorm stage per entry in `features`
  for n_filters in features:
    model.add(layers.Conv2D(n_filters,(3,3),activation='relu'))
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.BatchNormalization())

  model.add(layers.Flatten())
  # output layer: softmax probabilities, kernel penalised with the tuned L2 factor
  model.add(layers.Dense(10,activation='softmax',kernel_regularizer=regularizers.l2(lamb2)))

  initial_learning_rate = 0.001

  lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

  opt = keras.optimizers.Adam(learning_rate=lr_schedule)

  # from_logits=False because the output layer already applies softmax.
  model.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

  return model


In [None]:
## RandomSearch
# Build the model once with a fresh HyperParameters object; the result is
# discarded (presumably a quick check that the builder runs without errors).
VGGT(keras_tuner.HyperParameters())

# Random search over the hyperparameters declared in VGGT, ranked by
# validation accuracy.  Results are stored under
# "assignment_1/RandomSearch results"; overwrite=True starts from scratch.
RStuner = keras_tuner.RandomSearch(
    hypermodel=VGGT,
    objective="val_accuracy",
    max_trials=5,
    executions_per_trial=2,
    overwrite=True,
    directory="assignment_1",
    project_name="RandomSearch results",
)
# Each candidate is trained for 5 epochs on the un-augmented training arrays
# and scored on the validation split.
RStuner.search(X_train, Y_train, epochs=5,validation_data=(X_val, Y_val))

Trial 5 Complete [00h 01m 28s]
val_accuracy: 0.6354838907718658

Best val_accuracy So Far: 0.6736558973789215
Total elapsed time: 00h 08m 06s


In [None]:
# Bayesian Optimisation
# Same builder and objective as the random search, but with fewer trials (3)
# and fewer epochs per trial (3), so the best scores of the two searches are
# not directly comparable.
# No directory/project_name is given, so keras-tuner's defaults apply.
BOtuner = keras_tuner.BayesianOptimization(
    hypermodel=VGGT,
    objective="val_accuracy",
    max_trials=3,
    executions_per_trial=2,
    overwrite=True
)
BOtuner.search(X_train, Y_train, epochs=3, validation_data=(X_val, Y_val))


Trial 3 Complete [00h 01m 15s]
val_accuracy: 0.5596774071455002

Best val_accuracy So Far: 0.5758064389228821
Total elapsed time: 00h 03m 45s


In [None]:
# Early stopping, use data augmentation validation set to tune this
# Manually tune this one
def VGGES():
  """Build and compile the VGG-style CNN with fixed regularisation settings.

  Same architecture as the tunable builder, but with the dropout rate fixed
  at 0.2 and the L2 factor fixed at 0.001, so that only the early-stopping
  settings vary between runs.

  Returns:
    A compiled Sequential model taking 128x128x3 inputs and producing a
    softmax over 10 classes.
  """
  features = [48,64,128,156]
  model = keras.models.Sequential()

  # first stage (carries the input shape), followed by dropout
  model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(128,128,3)))
  model.add(layers.MaxPooling2D(2,2))
  model.add(layers.BatchNormalization())
  model.add(layers.Dropout(0.2))

  # one further Conv -> MaxPool -> BatchNorm stage per entry in `features`
  for n_filters in features:
    model.add(layers.Conv2D(n_filters,(3,3),activation='relu'))
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.BatchNormalization())

  model.add(layers.Flatten())
  # output layer: softmax probabilities, kernel penalised with L2
  model.add(layers.Dense(10,activation='softmax',kernel_regularizer=regularizers.l2(0.001)))

  initial_learning_rate = 0.001

  lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

  opt = keras.optimizers.Adam(learning_rate=lr_schedule)

  # from_logits=False because the output layer already applies softmax.
  model.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

  return model

# Upper bound on epochs per run.  Without an explicit `epochs`, fit() trains
# for a single epoch, so a callback with patience=5 could never trigger and
# every min_delta value would give the same one-epoch run.
ES_MAX_EPOCHS = 20

# Train one fresh model per min_delta value and report its validation accuracy.
min_delta_values = [0.1,0.01,0.001,0.0001]
for min_delta in min_delta_values:
  early_stopping = callbacks.EarlyStopping(
      min_delta=min_delta, # minimum amount of change to count as an improvement
      patience=5, # how many epochs to wait before stopping
      restore_best_weights=True,
    )
  modelES = VGGES()
  # Both subsets come from the augmenting generator (validation_split=0.2).
  modelES.fit(train_datagen.flow(X_train,Y_train,batch_size = 512,subset = "training"),
           validation_data = train_datagen.flow(X_train,Y_train,batch_size = 512,subset = "validation"),
           epochs=ES_MAX_EPOCHS,
           callbacks=[early_stopping])
  # Scored on the un-augmented validation split (X_val, Y_val).
  val_loss, val_acc = modelES.evaluate(X_val,  Y_val, verbose=2)

  print("min_delta:", min_delta)
  print('\nTest accuracy:', val_acc*100, "%")


## Final testing with tuned model ##
- 70.9% test accuracy


In [None]:
# Getting best model
best_hps = RStuner.get_best_hyperparameters(5)
# Build the model with the best hp.
model1 = VGGT(best_hps[0])

early_stopping = callbacks.EarlyStopping(
    min_delta=0.01, # minimum amount of change to count as an improvement
    patience=5, # how many epochs to wait before stopping
    restore_best_weights=True,
)
# EarlyStopping needs validation metrics to monitor.  Without validation_data
# the callback has nothing to compare, and training always runs all 20 epochs
# (as the previously recorded output shows).  The test set stays untouched
# until the final evaluate() call.
model1.fit(x=X_train, y=Y_train, validation_data=(X_val, Y_val),
           epochs=20, callbacks=[early_stopping])

# Final score on the held-out test set.
test_loss, test_acc = model1.evaluate(X_test,  Y_test, verbose=2)
print('\nTest accuracy:', test_acc*100, "%")

Epoch 1/20


  output, from_logits = _get_logits(






Epoch 2/20



Epoch 3/20



Epoch 4/20



Epoch 5/20



Epoch 6/20



Epoch 7/20



Epoch 8/20



Epoch 9/20



Epoch 10/20



Epoch 11/20



Epoch 12/20



Epoch 13/20



Epoch 14/20



Epoch 15/20



Epoch 16/20



Epoch 17/20



Epoch 18/20



Epoch 19/20



Epoch 20/20



121/121 - 1s - loss: 1.0995 - accuracy: 0.7090 - 1s/epoch - 10ms/step

Test accuracy: 70.90249061584473 %


## Basic CNN model from lecture ##

In [None]:
# Basic CNN model (same baseline as at the top of the notebook)
BCCN = models.Sequential()
BCCN.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.Conv2D(48, (3, 3), activation='relu'))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.Conv2D(64, (3, 3), activation='relu'))
BCCN.add(layers.Flatten())
BCCN.add(layers.Dense(128, activation='relu'))
# Output layer: softmax over the 10 classes (probabilities, not logits).
BCCN.add(layers.Dense(10, activation="softmax"))

# from_logits=False because the output layer already applies softmax.
BCCN.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

BCCN.fit(default_gen, validation_data=val_generator, epochs=10)

# NOTE: evaluated on the validation split (X_val, Y_val); the figure printed
# as "Test accuracy" below is therefore a validation accuracy.
test_loss, test_acc = BCCN.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', test_acc*100, "%")

Epoch 1/10


  output, from_logits = _get_logits(


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
59/59 - 0s - loss: 3.2629 - accuracy: 0.5360 - 447ms/epoch - 8ms/step

Test accuracy: 53.602153062820435 %


## Overfit ##
- Play around with the parameters to maximise training accuracy and at the same time increase validation accuracy. Increase capacity to increase training accuracy.
- Overfitted model specifications
1. Activation function: Relu
2. Kernel size : (4,4)
3. Features : [40,80,120,160]
4. Optimiser: Adam
5. Lr_schedule : Exponential 
6. Initial LR : 0.0001

- Training accuracy by 10th epoch : 0.9984
- Validation accuracy : 0.6737
- Training time: 51s


In [None]:
# Basic CNN model (overfit stage): four Conv(4x4) -> MaxPool -> BatchNorm
# stages with 40/80/120/160 filters, then a softmax classifier.
BCCN = models.Sequential()
BCCN.add(layers.Conv2D(40, (4, 4), activation='relu', input_shape=(128, 128, 3)))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.BatchNormalization())
BCCN.add(layers.Conv2D(80, (4, 4), activation='relu'))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.BatchNormalization())
BCCN.add(layers.Conv2D(120, (4, 4), activation='relu'))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.BatchNormalization())
BCCN.add(layers.Conv2D(160, (4, 4), activation='relu'))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.BatchNormalization())
BCCN.add(layers.Flatten())
# Output layer: softmax over the 10 classes (probabilities, not logits).
BCCN.add(layers.Dense(10, activation="softmax"))

initial_learning_rate = 0.001

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

opt = keras.optimizers.Adam(learning_rate=lr_schedule)

# from_logits=False because the output layer already applies softmax.
BCCN.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

BCCN.fit(default_gen, validation_data=val_generator, epochs=20)

# NOTE: evaluated on the validation split (X_val, Y_val); the figure printed
# as "Test accuracy" below is therefore a validation accuracy.
test_loss, test_acc = BCCN.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', test_acc*100, "%")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
59/59 - 1s - loss: 1.8160 - accuracy: 0.6505 - 625ms/epoch - 11ms/step

Test accuracy: 65.05376100540161 %


## Regularization ##
- Implementing restrictions and reducing capacity to increase validation accuracy at the expense of training accuracy
1. Add dropout layer to each iteration
- 0.1: 61.88%
- 0.2: 59.7%
- 0.4: 66%
- 0.6: 63%
2. Early stopping
- Generally lower validation accuracy than model but not by much
3. L2 and L1 regularisation
- l1_l2: 65.91%; however, it is worth noting that some epochs reached a high VA (validation accuracy)
- l1: 65.698%, high VAs in validation
- l2: 71.29%, and high VAs
4. Kernel size
- (4,4) : 67.36%
- (3,3) : 63.7%
- (2,2) :  61%

Regularizations that will be used:
1. L1 regularisation
2. Early stopping
3. Dropout

Regularised model VA : 68.064%

In [None]:
# Basic CNN model (regularisation stage): dropout after every stage, an L2
# penalty on the output layer, and early stopping.
BCCN = models.Sequential()
BCCN.add(layers.Conv2D(40, (4, 4), activation='relu', input_shape=(128, 128, 3)))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.BatchNormalization())
BCCN.add(layers.Dropout(0.25))
#----------------------------------------------------------
BCCN.add(layers.Conv2D(80, (4, 4), activation='relu'))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.BatchNormalization())
BCCN.add(layers.Dropout(0.25))
#----------------------------------------------------------
BCCN.add(layers.Conv2D(120, (4, 4), activation='relu'))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.BatchNormalization())
BCCN.add(layers.Dropout(0.25))
#----------------------------------------------------------
BCCN.add(layers.Conv2D(160, (4, 4), activation='relu'))
BCCN.add(layers.MaxPooling2D((2, 2)))
BCCN.add(layers.BatchNormalization())
BCCN.add(layers.Dropout(0.25))
#----------------------------------------------------------
BCCN.add(layers.Flatten())
# Output layer: softmax probabilities, kernel penalised with L2.
BCCN.add(layers.Dense(10, activation="softmax", kernel_regularizer=regularizers.l2(0.01)))

early_stopping = callbacks.EarlyStopping(
    min_delta=0.001, # minimum amount of change to count as an improvement
    patience=5, # how many epochs to wait before stopping
    restore_best_weights=True,
)

initial_learning_rate = 0.001

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

opt = keras.optimizers.Adam(learning_rate=lr_schedule)

# from_logits=False because the output layer already applies softmax.
BCCN.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

BCCN.fit(default_gen, validation_data=val_generator, epochs=20, callbacks=[early_stopping])

# NOTE: evaluated on the validation split; the printed "Test accuracy" is a
# validation accuracy.
val_loss, val_acc = BCCN.evaluate(X_val, Y_val, verbose=2)

print('\nTest accuracy:', val_acc*100, "%")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
59/59 - 1s - loss: 1.2709 - accuracy: 0.6651 - 604ms/epoch - 10ms/step

Test accuracy: 66.50537848472595 %


## Tuning ##
- Manually tune the hyperparameters

In [None]:
def BCCNT(hp):
  """Build and compile the tunable basic CNN used by keras-tuner.

  Two hyperparameters are registered on `hp`:
    - "dpr":   dropout rate applied after each of the four conv stages
               (0.1 to 0.9, step 0.05)
    - "lamb2": L2 penalty on the output layer's kernel (1e-5 to 1e-1)

  Args:
    hp: hyperparameter container; only its `Float` method is used here.

  Returns:
    A compiled Sequential model taking 128x128x3 inputs and producing a
    softmax over 10 classes.
  """
  dpr = hp.Float("dpr", min_value=0.1, max_value=0.9, step=0.05)
  lamb2 = hp.Float("lamb2", min_value=1e-5, max_value=1e-1)
  initial_learning_rate = 0.001
  BCCN = models.Sequential()
  BCCN.add(layers.Conv2D(40, (4, 4), activation='relu', input_shape=(128, 128, 3)))
  BCCN.add(layers.MaxPooling2D((2, 2)))
  BCCN.add(layers.BatchNormalization())
  BCCN.add(layers.Dropout(dpr))
  #----------------------------------------------------------
  BCCN.add(layers.Conv2D(80, (4, 4), activation='relu'))
  BCCN.add(layers.MaxPooling2D((2, 2)))
  BCCN.add(layers.BatchNormalization())
  BCCN.add(layers.Dropout(dpr))
  #----------------------------------------------------------
  BCCN.add(layers.Conv2D(120, (4, 4), activation='relu'))
  BCCN.add(layers.MaxPooling2D((2, 2)))
  BCCN.add(layers.BatchNormalization())
  BCCN.add(layers.Dropout(dpr))
  #----------------------------------------------------------
  BCCN.add(layers.Conv2D(160, (4, 4), activation='relu'))
  BCCN.add(layers.MaxPooling2D((2, 2)))
  BCCN.add(layers.BatchNormalization())
  BCCN.add(layers.Dropout(dpr))
  #----------------------------------------------------------
  BCCN.add(layers.Flatten())
  # Output layer: softmax probabilities, kernel penalised with the tuned L2 factor.
  BCCN.add(layers.Dense(10, activation="softmax", kernel_regularizer=regularizers.l2(lamb2)))

  lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
      initial_learning_rate,
      decay_steps=100000,
      decay_rate=0.96,
      staircase=True)

  opt = keras.optimizers.Adam(learning_rate=lr_schedule)

  # from_logits=False because the output layer already applies softmax.
  BCCN.compile(optimizer=opt,
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                metrics=['accuracy'])
  return BCCN



In [None]:
# RandomSearch
# Build the model once with a fresh HyperParameters object; the result is
# discarded (presumably a quick check that the builder runs without errors).
BCCNT(keras_tuner.HyperParameters())

# Random search over the hyperparameters declared in BCCNT, ranked by
# validation accuracy.  The project name is specific to this model: the VGG
# search earlier in the notebook uses "RandomSearch results" in the same
# directory, and with overwrite=True a shared name would wipe its results.
RStuner = keras_tuner.RandomSearch(
    hypermodel=BCCNT,
    objective="val_accuracy",
    max_trials=3,
    executions_per_trial=2,
    overwrite=True,
    directory="assignment_1",
    project_name="RandomSearch results BCCN",
)
# Each candidate is trained for 3 epochs on the un-augmented training arrays
# and scored on the validation split.
RStuner.search(X_train, Y_train, epochs=3, validation_data=(X_val, Y_val))

Trial 5 Complete [00h 02m 24s]
val_accuracy: 0.1620967760682106

Best val_accuracy So Far: 0.5830645263195038
Total elapsed time: 00h 13m 01s


In [None]:
# Bayesian Optimisation
# Same builder, objective, trial count and epochs as the random search for
# this model.
# NOTE(review): no directory/project_name is given here or in the earlier
# Bayesian search for VGGT, so both presumably use keras-tuner's default
# location and, with overwrite=True, this run replaces the earlier results --
# confirm.
BOtuner = keras_tuner.BayesianOptimization(
    hypermodel=BCCNT,
    objective="val_accuracy",
    max_trials=3,
    executions_per_trial=2,
    overwrite=True
)
BOtuner.search(X_train, Y_train, epochs=3, validation_data=(X_val, Y_val))


Trial 5 Complete [00h 02m 53s]
val_accuracy: 0.4857526868581772

Best val_accuracy So Far: 0.5975806415081024
Total elapsed time: 00h 13m 02s


## Final testing ##

In [None]:
# Stop training once the monitored validation metric stops improving.
early_stopping = callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,     # how many epochs to wait before stopping
    min_delta=0.1,  # minimum amount of change to count as an improvement
)

# Rebuild the network from the top-ranked random-search hyperparameters.
best_hps = RStuner.get_best_hyperparameters(5)
model1 = BCCNT(best_hps[0])

# Train on the un-augmented generator, validating on the validation generator.
model1.fit(
    x=default_gen,
    validation_data=val_generator,
    epochs=13,
    callbacks=[early_stopping],
)

# Final score on the held-out test set.
test_loss, test_acc = model1.evaluate(X_test, Y_test, verbose=2)
print('\nTest accuracy:', test_acc*100, "%")

Epoch 1/13


  output, from_logits = _get_logits(


Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
121/121 - 2s - loss: 1.4777 - accuracy: 0.6354 - 2s/epoch - 12ms/step

Test accuracy: 63.53734731674194 %
