# T1.1: MNIST

## mnist_cnn.py


ssh -o ProxyCommand="ssh -g -L 8889:localhost:8889 s4451856@sshgw.leidenuniv.nl -q -W U0065090:22" -g -L 8889:localhost:8889 s4451856@U0065090

In [35]:
'''Trains a simple convnet on the MNIST dataset.

Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''

from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K


In [None]:
batch_size = 128
num_classes = 10
epochs = 50

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
plt.show()

## mnist_mlp.py


In [38]:
'''Trains a simple deep NN on the MNIST dataset.

Gets to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning).
2 seconds per epoch on a K520 GPU.
'''

from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

In [None]:
batch_size = 128
num_classes = 10
epochs = 20

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
plt.show()

# T1.2: Fashion MNIST

## (a) Multi-Layer Perceptron

In [41]:
import os
from keras import regularizers
os.environ["KERAS_BACKEND"] = "tensorflow"

fashion_mnist = keras.datasets.fashion_mnist
(xTrainFull, yTrainFull), (xTest, yTest) = fashion_mnist.load_data()

xVal, xTrain = xTrainFull[:5000] / 255.0, xTrainFull[5000:] / 255.0
yVal, yTrain = yTrainFull[:5000], yTrainFull[5000:]

classNames = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
 "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]


def MLPModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                 hiddenLayers=1,
                 outputActivation="softmax",
                 hiddenActivation="relu",
                 optimiser="sgd",
                 epochs=20,
                 alpha=5):

  MLPModel = Sequential()
  MLPModel.add(keras.layers.Flatten(input_shape=[28, 28]))

  for i in range(hiddenLayers):
    MLPModel.add(keras.layers.Dense(300, activation=hiddenActivation,
                                    kernel_regularizer=regularizers.L1L2(l1=alpha, l2=alpha),
                                    bias_regularizer=regularizers.L2(alpha),
                                    activity_regularizer=regularizers.L2(alpha)))
    
    MLPModel.add(keras.layers.Dense(100, activation=hiddenActivation,
                                    kernel_regularizer=regularizers.L1L2(l1=alpha, l2=alpha),
                                    bias_regularizer=regularizers.L2(alpha),
                                    activity_regularizer=regularizers.L2(alpha)))


  MLPModel.add(keras.layers.Dense(10, activation=outputActivation))

  MLPModel.compile(loss="sparse_categorical_crossentropy",
  optimizer=optimiser,
  metrics=["accuracy"])

  history = MLPModel.fit(xTrain, yTrain, epochs=epochs,
                      validation_data=(xVal, yVal))

  pd.DataFrame(history.history).plot(figsize=(8, 5))
  plt.grid(True)
  plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
  plt.show()

  test_loss, test_acc = MLPModel.evaluate(xTest,  yTest, verbose=2)
  print(f"Hidden layers: {hiddenLayers}")
  print(f"Optimiser: {optimiser}")
  print(f"Output Activation: {outputActivation}")
  print(f"Hidden Activation: {hiddenActivation}")
  print(f"Alpha: {alpha}")
  print(f"Epochs: {epochs}")
  print(f"Test loss: {test_loss}")
  print(f"Test accuracy: {test_acc}")

  return test_acc

## (b) Convolutional Neural Network

In [48]:
from re import X

def CNNModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                 hiddenLayers=1,
                 outputActivation="softmax",
                 hiddenActivation="relu",
                 convActivation="relu",
                 optimiser="sgd",
                 epochs=20,
                 alpha=5):

  CNNModel = Sequential()
  CNNModel.add(Conv2D(28, (3, 3), activation=convActivation, input_shape=(28, 28, 1)))
  CNNModel.add(MaxPooling2D((2, 2)))
  CNNModel.add(Conv2D(64, (3, 3), activation=convActivation))
  CNNModel.add(MaxPooling2D((2, 2)))
  CNNModel.add(Conv2D(64, (3, 3), activation=convActivation))

  CNNModel.add(Flatten())

  for i in range(hiddenLayers):
    CNNModel.add(Dense(64, activation=hiddenActivation))

  CNNModel.add(Dense(10, activation=outputActivation))

  CNNModel.compile(optimizer=optimiser,
                loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])


  history = CNNModel.fit(xTrain, yTrain, epochs=epochs,
                      validation_data=(xVal, yVal))

  pd.DataFrame(history.history).plot(figsize=(8, 5))
  plt.grid(True)
  plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
  plt.show()

  test_loss, test_acc = CNNModel.evaluate(xTest,  yTest, verbose=2)

  print(f"Hidden layers: {hiddenLayers}")
  print(f"Optimiser: {optimiser}")
  print(f"Output Activation: {outputActivation}")
  print(f"Hidden Activation: {hiddenActivation}")
  print(f"Conv Activation: {convActivation}")
  print(f"Alpha: {alpha}")
  print(f"Epochs: {epochs}")
  print(f"Test loss: {test_loss}")
  print(f"Test accuracy: {test_acc}")

  return test_acc

## Experimentation and Evaluation

### MLP Model Evaluation

Naive approach, best hyperparameters:

Hidden layers: 1

Optimiser: Adam

Output Activation: SoftPlus

Hidden Activation: Tanh

Alpha:  5

Epochs: 30

Test accuracy: 0.8705999851226807

In [None]:
import numpy as np

# MLP model eval
# Epochs
bestEpochs = (30, 0)
#for epochs in [20, 25, 30, 35, 40, 45, 50]:
  #accuracy = MLPModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest, epochs=epochs)

  #if accuracy > bestEpochs[1]:
    #bestEpochs = (epochs, accuracy)

  #else:
    #break


# Optimiser
bestOptimiser = ("Adam", 0)
#for optimiser in ["SGD",
#                  "RMSprop",
#                  "Adam",
#                  "AdamW",
#                  "Adadelta",
#                  "Adagrad",
#                  "Adamax",
#                  "Adafactor",
#                  "Nadam",
#                  "Ftrl",
#                  "Lion"]:
#  accuracy = MLPModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
#                          epochs=bestEpochs[0],
#                          optimiser=optimiser)

#  if accuracy > bestOptimiser[1]:
#    bestOptimiser = (optimiser, accuracy)


# Output activation function
bestOutActivation = ("softplus", 0)
#for activation in ["relu", "sigmoid", "softmax", "softplus", "softsign",
#                   "tanh", "selu", "elu", "exponential"]:
#  accuracy = MLPModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
#                          outputActivation=activation,
#                          epochs=bestEpochs[0],
#                          optimiser=bestOptimiser[0])

#  if accuracy > bestOutActivation[1]:
#    bestOutActivation = (activation, accuracy)


# Hidden Layer activation function
bestHiddenActivation = ("tanh", 0)
#for activation in ["relu", "sigmoid", "softmax", "softplus", "softsign",
#                   "tanh", "selu", "elu", "exponential"]:
#  accuracy = MLPModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
#                          outputActivation=bestOutActivation[0],
#                          hiddenActivation=activation,
#                          epochs=bestEpochs[0],
#                          optimiser=bestOptimiser[0])

#  if accuracy > bestHiddenActivation[1]:
#    bestHiddenActivation = (activation, accuracy)


# Hidden Layers
bestHidden = (1, 0)
#for layers in range(1, 10):
#  accuracy = MLPModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
#                          hiddenLayers=layers,
#                          outputActivation=bestOutActivation[0],
#                          hiddenActivation=bestHiddenActivation[0],
#                          epochs=bestEpochs[0],
#                          optimiser=bestOptimiser[0])

#  if accuracy > bestHidden[1]:
#    bestHidden = (layers, accuracy)

#  else:
#    break


# Regularisations
bestAlpha = (0, 0)
alphas = np.logspace(-10, -2, 200)
for alpha in alphas:
  accuracy = MLPModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                          hiddenLayers=bestHidden[0],
                          outputActivation=bestOutActivation[0],
                          hiddenActivation=bestHiddenActivation[0],
                          epochs=bestEpochs[0],
                          optimiser=bestOptimiser[0],
                          alpha=alpha)

  if accuracy > bestAlpha[1]:
    bestAlpha = (alpha, accuracy)

print(f"Best Hidden layers: {bestHidden[0]}")
print(f"Best Optimiser: {bestOptimiser[0]}")
print(f"Best Output Activation: {bestOutActivation[0]}")
print(f"Best Hidden Activation: {bestHiddenActivation[0]}")
print(f"Best Epochs: {bestEpochs[0]}")
print(f"Final Test accuracy: {bestHidden[1]}")


### CNN Model Evaluation

Naive approach, best hyperparameters:

Hidden layers: 1

Optimiser: SGD

Output Activation: softmax

Hidden Activation: tanh

Conv Activation: relu

Alpha: 7.564633275546291e-07

Epochs: 20

Test loss: 0.3733844459056854

Test accuracy: 0.8733000159263611

In [None]:
# CNN model eval
# Epochs
bestEpochs = (0, 0)
for epochs in range(20, 50):
  accuracy = CNNModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest, epochs=epochs)

  if accuracy > bestEpochs[1]:
    bestEpochs = (epochs, accuracy)

  else:
    break

# Optimiser
bestOptimiser = ("", 0)
for optimiser in ["SGD",
                  "RMSprop",
                  "Adam",
                  "AdamW",
                  "Adadelta",
                  "Adagrad",
                  "Adamax",
                  "Adafactor",
                  "Nadam",
                  "Ftrl",
                  "Lion"]:
  accuracy = CNNModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                          epochs=bestEpochs[0],
                          optimiser=optimiser)

  if accuracy > bestOptimiser[1]:
    bestOptimiser = (optimiser, accuracy)


# Output activation function
bestOutActivation = ("", 0)
for activation in ["relu", "sigmoid", "softmax", "softplus", "softsign",
                   "tanh", "selu", "elu", "exponential"]:
  accuracy = CNNModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                          outputActivation=activation,
                          epochs=bestEpochs[0],
                          optimiser=bestOptimiser[0])

  if accuracy > bestOutActivation[1]:
    bestOutActivation = (activation, accuracy)


# Hidden Layer activation function
bestHiddenActivation = ("", 0)
for activation in ["relu", "sigmoid", "softmax", "softplus", "softsign",
                   "tanh", "selu", "elu", "exponential"]:
  accuracy = CNNModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                          outputActivation=bestOutActivation[0],
                          hiddenActivation=activation,
                          epochs=bestEpochs[0],
                          optimiser=bestOptimiser[0])

  if accuracy > bestHiddenActivation[1]:
    bestHiddenActivation = (activation, accuracy)


# Hidden Layer activation function
bestConvActivation = ("", 0)
for activation in ["relu", "sigmoid", "softmax", "softplus", "softsign",
                   "tanh", "selu", "elu", "exponential"]:
  accuracy = CNNModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                          outputActivation=bestOutActivation[0],
                          hiddenActivation=bestHiddenActivation[0],
                          convActivation=activation,
                          epochs=bestEpochs[0],
                          optimiser=bestOptimiser[0])

  if accuracy > bestConvActivation[1]:
    bestConvActivation = (activation, accuracy)


# Hidden Layers
bestHidden = (0, 0)
for layers in range(1, 10):
  accuracy = CNNModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                          hiddenLayers=layers,
                          outputActivation=bestOutActivation[0],
                          hiddenActivation=bestHiddenActivation[0],
                          convActivation=bestConvActivation[0],
                          epochs=bestEpochs[0],
                          optimiser=bestOptimiser[0])

  if accuracy > bestHidden[1]:
    bestHidden = (layers, accuracy)

  else:
    break


# Regularisations
bestAlpha = (0, 0)
alphas = np.logspace(-10, -2, 100)
for alpha in alphas:
  accuracy = CNNModelTest(xTrain, yTrain, xVal, yVal, xTest, yTest,
                          hiddenLayers=bestHidden[0],
                          outputActivation=bestOutActivation[0],
                          hiddenActivation=bestHiddenActivation[0],
                          convActivation=bestConvActivation[0],
                          epochs=bestEpochs[0],
                          optimiser=bestOptimiser[0],
                          alpha=alpha)

  if accuracy > bestAlpha[1]:
    bestAlpha = (alpha, accuracy)

print(f"Best Hidden layers: {bestHidden[0]}")
print(f"Best Optimiser: {bestOptimiser[0]}")
print(f"Best Output Activation: {bestOutActivation[0]}")
print(f"Best Hidden Activation: {bestHiddenActivation[0]}")
print(f"Best Convolutional Activation: {bestConvActivation[0]}")
print(f"Best Epochs: {bestEpochs[0]}")
print(f"Final Test accuracy: {bestHidden[1]}")