# Importing the data and some preprocessing

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import tensorflow
from tensorflow import keras

In [2]:
from tensorflow.keras.datasets import mnist
# Load MNIST (fetched from the remote archive when not cached locally) and unpack
# it into image/label pairs for the training and the test split.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Normalizing the data and one-hot encoding the labels
from tensorflow.keras.utils import to_categorical

# MNIST has ten digit classes; pin the one-hot width instead of letting
# to_categorical infer it from the largest label present in each array.
NUM_CLASSES = 10

# Every step is guarded so that re-running this cell is a no-op: without the
# guards a second run would divide the pixels by 255 again and would one-hot
# encode labels that are already one-hot encoded.
if np.issubdtype(X_train.dtype, np.integer):
    # Raw integer pixels -> float32 in [0, 1].
    X_train = X_train.astype('float32') / 255.0
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype('float32') / 255.0
if y_train.ndim == 1:
    # 1-D integer labels -> (n_samples, NUM_CLASSES) one-hot matrix.
    y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [4]:
# Carve a validation set (15%) out of the training data; the test set is kept
# aside for the evaluation of the best model.
# Caution: X_train / y_train are overwritten with their own training part, so
# running this cell a second time splits the already-reduced arrays again.
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    random_state=42,
    test_size=0.15,
)

In [5]:
# Namespaces used by the model-building and training cells below.
# NOTE(review): `models` and `layers` are imported from the standalone `keras`
# package, while `optimizers` (and the `keras` name bound in the first import
# cell, used for `keras.models.clone_model`) come from `tensorflow.keras`.
# Presumably both resolve to the same implementation in this environment --
# confirm, or import all three from `tensorflow.keras`.
from keras import models
from keras import layers
from tensorflow.keras import optimizers

In [6]:
# Experiment bookkeeping: each training cell appends its fitted model to
# `saved_models`; `accuracies` starts empty and is filled by the evaluation cell.
saved_models, accuracies = [], []

# Base Model

In [7]:
# Seed Python's `random`, NumPy and TensorFlow before any weights are created so
# that weight initialisation and batch shuffling repeat across
# "Restart Kernel -> Run All" (GPU kernels may still add small nondeterminism).
keras.utils.set_random_seed(42)

# Base architecture for the learning-rate experiments:
# strided 5x5 convolution -> 2x2 max-pooling -> 64-unit dense -> 10-way softmax.
# The learning-rate cells only clone this model; it is never trained itself.
model = models.Sequential([
    layers.Conv2D(
        64,
        (5, 5),
        strides=(2, 2),
        activation='relu',
        kernel_initializer='truncated_normal',
        input_shape=(28, 28, 1),
    ),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu', kernel_initializer='he_uniform'),
    layers.Dense(10, activation='softmax'),
])

# Trying different learning rates

In [8]:
# Learning-rate experiment 1 of 3: clone of the base architecture trained with
# SGD (momentum 0.9) at learning rate 0.05.
model_1 = keras.models.clone_model(model)
opt_1 = optimizers.SGD(momentum=0.9, learning_rate=0.05)
model_1.compile(
    loss='categorical_crossentropy',
    optimizer=opt_1,
    metrics=['accuracy'],
)
history = model_1.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
# Learning-rate experiment 2 of 3: clone of the base architecture trained with
# SGD (momentum 0.9) at learning rate 0.001.
model_2 = keras.models.clone_model(model)
opt_2 = optimizers.SGD(momentum=0.9, learning_rate=0.001)
model_2.compile(
    loss='categorical_crossentropy',
    optimizer=opt_2,
    metrics=['accuracy'],
)
history = model_2.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Learning-rate experiment 3 of 3: clone of the base architecture trained with
# SGD (momentum 0.9) at learning rate 0.01.
model_3 = keras.models.clone_model(model)
opt_3 = optimizers.SGD(momentum=0.9, learning_rate=0.01)
model_3.compile(
    loss='categorical_crossentropy',
    optimizer=opt_3,
    metrics=['accuracy'],
)
history = model_3.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Trying different architectures

In [11]:
# Architecture variant A: two unstrided 3x3 conv blocks (32 filters each, each
# followed by 2x2 max-pooling), then dense layers of 64 and 32 units and a
# 10-way softmax. The batch-size cells further down clone this model.
model_4 = models.Sequential([
    layers.Conv2D(
        32,
        (3, 3),
        activation='relu',
        kernel_initializer='he_uniform',
        input_shape=(28, 28, 1),
    ),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu', kernel_initializer='he_uniform'),
    layers.Dense(32, activation='relu', kernel_initializer='he_uniform'),
    layers.Dense(10, activation='softmax'),
])

In [12]:
# Train architecture variant A (model_4) with SGD, lr=0.01, momentum 0.9,
# batch size 32, for 10 epochs.
opt_4 = optimizers.SGD(momentum=0.9, learning_rate=0.01)
model_4.compile(
    loss='categorical_crossentropy',
    optimizer=opt_4,
    metrics=['accuracy'],
)
history = model_4.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_4)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# Architecture variant B: like variant A's layout but with stride-2
# convolutions and 16 filters in the first conv layer (32 in the second).
model_5 = models.Sequential([
    layers.Conv2D(
        16,
        (3, 3),
        strides=(2, 2),
        activation='relu',
        kernel_initializer='he_uniform',
        input_shape=(28, 28, 1),
    ),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Conv2D(
        32,
        (3, 3),
        strides=(2, 2),
        activation='relu',
        kernel_initializer='he_uniform',
    ),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu', kernel_initializer='he_uniform'),
    layers.Dense(32, activation='relu', kernel_initializer='he_uniform'),
    layers.Dense(10, activation='softmax'),
])

In [14]:
# Train architecture variant B (model_5) with SGD, lr=0.01, momentum 0.9,
# batch size 32, for 10 epochs.
opt_5 = optimizers.SGD(momentum=0.9, learning_rate=0.01)
model_5.compile(
    loss='categorical_crossentropy',
    optimizer=opt_5,
    metrics=['accuracy'],
)
history = model_5.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_5)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# Architecture variant C: a 3x3 conv block followed by a 5x5 conv block
# (32 filters each, both with 2x2 max-pooling), then a single 32-unit dense
# layer before the 10-way softmax.
model_6 = models.Sequential([
    layers.Conv2D(
        32,
        (3, 3),
        activation='relu',
        kernel_initializer='he_uniform',
        input_shape=(28, 28, 1),
    ),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Conv2D(32, (5, 5), activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Flatten(),
    layers.Dense(32, activation='relu', kernel_initializer='he_uniform'),
    layers.Dense(10, activation='softmax'),
])

In [None]:
# Train architecture variant C (model_6) with SGD, lr=0.01, momentum 0.9,
# batch size 32, for 10 epochs.
opt_6 = optimizers.SGD(momentum=0.9, learning_rate=0.01)
model_6.compile(
    loss='categorical_crossentropy',
    optimizer=opt_6,
    metrics=['accuracy'],
)
history = model_6.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_6)

Epoch 1/10

# Trying different batch sizes

In [None]:
# Batch-size experiment: clone of model_4's architecture, SGD (lr=0.01,
# momentum 0.9), batch size 64.
model_7 = keras.models.clone_model(model_4)
opt_7 = optimizers.SGD(momentum=0.9, learning_rate=0.01)
model_7.compile(
    loss='categorical_crossentropy',
    optimizer=opt_7,
    metrics=['accuracy'],
)
history = model_7.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=64,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_7)

In [None]:
# Batch-size experiment: clone of model_4's architecture, SGD (lr=0.01,
# momentum 0.9), batch size 16.
model_8 = keras.models.clone_model(model_4)
opt_8 = optimizers.SGD(momentum=0.9, learning_rate=0.01)
model_8.compile(
    loss='categorical_crossentropy',
    optimizer=opt_8,
    metrics=['accuracy'],
)
history = model_8.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=16,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_8)

In [None]:
# Batch-size experiment: clone of model_4's architecture, SGD (lr=0.01,
# momentum 0.9), batch size 64.
# NOTE(review): batch_size=64 is the same configuration already used for
# model_7; presumably a different batch size was intended here -- confirm.
model_9 = keras.models.clone_model(model_4)
opt_9 = optimizers.SGD(momentum=0.9, learning_rate=0.01)
model_9.compile(
    loss='categorical_crossentropy',
    optimizer=opt_9,
    metrics=['accuracy'],
)
history = model_9.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=64,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_9)

In [None]:
# Batch-size experiment: clone of model_4's architecture, SGD (lr=0.01,
# momentum 0.9), batch size 128.
model_10 = keras.models.clone_model(model_4)
opt_10 = optimizers.SGD(momentum=0.9, learning_rate=0.01)
model_10.compile(
    loss='categorical_crossentropy',
    optimizer=opt_10,
    metrics=['accuracy'],
)
history = model_10.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=128,
    epochs=10,
    shuffle=True,
    verbose=1,
)
# Keep the fitted model for the evaluation cell.
saved_models.append(model_10)

# Models Evaluation

In [None]:
from operator import itemgetter

# Score every stored model on the test set and report the best accuracy.
# NOTE(review): ranking on (X_test, y_test) uses the test set for model
# selection, whereas the split cell says the test set is reserved for the best
# model only -- consider ranking on (X_val, y_val) instead.

# Rebuild the result list in place so that re-running this cell does not append
# a second copy of every score.
accuracies.clear()

# The loop variable is deliberately not called `model`: that name holds the
# base architecture cloned by the learning-rate cells, and rebinding it here
# would make those cells clone the wrong network when they are re-run.
for trained_model in saved_models:
    # The models are compiled with metrics=['accuracy'], so evaluate() yields
    # [loss, accuracy].
    metrics = trained_model.evaluate(X_test, y_test, verbose=0)
    accuracies.append(metrics)
    print(metrics[1] * 100)

# sorted() returns a new list and leaves `accuracies` in training order, so the
# result has to be captured; sort descending so the best score comes first.
ranked = sorted(accuracies, key=itemgetter(1), reverse=True)
if ranked:  # nothing to report when no model has been trained yet
    print(ranked[0][1])