In [1]:
# TensorFlow and tf.keras
import tensorflow as tf
import numpy as np
from tensorflow import keras

# Seed the NumPy and TensorFlow global random generators before any model is
# built so that weight initialisation, dropout masks and batch shuffling are
# repeatable after "Restart Kernel -> Run All". Without this, every run gives a
# slightly different test accuracy, which makes comparisons between model
# variants unreliable. (Some GPU kernels may still introduce small run-to-run
# differences.)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# The TensorFlow version should be 2.5 or higher; display it for the record.
tf.__version__

'2.6.0'

In [2]:
# Load the MNIST handwritten-digit dataset through Keras. load_data() returns
# two (images, labels) pairs: the training split first, then the test split.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
train_images, train_labels = mnist_train
test_images, test_labels = mnist_test

In [3]:
# Scale the image pixel values (not the model) by dividing by 255, the maximum
# 8-bit intensity, so the network sees inputs in the range [0, 1].
# NOTE: this cell rebinds train_images/test_images, so re-running it without
# re-running the data-loading cell first divides the pixels a second time.
MAX_PIXEL_VALUE = 255.0
train_images, test_images = (
    train_images / MAX_PIXEL_VALUE,
    test_images / MAX_PIXEL_VALUE,
)

In [4]:
# Conv2D layers expect a channel axis, so give every 28x28 image an explicit
# single channel: (num_images, 28, 28) -> (num_images, 28, 28, 1).
IMAGE_SHAPE = (28, 28, 1)
train_images = train_images.reshape((len(train_images),) + IMAGE_SHAPE)
test_images = test_images.reshape((len(test_images),) + IMAGE_SHAPE)

In [5]:
# Build the CNN classifier: three convolutional stages followed by two hidden
# dense stages and a 10-way softmax output (one unit per digit class).
#
# Every convolution / hidden dense layer is created without a bias because the
# BatchNormalization layer that follows it (center=True) supplies its own
# offset; the normalisation's scale factor is disabled (scale=False).


def _norm_relu():
    """Return the BatchNormalization + ReLU pair placed after each bias-free layer."""
    return [
        keras.layers.BatchNormalization(center = True, scale = False),
        keras.layers.Activation('relu'),
    ]


def _conv_stage(filters, kernel_size, **conv_kwargs):
    """Return [Conv2D, BatchNormalization, ReLU] for one convolutional stage.

    Extra keyword arguments (e.g. ``strides`` or ``input_shape``) are passed
    straight through to ``Conv2D``.
    """
    conv = keras.layers.Conv2D(
        kernel_size = kernel_size,
        filters = filters,
        use_bias = False,
        padding = 'same',
        **conv_kwargs,
    )
    return [conv, *_norm_relu()]


def _dense_stage(units, drop_rate = 0.4):
    """Return [Dense, BatchNormalization, ReLU, Dropout] for one hidden dense stage."""
    dense = keras.layers.Dense(units, use_bias = False)
    return [dense, *_norm_relu(), keras.layers.Dropout(drop_rate)]


model = keras.Sequential([
    # Stage 1 keeps the 28x28 resolution; stages 2 and 3 use stride 2 to
    # downsample instead of pooling layers.
    *_conv_stage(12, 3, input_shape = (28, 28, 1)),
    *_conv_stage(24, 6, strides = 2),
    *_conv_stage(32, 6, strides = 2),
    keras.layers.Flatten(),
    # Hidden dense stages, each regularised with 40% dropout.
    *_dense_stage(200),
    *_dense_stage(60),
    # Output layer: a probability for each of the 10 digit classes.
    keras.layers.Dense(10, activation = 'softmax'),
])

In [6]:
# Configure training: Adam optimizer with a learning rate of 0.001, sparse
# categorical cross-entropy as the loss, and accuracy as the reported metric.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=adam,
    metrics=['accuracy'],
)

In [7]:
# Train the network on the full training split for 10 epochs. The call is the
# last expression in the cell, so the returned History object is displayed.
model.fit(x=train_images, y=train_labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x24986084910>

In [8]:
# Measure loss and accuracy on the held-out test split, which was not used
# during training, and report the accuracy.
evaluation = model.evaluate(x=test_images, y=test_labels)
test_loss, test_acc = evaluation
print('\nTest accuracy:', test_acc)


Test accuracy: 0.9934999942779541


In [9]:
# Run the trained model on the first 5 test images only.
first_five_images = test_images[:5]
predictions = model.predict(first_five_images)

In [10]:
# Raw predictions: one row per input image and one column per digit class
# (the 10 softmax outputs of the final layer). The largest value in a row
# marks the class the model considers most likely.
predictions

array([[5.4225300e-08, 2.0636460e-06, 7.3080173e-06, 6.6429739e-06,
        1.7030276e-07, 1.2548448e-08, 1.9275942e-07, 9.9998045e-01,
        1.2944292e-07, 3.0281287e-06],
       [5.4155200e-09, 3.4371986e-08, 9.9999964e-01, 2.1957625e-07,
        1.9203218e-10, 2.3774524e-11, 6.2083359e-08, 4.1023696e-09,
        3.1579791e-08, 2.2955962e-09],
       [3.3926556e-11, 9.9999988e-01, 2.2266466e-08, 8.2159213e-10,
        1.2287871e-08, 4.7262234e-09, 8.6154408e-09, 1.2329279e-07,
        1.2001941e-08, 8.5216390e-11],
       [9.9999952e-01, 4.6948654e-09, 1.3584091e-07, 6.3453803e-10,
        3.9695390e-08, 5.0637112e-09, 9.8634189e-08, 9.6121315e-09,
        1.6288914e-07, 9.4575739e-08],
       [1.6418786e-09, 1.2814853e-08, 2.8850006e-10, 4.9717066e-12,
        9.9999797e-01, 4.9264419e-09, 3.0573709e-08, 1.3531484e-09,
        1.2569196e-08, 2.0638631e-06]], dtype=float32)

In [11]:
# Turn each row of class probabilities into a single predicted digit by taking
# the index of its largest entry, then print the predictions.
predicted_digits = np.argmax(predictions, axis=1)
print(predicted_digits)

[7 2 1 0 4]


In [12]:
# Print the ground-truth labels of the same 5 test images so they can be
# compared with the predictions above; expected: [7, 2, 1, 0, 4].
true_digits = test_labels[:5]
print(true_digits)

[7 2 1 0 4]


## Comment:

First, I changed the model's optimizer from 'SGD' to 'Adam' to improve the test accuracy. Then I followed the points mentioned in the lecture slides to further improve the accuracy on the test dataset: adding dropout layers between the dense layers and adding batch normalization to the Keras layers. I also tried adding one additional dense layer with 100 units, but the test accuracy dropped below 99%. Furthermore, I tried using a 20% dropout layer between every pair of dense layers, but the test accuracy did not exceed 99.25%.