In [20]:
# --- Imports: kept together so the notebook's dependencies are visible at a glance ---
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, BatchNormalization, Flatten, Conv2D, Dropout, MaxPool2D
from tensorflow.keras.models import load_model

# --- Reproducibility ---
# Fix the global NumPy and TensorFlow seeds so that a Restart & Run All starts every
# experiment from the same random state. Some GPU kernels may still be
# non-deterministic, so small run-to-run differences can remain.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Data ---
# MNIST split into (images, labels) pairs for training and testing
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

print(tf.__version__)

2.1.0


# Data Preprocessing

In [21]:
# Layout fed to the Keras Conv2D layers: (rows, cols, channels); MNIST is single-channel
input_shape = (28, 28, 1)

# load_data() returns the images as uint8. Converting only while the arrays are still in
# that raw form keeps this cell idempotent: re-running it does not divide by 255 twice.
if x_train.dtype == np.uint8:
    # Add the trailing channel axis, cast to float32 so the division keeps decimals,
    # and scale the grayscale intensities from [0, 255] to [0, 1]
    x_train = x_train.reshape((-1,) + input_shape).astype('float32') / 255
if x_test.dtype == np.uint8:
    x_test = x_test.reshape((-1,) + input_shape).astype('float32') / 255

print('x_train shape:', x_train.shape)
print('Number of images in x_train', x_train.shape[0])
print('Number of images in x_test', x_test.shape[0])

x_train shape: (60000, 28, 28, 1)
Number of images in x_train 60000
Number of images in x_test 10000


# Finding Appropriate number of Conv Layers

### A) Input -> Conv2d(24,5) -> Pool(2) -> Flatten -> Dense(128) -> Dense(10)

In [26]:
# Variant A: one 5x5 conv/pool stage ('same' padding) followed by the dense classifier head
model = tf.keras.models.Sequential([
    Conv2D(24, kernel_size=5, activation='relu', padding='same', input_shape=(28, 28, 1)),
    MaxPool2D(),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [27]:
# Integer class labels -> sparse categorical cross-entropy; accuracy is tracked per epoch
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train variant A for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb89694bd50>

### B) Input -> Conv2d(24,5) -> Pool(2) -> Conv2d(48,5) -> Pool(2) -> Flatten -> Dense(128) -> Dense(10)

In [28]:
# Variant B: two 5x5 conv/pool stages ('same' padding, 24 then 48 filters) + dense head
model = tf.keras.models.Sequential([
    # Stage 1
    Conv2D(24, kernel_size=5, activation='relu', padding='same', input_shape=(28, 28, 1)),
    MaxPool2D(),
    # Stage 2
    Conv2D(48, kernel_size=5, activation='relu', padding='same'),
    MaxPool2D(),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [29]:
# Same optimiser, loss and metric as the other variants so the runs are comparable
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train variant B for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb895b8b310>

### C) Input -> Conv2d(24,5) -> Pool(2) -> Conv2d(48,5) -> Pool(2) -> Conv2d(64,5) -> Pool(2) ->Flatten -> Dense(128) -> Dense(10)

In [30]:
# Variant C: three 5x5 conv/pool stages ('same' padding, 24 -> 48 -> 64 filters) + dense head
model = tf.keras.models.Sequential([
    # Stage 1
    Conv2D(24, kernel_size=5, activation='relu', padding='same', input_shape=(28, 28, 1)),
    MaxPool2D(),
    # Stage 2
    Conv2D(48, kernel_size=5, activation='relu', padding='same'),
    MaxPool2D(),
    # Stage 3
    Conv2D(64, kernel_size=5, activation='relu', padding='same'),
    MaxPool2D(),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [31]:
# Same optimiser, loss and metric as the other variants so the runs are comparable
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train variant C for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb89721b410>

### Conclusion: Although it looks like C could reach better accuracy with more training, B seems to be the ideal choice: it is equally good and computationally less expensive than C.

# Finding the appropriate number of feature maps

### B1) Input -> Conv2d(16,5) -> Pool(2) -> Conv2d(32,5) -> Pool(2) -> Flatten -> Dense(128) -> Dense(10)

In [40]:
# Variant B1: two conv/pool stages with 16 and 32 filters.
# No padding argument is passed here, so the Conv2D default applies.
model = tf.keras.models.Sequential([
    # Stage 1
    Conv2D(16, kernel_size=5, activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D(),
    # Stage 2
    Conv2D(32, kernel_size=5, activation='relu'),
    MaxPool2D(),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [41]:
# Same optimiser, loss and metric as the other variants so the runs are comparable
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train variant B1 for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb888309350>

### B2) Input -> Conv2d(24,5) -> Pool(2) -> Conv2d(48,5) -> Pool(2) -> Flatten -> Dense(128) -> Dense(10)

In [42]:
# Variant B2: two conv/pool stages with 24 and 48 filters.
# No padding argument is passed here, so the Conv2D default applies.
model = tf.keras.models.Sequential([
    # Stage 1
    Conv2D(24, kernel_size=5, activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D(),
    # Stage 2
    Conv2D(48, kernel_size=5, activation='relu'),
    MaxPool2D(),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [43]:
# Same optimiser, loss and metric as the other variants so the runs are comparable
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train variant B2 for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb8881383d0>

### B3) Input -> Conv2d(32,5) -> Pool(2) -> Conv2d(64,5) -> Pool(2) -> Flatten -> Dense(128) -> Dense(10)

In [44]:
# Variant B3: two conv/pool stages with 32 and 64 filters.
# No padding argument is passed here, so the Conv2D default applies.
model = tf.keras.models.Sequential([
    # Stage 1
    Conv2D(32, kernel_size=5, activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D(),
    # Stage 2
    Conv2D(64, kernel_size=5, activation='relu'),
    MaxPool2D(),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [45]:
# Same optimiser, loss and metric as the other variants so the runs are comparable
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train variant B3 for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb7f82c1490>

### B4) Input -> Conv2d(48,5) -> Pool(2) -> Conv2d(96,5) -> Pool(2) -> Flatten -> Dense(128) -> Dense(10)

In [46]:
# Variant B4: two conv/pool stages with 48 and 96 filters.
# No padding argument is passed here, so the Conv2D default applies.
model = tf.keras.models.Sequential([
    # Stage 1
    Conv2D(48, kernel_size=5, activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D(),
    # Stage 2
    Conv2D(96, kernel_size=5, activation='relu'),
    MaxPool2D(),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [47]:
# Same optimiser, loss and metric as the other variants so the runs are comparable
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train variant B4 for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb7f8170450>

### B5) Input -> Conv2d(64,5) -> Pool(2) -> Conv2d(128,5) -> Pool(2) -> Flatten -> Dense(128) -> Dense(10)

In [50]:
# Variant B5: two conv/pool stages with 64 and 128 filters.
# No padding argument is passed here, so the Conv2D default applies.
model = tf.keras.models.Sequential([
    # Stage 1
    Conv2D(64, kernel_size=5, activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D(),
    # Stage 2: 128 filters, as stated in the B5 heading (the cell previously used 96,
    # which made this stage identical to B4's second stage)
    Conv2D(128, kernel_size=5, activation='relu'),
    MaxPool2D(),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [51]:
# Same optimiser, loss and metric as the other variants so the runs are comparable
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train variant B5 for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb7e8545d10>

### Conclusion: Best Training accuracy and Validation accuracy is achieved in B3

# Adding Dropout

In [56]:
# Sweep the dropout rate over the 32/64-filter two-stage network and report the test
# metrics for each rate. Literal rates are used because i*0.2 evaluates to
# 0.6000000000000001 for i=3.
for dropout in (0.2, 0.4, 0.6):
    model = tf.keras.models.Sequential([
        # Stage 1
        Conv2D(32, kernel_size=5, activation='relu', input_shape=(28, 28, 1)),
        MaxPool2D(),
        Dropout(dropout),
        # Stage 2
        Conv2D(64, kernel_size=5, activation='relu'),
        MaxPool2D(),
        Dropout(dropout),
        # Classifier head
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(dropout),
        Dense(10, activation='softmax'),
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # Silent training: only the final test metrics are reported for each rate
    model.fit(x=x_train, y=y_train, epochs=20, verbose=0)

    # evaluate() returns the loss followed by the compiled metrics (accuracy here);
    # print them with the rate so the three runs can be told apart in the output
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
    print('dropout=%.1f  test_loss=%.4f  test_accuracy=%.4f' % (dropout, test_loss, test_acc))



### Conclusion: 40% dropout seems good.

### All these experiments involve randomness, so results vary between runs; hence I had to run them several times to be sure of the conclusions

# Further improvements

#### 1) Instead of using one 5\*5 kernel in a convolution, we can stack two 3\*3 convolutions, which mimic its behaviour while adding an extra non-linearity  
#### 2) Instead of MaxPooling,we can try a Convolutional layer with a stride of 2 and see if there is any improvement
#### 3) We can add BatchNormalization to stabilise training; it can also reduce overfitting so that the model generalises better.
#### 4) Data augmentation can also be used, but the model seems to perform sufficiently well without it.

In [63]:
# Dropout rate shared by every Dropout layer in the final network
dropout = 0.40

# Final network: each stage is two 3x3 convolutions followed by a strided 5x5 convolution
# (stride 2, 'same' padding) in place of max pooling, with batch normalisation after
# every convolution and dense layer.
model = tf.keras.models.Sequential([
    # Stage 1: 32 filters
    Conv2D(32, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),
    Conv2D(32, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Conv2D(32, kernel_size=5, strides=2, padding='same', activation='relu'),
    BatchNormalization(),
    Dropout(dropout),
    # Stage 2: 64 filters
    Conv2D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Conv2D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu'),
    BatchNormalization(),
    Dropout(dropout),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(dropout),
    Dense(10, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for 20 epochs, using the test split as validation data
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb79efa38d0>

In [64]:
# Persist the trained network to an HDF5 file in the working directory
model.save(filepath='cnn_model.h5')

### Finally, after doing various runs of each experiment, I achieved a training accuracy of around 99.6% and a validation accuracy of around 99.4%. Data augmentation and a bigger network might increase this a bit further, but this seems to be a good balance of accuracy and computational expense.

In [65]:
# Final check on the test split; the cell displays the returned [loss, accuracy] pair
model.evaluate(x=x_test, y=y_test)



[0.019054458866282948, 0.9945]