## CNN on MNIST dataset

##### Objective - To try different CNN architectures (kernel sizes [3x3], [5x5], [7x7]) on MNIST data

In [1]:
# Importing libraries
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.initializers import he_normal
from keras.layers.normalization import BatchNormalization
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Hyper-parameters shared by every model trained in this notebook.
# NOTE(review): the final summary cell prints "Epochs= 12" and
# "Batch size= 128", which differs from the values assigned here -- confirm
# which settings the reported accuracies were obtained with.
num_classes = 10
batch_size = 68
epochs = 4

# Height and width of a single input image, in pixels.
img_rows = img_cols = 28

# Load MNIST, which arrives already split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Decide where the single channel axis goes, based on the backend's image
# data format, then reshape both splits to (samples,) + input_shape.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Cast to float32 and divide by 255 to rescale the pixel values.
# NOTE(review): this cell overwrites x_train/x_test/y_train/y_test, so
# running it a second time without reloading the data rescales and
# re-encodes them again.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the class labels (class vectors -> binary class matrices).
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [3]:
# Draws the Categorical Crossentropy Loss vs. number-of-epochs plot
def plt_dynamic(x, vy, ty):
  """Plot validation and training loss against the epoch number.

  x  -- values for the horizontal axis (the epoch numbers)
  vy -- validation loss values, drawn in blue
  ty -- training loss values, drawn in red

  Shows the figure and returns nothing.
  """
  _, ax = plt.subplots(figsize=(10,5))
  ax.plot(x, vy, 'b', label="Validation Loss")
  ax.plot(x, ty, 'r', label="Train Loss")
  ax.set_xlabel('Epochs')
  ax.set_ylabel('Categorical Crossentropy Loss')
  ax.set_title('\nCategorical Crossentropy Loss VS Epochs')
  ax.legend()
  ax.grid()
  plt.show()

Model 1: CNN with 3x3 Kernel

In [None]:
# Model with three 3x3 convolution layers (32 -> 64 -> 128 filters), two
# max-pooling stages, a 256-unit dense layer and a softmax output.
# Shape comments below are for channels_last 28x28x1 input.
model_3 = Sequential([
    # First conv layer: 28x28x1 -> 26x26x32
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    # Second conv layer: -> 24x24x64
    Conv2D(64, (3, 3), activation='relu'),
    # Max-pooling halves the spatial size: -> 12x12x64
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Third conv layer: -> 10x10x128
    Conv2D(128, (3, 3), activation='relu'),
    # Max-pooling: -> 5x5x128
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Flatten the feature maps for the dense layers
    Flatten(),
    # First hidden layer, He-normal initialised (unseeded)
    Dense(256, activation='relu', kernel_initializer=he_normal(seed=None)),
    Dropout(0.5),
    # Output layer: one softmax probability per class
    Dense(num_classes, activation='softmax'),
])

# Printing model summary. summary() prints the table itself and returns
# None, so wrapping it in print() would emit a stray "None" line.
model_3.summary()

# Compiling the model
model_3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fitting the data to the model; the test split is passed as validation data
history_3 = model_3.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 128)         0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 5, 5, 128)         0         
__________

In [None]:
# Model with four 5x5 convolution layers (8 -> 16 -> 32 -> 64 filters), three
# max-pooling stages, a 256-unit dense layer with batch normalisation and a
# softmax output.
model_4 = Sequential([
    # First conv layer ('same' padding)
    Conv2D(8, kernel_size=(5, 5), padding='same', activation='relu', input_shape=input_shape),
    # Second conv layer (no padding argument, so the Keras default applies)
    Conv2D(16, (5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.25),
    # Third conv layer
    Conv2D(32, (5, 5), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.25),
    # Fourth conv layer
    Conv2D(64, (5, 5), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.25),
    # Flatten the feature maps for the dense layers
    Flatten(),
    # First hidden layer, He-normal initialised (unseeded)
    Dense(256, activation='relu', kernel_initializer=he_normal(seed=None)),
    # Batch normalisation on the hidden layer's activations
    BatchNormalization(),
    Dropout(0.5),
    # Output layer: one softmax probability per class
    Dense(num_classes, activation='softmax'),
])

# Printing model summary. summary() prints the table itself and returns
# None, so wrapping it in print() would emit a stray "None" line.
model_4.summary()

# Compiling the model
model_4.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fitting the data to the model; the test split is passed as validation data
history_4 = model_4.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

In [None]:
# Evaluating the model on the test split; the model was compiled with
# metrics=['accuracy'], so score is [loss, accuracy].
score = model_4.evaluate(x_test, y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# Test and train accuracy of the model.
model_4_test = score[1]
# Keras releases before 2.3 record training accuracy under 'acc'; later
# releases use 'accuracy'. Pick whichever key this run produced so the cell
# does not fail with a KeyError after a Keras upgrade.
acc_key = 'acc' if 'acc' in history_4.history else 'accuracy'
# Best training accuracy reached in any epoch.
model_4_train = max(history_4.history[acc_key])

# Plotting Train and Test Loss VS no. of epochs
# list of epoch numbers
x = list(range(1, epochs + 1))

# Validation loss
vy = history_4.history['val_loss']
# Training loss
ty = history_4.history['loss']

# Calling the function to draw the plot
plt_dynamic(x, vy, ty)

In [2]:
# Summary of the experiments: the run settings, then one table row per model.
# NOTE(review): every value below is a hard-coded string. The settings printed
# here (12 epochs, batch size 128) differ from `epochs = 4` / `batch_size = 68`
# assigned in the first cell, and the kernel labels (3x3 / 4x4 / 5x5) differ
# from the 3x3 / 5x5 / 7x7 named in the objective -- confirm which run these
# numbers come from before relying on them.
print("\n".join([
    "Activation function= Relu",
    "Epochs= 12",
    "Batch size= 128",
]))

from prettytable import PrettyTable

# NOTE(review): `x` is also the name used for the epoch list in the
# evaluation cell; this assignment replaces it.
x = PrettyTable()
x.field_names = ["Kernel","CNN layers","Test accuracy","Optimal Epochs"]
for row in (
    ["3x3","3 layers","0.9945","8"],
    ["4x4","4 layers","0.9930","12"],
    ["5x5","5 layers","0.9953","Didn't merge till 12 epochs"],
):
    x.add_row(row)

print(x)

Activation function= Relu
Epochs= 12
Batch size= 128
+--------+------------+---------------+-----------------------------+
| Kernel | CNN layers | Test accuracy |        Optimal Epochs       |
+--------+------------+---------------+-----------------------------+
|  3x3   |  3 layers  |     0.9945    |              8              |
|  4x4   |  4 layers  |     0.9930    |              12             |
|  5x5   |  5 layers  |     0.9953    | Didn't merge till 12 epochs |
+--------+------------+---------------+-----------------------------+
