# MNIST Implementation with CNN - Improving accuracy #4

<img src="https://miro.medium.com/max/5588/1*k2xYkvn75VXOwzh726kILw.png"/>

In [1]:
# Load necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPooling2D
from keras.optimizers import RMSprop
from keras.models import Sequential
from keras.utils import to_categorical
from keras.datasets import mnist
import pandas as pd

In [2]:
# Load MNIST handwritten digit data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Min-max scale the pixels so they lie between 0 and 1, which helps training.
# The range is derived from the training split only and then applied to both
# splits, so the test images go through exactly the same transform as the
# training images instead of being scaled by their own statistics.
# The images are cast to float32 before subtracting so the arithmetic does not
# happen in the raw pixel dtype (Keras documents these arrays as uint8).
pixel_min = float(X_train.min())
pixel_range = float(X_train.max()) - pixel_min
X_train = (X_train.astype('float32') - pixel_min) / pixel_range
X_test = (X_test.astype('float32') - pixel_min) / pixel_range

# One-hot encode the labels of both splits; the integer labels y_train / y_test
# are left untouched.
y_train_hot = to_categorical(y_train, num_classes=10)
y_test_hot = to_categorical(y_test, num_classes=10)

In [3]:
# Add an explicit single-channel axis: Keras Conv2D expects 4-D input of shape
# (samples, height, width, channels). The -1 lets NumPy infer the sample count.
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

### First attempt: an arbitrary stack of convolution and pooling layers

In [4]:
# Build the baseline CNN:
#   two 5x5 conv layers (32 filters) -> 2x2 max pool -> dropout
#   two 3x3 conv layers (64 filters) -> 2x2 max pool -> dropout
#   flatten -> dense(256) -> dropout -> 10-way softmax
model = Sequential()
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                 activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))
model.summary()

# Define the optimizer.
# `learning_rate` is the current name of the step-size argument; the old `lr`
# alias is deprecated and newer Keras releases warn about or reject it.
# The former `decay=0.0` argument is dropped: a decay of zero was a no-op and the
# keyword is likewise no longer supported by newer optimizers.
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)

# Compile the model (one-hot labels -> categorical cross-entropy)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])

# Train the Neural Network model; the test split is used to report validation
# metrics after each epoch.
model.fit(X_train, y_train_hot, epochs=2,validation_data=(X_test,y_test_hot))
# Evaluate the model. verbose=0 suppresses the progress display; evaluate()
# returns [loss, accuracy], and only the accuracy is kept.
_, train_acc = model.evaluate(X_train, y_train_hot, verbose=0)
_, test_acc = model.evaluate(X_test, y_test_hot, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        832       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        25632     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0

### Got 98.3% accuracy within 2 epochs

### Next, try different loss functions and optimizers.


In [5]:
# Search space: every loss function below is paired with every optimizer.
loss = [
    'categorical_crossentropy',
    'kl_divergence',
    'poisson',
    'sparse_categorical_crossentropy',
]
opt = [
    'Adadelta',
    'Adagrad',
    'Adam',
    'Adamax',
    'Nadam',
    'RMSprop',
    'SGD',
]
# Empty results table; one row per (loss, optimizer) combination is added later.
result_columns = ['Loss', 'Optimizer', 'Train Accuracy', 'Test Accuracy']
df = pd.DataFrame(columns=result_columns)

In [6]:
# Grid search over every (loss, optimizer) pair with 10 epochs each.
# This will run for a few hours.

def _build_cnn():
    """Return a fresh, uncompiled CNN so each combination starts from new weights.

    Architecture: two 5x5 conv layers (32 filters), 2x2 max pool, dropout,
    two 3x3 conv layers (64 filters), 2x2 max pool, dropout, then
    flatten -> dense(256) -> dropout -> 10-way softmax.
    """
    return Sequential([
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
               activation='relu', input_shape=(28, 28, 1)),
        Conv2D(filters=32, kernel_size=(5, 5), padding='Same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
        Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(256, activation="relu"),
        Dropout(0.5),
        Dense(10, activation="softmax"),
    ])


# Rows are collected in a plain list and turned into the DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0, and building from records
# also makes re-running this cell replace the results instead of duplicating
# them. If the run is interrupted, the finished rows are still in `records`.
records = []
for loss_name in loss:
    # The sparse loss consumes integer class labels; every other loss in the
    # list is fed the one-hot encoded labels.
    use_sparse_labels = loss_name == 'sparse_categorical_crossentropy'
    train_labels = y_train if use_sparse_labels else y_train_hot
    test_labels = y_test if use_sparse_labels else y_test_hot

    for opt_name in opt:
        model = _build_cnn()
        model.compile(optimizer=opt_name, loss=loss_name, metrics=['acc'])
        model.fit(X_train, train_labels, epochs=10,
                  validation_data=(X_test, test_labels), verbose=0)
        # evaluate() returns [loss, accuracy]; only the accuracy is kept.
        _, train_acc = model.evaluate(X_train, train_labels, verbose=0)
        _, test_acc = model.evaluate(X_test, test_labels, verbose=0)
        print('Loss: %s, Optimizer: %s, Train: %.3f, Test: %.3f' % (loss_name,opt_name,train_acc*100, test_acc*100))
        records.append({'Loss':loss_name,'Optimizer':opt_name,'Train Accuracy':train_acc*100,'Test Accuracy':test_acc*100})

# Keep the column order declared when `df` was created.
df = pd.DataFrame(records, columns=df.columns)

Loss: categorical_crossentropy, Optimizer: Adadelta, Train: 84.762, Test: 86.140
Loss: categorical_crossentropy, Optimizer: Adagrad, Train: 97.465, Test: 97.540
Loss: categorical_crossentropy, Optimizer: Adam, Train: 99.705, Test: 99.330
Loss: categorical_crossentropy, Optimizer: Adamax, Train: 99.743, Test: 99.470
Loss: categorical_crossentropy, Optimizer: Nadam, Train: 99.717, Test: 99.390
Loss: categorical_crossentropy, Optimizer: RMSprop, Train: 98.902, Test: 98.720
Loss: categorical_crossentropy, Optimizer: SGD, Train: 99.285, Test: 99.150
Loss: kl_divergence, Optimizer: Adadelta, Train: 83.638, Test: 84.760
Loss: kl_divergence, Optimizer: Adagrad, Train: 97.475, Test: 97.690
Loss: kl_divergence, Optimizer: Adam, Train: 99.613, Test: 99.230
Loss: kl_divergence, Optimizer: Adamax, Train: 99.677, Test: 99.380
Loss: kl_divergence, Optimizer: Nadam, Train: 99.735, Test: 99.550
Loss: kl_divergence, Optimizer: RMSprop, Train: 98.883, Test: 98.730
Loss: kl_divergence, Optimizer: SGD, Tra

In [7]:
# Display sorted df
df.sort_values(by='Test Accuracy', ascending=False, ignore_index=True)

Unnamed: 0,Loss,Optimizer,Train Accuracy,Test Accuracy
0,kl_divergence,Nadam,99.734998,99.550003
1,poisson,Adam,99.791664,99.540001
2,sparse_categorical_crossentropy,Adam,99.756664,99.510002
3,sparse_categorical_crossentropy,Nadam,99.766666,99.479997
4,categorical_crossentropy,Adamax,99.74333,99.470001
5,poisson,Adamax,99.680001,99.440002
6,poisson,Nadam,99.766666,99.430001
7,categorical_crossentropy,Nadam,99.716669,99.39
8,kl_divergence,Adamax,99.676669,99.379998
9,sparse_categorical_crossentropy,Adamax,99.691665,99.360001


# We were able to get 99.55% test accuracy (kl_divergence loss with the Nadam optimizer)