# MNIST Implementation with CNN - Improving accuracy #3

<img src="https://miro.medium.com/max/700/1*5A4b1qOZIr4Q6SKceqGn7w.jpeg"/>

In [1]:
# Load necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPooling2D
from keras.models import Sequential
from keras.utils import to_categorical
from keras.datasets import mnist
import pandas as pd

In [2]:
# Load MNIST handwritten digit data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Min-max scale the pixel values into [0, 1]; this helps training.
# The scaling statistics are taken from the training split only and reused for
# the test split, so both splits go through exactly the same transform.
pixel_min = float(X_train.min())
pixel_range = float(X_train.max()) - pixel_min
# Cast to float before subtracting so the arithmetic is not done in the
# integer dtype of the raw images.
X_train = (X_train.astype('float64') - pixel_min) / pixel_range
X_test = (X_test.astype('float64') - pixel_min) / pixel_range

# Convert the integer labels of both splits into one-hot format
y_train_hot = to_categorical(y_train, num_classes=10)
y_test_hot = to_categorical(y_test, num_classes=10)

In [3]:
# Give every image an explicit single channel axis: Keras Conv2D expects
# 4-dimensional input, so the arrays become (n_samples, 28, 28, 1).
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

### First attempt: one convolution layer followed by one pooling layer

In [4]:
# Baseline CNN: one convolution + pooling stage feeding a small dense classifier
model = Sequential([
    Conv2D(28, kernel_size=(3, 3), input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),                           # flatten the 2D feature maps for the fully connected layers
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),     # output layer: 10 neurons
])
model.summary()

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

# Train for 5 epochs, reporting metrics on the test split after each epoch
model.fit(X_train, y_train_hot, validation_data=(X_test, y_test_hot), epochs=5)

# Final accuracy on both splits; verbose=0 suppresses the evaluation progress display
_, train_acc = model.evaluate(X_train, y_train_hot, verbose=0)
_, test_acc = model.evaluate(X_test, y_test_hot, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 28)        280       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 28)        0         
_________________________________________________________________
flatten (Flatten)            (None, 4732)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               605824    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 607,394
Trainable params: 607,394
Non-trainable params: 0
__________________________________________________

### Got 98.3% accuracy within 5 epochs

### Next, try different loss functions and optimizers.


In [5]:
# Search space: every loss function is paired with every optimizer
loss = [
    'categorical_crossentropy',
    'kl_divergence',
    'poisson',
    'sparse_categorical_crossentropy',
]
opt = [
    'Adadelta',
    'Adagrad',
    'Adam',
    'Adamax',
    'Nadam',
    'RMSprop',
    'SGD',
]
# Empty results table; one row per (loss, optimizer) combination
df = pd.DataFrame(
    columns=['Loss', 'Optimizer', 'Train Accuracy', 'Test Accuracy']
)

In [6]:
# Train a fresh model for every loss/optimizer combination.
# Each run uses N_EPOCHS epochs, so the full sweep takes a long time.
N_EPOCHS = 20
records = []  # one dict per run; turned into the results table after the sweep

for loss_name in loss:
    # sparse_categorical_crossentropy works on the integer labels directly;
    # every other loss in the list needs the one-hot encoded labels.
    if loss_name == 'sparse_categorical_crossentropy':
        train_targets, test_targets = y_train, y_test
    else:
        train_targets, test_targets = y_train_hot, y_test_hot

    for opt_name in opt:
        # Rebuild the network each time so no weights carry over between runs
        model = Sequential([
            Conv2D(28, kernel_size=(3, 3), input_shape=(28, 28, 1)),
            MaxPooling2D(pool_size=(2, 2)),
            Flatten(),
            Dense(128, activation='relu'),
            Dropout(0.2),
            Dense(10, activation='softmax'),
        ])
        model.compile(optimizer=opt_name, loss=loss_name, metrics=['acc'])
        model.fit(X_train, train_targets, epochs=N_EPOCHS,
                  validation_data=(X_test, test_targets), verbose=0)
        _, train_acc = model.evaluate(X_train, train_targets, verbose=0)
        _, test_acc = model.evaluate(X_test, test_targets, verbose=0)
        print('Loss: %s, Optimizer: %s, Train: %.3f, Test: %.3f' % (loss_name, opt_name, train_acc*100, test_acc*100))
        records.append({'Loss': loss_name, 'Optimizer': opt_name,
                        'Train Accuracy': train_acc*100, 'Test Accuracy': test_acc*100})

# Build the results table once from the collected records. DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row copies it on every step.
# Rebuilding from scratch also keeps this cell idempotent when it is re-run.
df = pd.DataFrame(records, columns=['Loss', 'Optimizer', 'Train Accuracy', 'Test Accuracy'])

Loss: categorical_crossentropy, Optimizer: Adadelta, Train: 89.495, Test: 90.430
Loss: categorical_crossentropy, Optimizer: Adagrad, Train: 95.358, Test: 95.480
Loss: categorical_crossentropy, Optimizer: Adam, Train: 99.948, Test: 98.550
Loss: categorical_crossentropy, Optimizer: Adamax, Train: 99.925, Test: 98.650
Loss: categorical_crossentropy, Optimizer: Nadam, Train: 99.952, Test: 98.540
Loss: categorical_crossentropy, Optimizer: RMSprop, Train: 99.970, Test: 98.590
Loss: categorical_crossentropy, Optimizer: SGD, Train: 98.987, Test: 98.040
Loss: kl_divergence, Optimizer: Adadelta, Train: 89.315, Test: 90.120
Loss: kl_divergence, Optimizer: Adagrad, Train: 95.315, Test: 95.350
Loss: kl_divergence, Optimizer: Adam, Train: 99.955, Test: 98.540
Loss: kl_divergence, Optimizer: Adamax, Train: 99.887, Test: 98.490
Loss: kl_divergence, Optimizer: Nadam, Train: 99.962, Test: 98.610
Loss: kl_divergence, Optimizer: RMSprop, Train: 99.943, Test: 98.490
Loss: kl_divergence, Optimizer: SGD, Tra

In [7]:
# Rank the runs from highest to lowest test accuracy and renumber the rows
df.sort_values(by='Test Accuracy', ascending=False, ignore_index=True)

Unnamed: 0,Loss,Optimizer,Train Accuracy,Test Accuracy
0,poisson,Adamax,99.931669,98.820001
1,sparse_categorical_crossentropy,Adamax,99.953336,98.790002
2,categorical_crossentropy,Adamax,99.924999,98.650002
3,sparse_categorical_crossentropy,Adam,99.951667,98.629999
4,kl_divergence,Nadam,99.961668,98.610002
5,categorical_crossentropy,RMSprop,99.970001,98.589998
6,sparse_categorical_crossentropy,Nadam,99.961668,98.589998
7,poisson,RMSprop,99.940002,98.570001
8,categorical_crossentropy,Adam,99.948335,98.549998
9,categorical_crossentropy,Nadam,99.951667,98.540002


# We were able to get 98.82% test accuracy (poisson loss with the Adamax optimizer)