# MNIST Implementation with ANN - Improving accuracy #1

In [1]:
# Load necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.utils import to_categorical
from keras.datasets import mnist
import pandas as pd

In [2]:
# Load MNIST handwritten digit data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Min-max scale the pixels to [0, 1]; this is good for training.
# Cast to float first so the arithmetic is never done in the raw integer
# dtype (uint8 per the Keras docs), where a subtraction could wrap around.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Use the TRAINING statistics for both splits: the test set must be
# transformed exactly like the data the model was fitted on, and its own
# min/max must not leak into preprocessing.
pixel_min, pixel_max = X_train.min(), X_train.max()
X_train = (X_train - pixel_min) / (pixel_max - pixel_min)
X_test = (X_test - pixel_min) / (pixel_max - pixel_min)

# Convert the integer labels into one-hot format (10 digit classes)
y_train_hot = to_categorical(y_train, num_classes=10)
y_test_hot = to_categorical(y_test, num_classes=10)

##### Note: `model.fit` takes an optional `batch_size` argument. If unspecified, `batch_size` defaults to 32.
##### With 60000 training images that is 60000/32 = 1875 batches (steps) per epoch.

In [3]:
# Baseline network: 784 flattened inputs -> 16 -> 16 -> 10 class probabilities
model = Sequential([
    Flatten(input_shape=(28, 28)),       # Input layer, 28x28 image -> 784 values
    Dense(16, activation='relu'),        # 1st hidden layer, 16 neurons
    Dense(16, activation='relu'),        # 2nd hidden layer, 16 neurons
    Dense(10, activation='softmax'),     # Output layer, one neuron per digit
])
model.summary()

# Compile with Adam and categorical cross-entropy on the one-hot labels
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# Train for 5 epochs, reporting the test-set metrics after every epoch
model.fit(
    X_train,
    y_train_hot,
    epochs=5,
    validation_data=(X_test, y_test_hot),
)

# Final accuracy on both splits; verbose=0 suppresses the progress output
_, train_acc = model.evaluate(X_train, y_train_hot, verbose=0)
_, test_acc = model.evaluate(X_test, y_test_hot, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 16)                12560     
_________________________________________________________________
dense_1 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                170       
Total params: 13,002
Trainable params: 13,002
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train: 0.958, Test: 0.952


### Got 95.2% test accuracy within 5 epochs

### Now let's try different loss functions and optimizers.


In [4]:
# Loss functions to compare
loss = [
    'categorical_crossentropy',
    'kl_divergence',
    'poisson',
    'sparse_categorical_crossentropy',
]
# Optimizers to compare
opt = [
    'Adadelta',
    'Adagrad',
    'Adam',
    'Adamax',
    'Nadam',
    'RMSprop',
    'SGD',
]
# Empty results table, one row per loss/optimizer combination
df = pd.DataFrame(
    columns=['Loss', 'Optimizer', 'Train Accuracy', 'Test Accuracy']
)

In [5]:
# Loop through every loss/optimizer combination with 100 epochs each.
# This will run for a few hours.
results = []  # one record (dict) per finished combination
for loss_name in loss:
    # sparse_categorical_crossentropy consumes the integer labels directly;
    # every other loss in the list needs the one-hot encoded targets.
    use_sparse = loss_name == 'sparse_categorical_crossentropy'
    train_targets = y_train if use_sparse else y_train_hot
    test_targets = y_test if use_sparse else y_test_hot

    for opt_name in opt:
        # Fresh, untrained model for every combination so runs are independent
        model = Sequential([
            Flatten(input_shape=(28, 28)),
            Dense(16, activation='relu'),
            Dense(16, activation='relu'),
            Dense(10, activation='softmax'),
        ])
        model.compile(optimizer=opt_name, loss=loss_name, metrics=['acc'])
        model.fit(X_train, train_targets, epochs=100,
                  validation_data=(X_test, test_targets), verbose=0)
        _, train_acc = model.evaluate(X_train, train_targets, verbose=0)
        _, test_acc = model.evaluate(X_test, test_targets, verbose=0)

        print('Loss: %s, Optimizer: %s, Train: %.3f, Test: %.3f'
              % (loss_name, opt_name, train_acc * 100, test_acc * 100))
        results.append({
            'Loss': loss_name,
            'Optimizer': opt_name,
            'Train Accuracy': train_acc * 100,
            'Test Accuracy': test_acc * 100,
        })
        # DataFrame.append was removed in pandas 2.0, so the table is built
        # from the collected records instead. It is refreshed after every
        # combination (cheap: at most 28 rows) so partial results survive
        # an interrupted multi-hour run.
        df = pd.DataFrame(results)

Loss: categorical_crossentropy, Optimizer: Adadelta, Train: 83.548, Test: 84.530
Loss: categorical_crossentropy, Optimizer: Adagrad, Train: 91.267, Test: 91.530
Loss: categorical_crossentropy, Optimizer: Adam, Train: 98.948, Test: 95.360
Loss: categorical_crossentropy, Optimizer: Adamax, Train: 97.732, Test: 95.660
Loss: categorical_crossentropy, Optimizer: Nadam, Train: 99.125, Test: 95.610
Loss: categorical_crossentropy, Optimizer: RMSprop, Train: 97.462, Test: 94.890
Loss: categorical_crossentropy, Optimizer: SGD, Train: 97.552, Test: 95.480
Loss: kl_divergence, Optimizer: Adadelta, Train: 82.328, Test: 82.930
Loss: kl_divergence, Optimizer: Adagrad, Train: 91.200, Test: 91.180
Loss: kl_divergence, Optimizer: Adam, Train: 98.908, Test: 95.110
Loss: kl_divergence, Optimizer: Adamax, Train: 97.693, Test: 95.720
Loss: kl_divergence, Optimizer: Nadam, Train: 98.652, Test: 95.210
Loss: kl_divergence, Optimizer: RMSprop, Train: 97.952, Test: 95.240
Loss: kl_divergence, Optimizer: SGD, Tra

In [6]:
# Rank every loss/optimizer combination, best test accuracy first
df.sort_values(by='Test Accuracy', ascending=False, ignore_index=True)

Unnamed: 0,Loss,Optimizer,Train Accuracy,Test Accuracy
0,sparse_categorical_crossentropy,Adamax,97.898334,95.94
1,kl_divergence,SGD,97.751665,95.740002
2,kl_divergence,Adamax,97.693336,95.719999
3,categorical_crossentropy,Adamax,97.731668,95.660001
4,categorical_crossentropy,Nadam,99.124998,95.609999
5,poisson,Adamax,97.933334,95.569998
6,categorical_crossentropy,SGD,97.551668,95.480001
7,sparse_categorical_crossentropy,RMSprop,97.671664,95.420003
8,categorical_crossentropy,Adam,98.948336,95.359999
9,sparse_categorical_crossentropy,Adam,98.908335,95.310003


# We were able to get 95.94% test accuracy (sparse_categorical_crossentropy loss with the Adamax optimizer)