# MNIST Implementation with ANN - Improving accuracy

In [1]:
# Load necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.utils import to_categorical
from keras.datasets import mnist
import pandas as pd

In [2]:
# Load the MNIST handwritten digit data (training and test splits)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Min-max scale the pixel values to the range [0, 1]; this helps training.
# The scaling statistics are taken from the training split only and reused for
# the test split, so both splits get exactly the same transformation and no
# test-set statistics leak into preprocessing. Converting the statistics to
# Python floats also keeps the subtraction out of unsigned-integer arithmetic.
pixel_min = float(X_train.min())
pixel_max = float(X_train.max())
X_train = (X_train - pixel_min) / (pixel_max - pixel_min)
X_test = (X_test - pixel_min) / (pixel_max - pixel_min)

# Convert the integer class labels into one-hot vectors (10 classes)
y_train_hot = to_categorical(y_train, num_classes=10)
y_test_hot = to_categorical(y_test, num_classes=10)

### Trying an arbitrarily chosen architecture: 3 hidden layers with 100 nodes each, using ReLU activation.

##### Note: model.fit takes an optional argument batch_size. If unspecified, batch_size defaults to 32.
##### With 60000 training images this gives 60000/32 = 1875 batches (steps) per epoch.

In [3]:
# Build a simple fully connected network as one stack of layers
model = Sequential([
    Flatten(input_shape=(28, 28)),       # input: 28x28 image flattened to 784 values
    Dense(100, activation='relu'),       # hidden layer 1, 100 neurons
    Dense(100, activation='relu'),       # hidden layer 2, 100 neurons
    Dense(100, activation='relu'),       # hidden layer 3, 100 neurons
    Dense(10, activation='softmax'),     # output layer, 10 neurons
])
model.summary()

# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

# Train for 5 epochs, reporting metrics on the test split after every epoch
model.fit(
    X_train,
    y_train_hot,
    epochs=5,
    validation_data=(X_test, y_test_hot),
)

# Measure final accuracy on both splits; verbose=0 suppresses the progress display
_, train_acc = model.evaluate(X_train, y_train_hot, verbose=0)
_, test_acc = model.evaluate(X_test, y_test_hot, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 100)               78500     
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1010      
Total params: 99,710
Trainable params: 99,710
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train: 0.985, Test: 0.971


### Got 97.1% test accuracy within 5 epochs

### Next, we try different loss functions and optimizers.


In [4]:
# Loss functions to compare
loss = [
    'categorical_crossentropy',
    'kl_divergence',
    'poisson',
    'sparse_categorical_crossentropy',
]
# Optimizers to compare
opt = [
    'Adadelta',
    'Adagrad',
    'Adam',
    'Adamax',
    'Nadam',
    'RMSprop',
    'SGD',
]
# Empty results table: one row per (loss, optimizer) combination will be stored here
df = pd.DataFrame(
    columns=['Loss', 'Optimizer', 'Train Accuracy', 'Test Accuracy']
)

In [5]:
# Train the same architecture under every (loss, optimizer) combination for
# 100 epochs each and record the final accuracies. This will run for a few hours.
records = []  # one dict per trained model, turned into a DataFrame at the end
for loss_name in loss:
    # sparse_categorical_crossentropy works on the integer class labels directly;
    # every other loss here is fed the one-hot encoded labels.
    if loss_name == 'sparse_categorical_crossentropy':
        train_labels, test_labels = y_train, y_test
    else:
        train_labels, test_labels = y_train_hot, y_test_hot

    for opt_name in opt:
        # Build a fresh model for each run so no weights carry over between runs
        model = Sequential([
            Flatten(input_shape=(28, 28)),
            Dense(100, activation='relu'),
            Dense(100, activation='relu'),
            Dense(100, activation='relu'),
            Dense(10, activation='softmax'),
        ])
        model.compile(optimizer=opt_name, loss=loss_name, metrics=['acc'])
        model.fit(X_train, train_labels, epochs=100,
                  validation_data=(X_test, test_labels), verbose=0)
        _, train_acc = model.evaluate(X_train, train_labels, verbose=0)
        _, test_acc = model.evaluate(X_test, test_labels, verbose=0)
        print('Loss: %s, Optimizer: %s, Train: %.3f, Test: %.3f' % (loss_name, opt_name, train_acc*100, test_acc*100))
        records.append({'Loss': loss_name,
                        'Optimizer': opt_name,
                        'Train Accuracy': train_acc*100,
                        'Test Accuracy': test_acc*100})

# Build the results table in one step. DataFrame.append was removed in pandas 2.0,
# and growing a frame row by row copies the whole frame on every iteration.
# Rebuilding from `records` also makes this cell safe to re-run without
# duplicating rows.
df = pd.DataFrame(records, columns=['Loss', 'Optimizer', 'Train Accuracy', 'Test Accuracy'])

Loss: categorical_crossentropy, Optimizer: Adadelta, Train: 91.773, Test: 92.160
Loss: categorical_crossentropy, Optimizer: Adagrad, Train: 96.517, Test: 96.120
Loss: categorical_crossentropy, Optimizer: Adam, Train: 99.903, Test: 98.050
Loss: categorical_crossentropy, Optimizer: Adamax, Train: 100.000, Test: 97.880
Loss: categorical_crossentropy, Optimizer: Nadam, Train: 99.935, Test: 98.110
Loss: categorical_crossentropy, Optimizer: RMSprop, Train: 99.857, Test: 97.720
Loss: categorical_crossentropy, Optimizer: SGD, Train: 100.000, Test: 97.790
Loss: kl_divergence, Optimizer: Adadelta, Train: 91.655, Test: 91.800
Loss: kl_divergence, Optimizer: Adagrad, Train: 96.572, Test: 96.040
Loss: kl_divergence, Optimizer: Adam, Train: 99.883, Test: 97.810
Loss: kl_divergence, Optimizer: Adamax, Train: 99.997, Test: 97.780
Loss: kl_divergence, Optimizer: Nadam, Train: 99.880, Test: 98.150
Loss: kl_divergence, Optimizer: RMSprop, Train: 99.852, Test: 97.680
Loss: kl_divergence, Optimizer: SGD, T

In [6]:
# Show the results ranked from best to worst test accuracy, with a fresh 0..n-1 index
df.sort_values(by='Test Accuracy', ascending=False, ignore_index=True)

Unnamed: 0,Loss,Optimizer,Train Accuracy,Test Accuracy
0,kl_divergence,Nadam,99.879998,98.150003
1,categorical_crossentropy,Nadam,99.935001,98.110002
2,categorical_crossentropy,Adam,99.903333,98.049998
3,sparse_categorical_crossentropy,Nadam,99.919999,98.000002
4,sparse_categorical_crossentropy,Adamax,100.0,97.909999
5,poisson,Adam,99.879998,97.890002
6,categorical_crossentropy,Adamax,100.0,97.88
7,poisson,Adamax,100.0,97.869998
8,sparse_categorical_crossentropy,Adam,99.865001,97.850001
9,kl_divergence,Adam,99.883336,97.81


# We were able to get 98.15% test accuracy (kl_divergence loss with the Nadam optimizer)