# Neural Network to Identify Handwritten Characters

### Imports

In [1]:
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.utils import np_utils

# Seed NumPy's global random generator with a fixed value.
# NOTE(review): only NumPy is seeded here; presumably the TensorFlow backend
# draws from its own generator, so runs may still differ — confirm.
np.random.seed(1671) # for reproducibility

Using TensorFlow backend.


### Set Initial Parameters

In [2]:
# --- Network shape ---
N_HIDDEN = 128     # units in each hidden Dense layer
NB_CLASSES = 10    # number of outputs = number of digits

# --- Training schedule ---
OPTIMIZER = SGD()  # optimizer: stochastic gradient descent, default settings
BATCH_SIZE = 128   # passed as `batch_size` to model.fit
NB_EPOCH = 20      # passed as `epochs` to model.fit

# --- Logging ---
VERBOSE = 1        # passed as `verbose` to model.fit / model.evaluate

### Get Dataset for Train and Test

In [3]:
VALIDATION_SPLIT = 0.2  # fraction of the training rows reserved for validation in fit()
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Each 28x28 image is flattened into one row of RESHAPED = 784 values.
RESHAPED = 784

# -1 lets NumPy infer the number of samples from the array itself instead of
# hard-coding 60000 / 10000, so the cell also works on a subset of the data.
# The cast to float32 prepares the pixels for the fractional scaling that follows.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')


### Normalize Dataset

In [4]:
# Scale pixel values by 1/255.
# The division happens in place, so running this cell a second time would
# shrink the already-scaled data again. The max() guard skips the division
# once the values are no longer above 1, which makes the cell safe to re-run.
if X_train.max() > 1.0:
    X_train /= 255
if X_test.max() > 1.0:
    X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


60000 train samples
10000 test samples


### Convert Class Vectors to Binary Class Matrices

In [5]:
# One-hot encode the integer class labels: each label becomes a row with
# NB_CLASSES entries, matching the NB_CLASSES-wide softmax output of the model.
Y_train, Y_test = (
    np_utils.to_categorical(labels, NB_CLASSES) for labels in (y_train, y_test)
)

### Build Model

In [6]:
# Two ReLU hidden layers of N_HIDDEN units each, followed by an
# NB_CLASSES-wide softmax output layer.
model = Sequential([
    Dense(N_HIDDEN, input_shape=(RESHAPED,)),
    Activation('relu'),
    Dense(N_HIDDEN),
    Activation('relu'),
    Dense(NB_CLASSES),
    Activation('softmax'),  # final stage is softmax
])
model.summary()
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
________________________________________________

### Train Model

In [7]:
# Train the compiled model. A VALIDATION_SPLIT fraction of the training rows
# is held out for validation; the value returned by fit() is kept in `history`.
history = model.fit(
    x=X_train, y=Y_train,
    epochs=NB_EPOCH, batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    verbose=VERBOSE,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Evaluate Model

In [8]:
# Score the trained network on the test set. The first two entries of the
# result are reported as the test score (loss) and the test accuracy.
example_score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
test_loss, test_accuracy = example_score[0], example_score[1]
print("Test score:", test_loss)
print('Test accuracy:', test_accuracy)

Test score: 0.18599770209044217
Test accuracy: 0.9463000297546387


## Experiment with Hidden Neuron Density

### 16 Hidden Neurons per Layer 

In [9]:
# Experiment 1: shrink both hidden layers to 16 units. The model must be
# rebuilt after this assignment for the new width to take effect.
N_HIDDEN = 16

### Rebuild Model

In [10]:
# Rebuild the network from scratch so no weights carry over from the previous
# run; N_HIDDEN now sets the width of both hidden layers.
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))  # final stage is softmax
model.summary()
# Compile with a fresh optimizer cloned from OPTIMIZER's configuration instead
# of the shared instance that the previous model already trained with. This
# keeps optimizer state independent between experiments while still honouring
# whatever optimizer type and hyperparameters the config cell selected.
model.compile(loss='categorical_crossentropy',
              optimizer=type(OPTIMIZER).from_config(OPTIMIZER.get_config()),
              metrics=['accuracy'])

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 16)                12560     
_________________________________________________________________
activation_4 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_5 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                170       
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
Total params: 13,002
Trainable params: 13,002
Non-trainable params: 0
__________________________________________________

### Retrain Model

In [11]:
# Train the rebuilt model with the same settings as the first run so that
# only the hidden-layer width differs. Note that `history` is overwritten.
history = model.fit(
    x=X_train, y=Y_train,
    epochs=NB_EPOCH, batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    verbose=VERBOSE,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Re-evaluate Model

In [12]:
# Test-set result for the 16-unit network, stored under its own name so it
# is still available when the results are printed at the end of the notebook.
score_16_hidden = model.evaluate(X_test, Y_test, verbose=VERBOSE)



### 1024 Hidden Neurons per Layer

In [13]:
# Experiment 2: widen both hidden layers to 1024 units. The model must be
# rebuilt after this assignment for the new width to take effect.
N_HIDDEN = 1024

### Rebuild Model

In [14]:
# Rebuild the network from scratch so no weights carry over from the previous
# run; N_HIDDEN now sets the width of both hidden layers.
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))  # final stage is softmax
model.summary()
# Compile with a fresh optimizer cloned from OPTIMIZER's configuration instead
# of the shared instance that earlier models already trained with. This keeps
# optimizer state independent between experiments while still honouring
# whatever optimizer type and hyperparameters the config cell selected.
model.compile(loss='categorical_crossentropy',
              optimizer=type(OPTIMIZER).from_config(OPTIMIZER.get_config()),
              metrics=['accuracy'])

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 1024)              803840    
_________________________________________________________________
activation_7 (Activation)    (None, 1024)              0         
_________________________________________________________________
dense_8 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
activation_8 (Activation)    (None, 1024)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 10)                10250     
_________________________________________________________________
activation_9 (Activation)    (None, 10)                0         
Total params: 1,863,690
Trainable params: 1,863,690
Non-trainable params: 0
____________________________________________

### Retrain Model

In [15]:
# Train the rebuilt model with the same settings as the earlier runs so that
# only the hidden-layer width differs. Note that `history` is overwritten.
history = model.fit(
    x=X_train, y=Y_train,
    epochs=NB_EPOCH, batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    verbose=VERBOSE,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Re-evaluate Model

In [16]:
# Test-set result for the 1024-unit network, stored under its own name so it
# is still available when the results are printed at the end of the notebook.
score_1024_hidden = model.evaluate(X_test, Y_test, verbose=VERBOSE)



## Experiment Results

In [18]:
# Side-by-side report of the three experiments: a heading followed by the
# test score and test accuracy, with a blank line between experiments.
experiment_results = (
    ("Original 128 neurons per hidden layer", example_score),
    ("16 neurons per hidden layer", score_16_hidden),
    ("1024 neurons per hidden layer", score_1024_hidden),
)
for position, (heading, scores) in enumerate(experiment_results):
    if position:
        print()  # separator before every experiment except the first
    print(heading)
    print("Test score:", scores[0])
    print('Test accuracy:', scores[1])

Original 128 neurons per hidden layer
Test score: 0.18599770209044217
Test accuracy: 0.9463000297546387

16 neurons per hidden layer
Test score: 0.25991266669929025
Test accuracy: 0.9240999817848206

1024 neurons per hidden layer
Test score: 0.164773375582695
Test accuracy: 0.9516000151634216


## Conclusion

Above I changed the number of neurons in the 2 hidden layers of this neural network, experimenting with 16, 128, and 1024 neurons per layer. This does not affect the training or testing data, but it does affect the outcome of the model. With each increase in the number of neurons we saw an increase in the validation accuracy rate and a decrease in the loss. This shows that the model is better at predicting the handwritten digits and isn’t overtraining. The cost of this, though, is computation. The 16 neurons took 1 second to train each epoch, the 128 neurons took 3 seconds, and the 1024 neurons took 20 seconds. This accelerating cost in computation is also paired with a diminishing return in accuracy. The test accuracy only increased from 92.4% with 16 neurons, to 94.6% with 128, and to 95.2% with 1024 neurons. These diminishing returns suggest that, for a specific set of constraints, there is an optimal number of neurons that will generate the best accuracy without sacrificing unnecessary computational resources and time.