In [2]:
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
np.random.seed(1671) # for reproducibility

# network and training
# NB_EPOCH = 200
# NB_EPOCH = 20
# Below is my changed parameter and the original
# NB_EPOCH = 250
NB_EPOCH = 20
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10  
OPTIMIZER = SGD()  
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2  # How much TRAIN is reserved for VALIDATION
# data: shuffled and split between train and test sets

(X_train, y_train), (X_test, y_test) = mnist.load_data()
RESHAPED = 784

X_train = X_train.reshape(60000, RESHAPED)
X_test = X_test.reshape(10000, RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# normalize
X_train /= 255
X_test /= 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices 

Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

# First parameter change
BATCH_SIZE = 45

# Code changed to accomodate parameter change
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score for BATCH_SIZE=45:", score[0])
print('Test accuracy for BATCH_SIZE=45:', score[1])

# Second parameter change
NB_EPOCH = 40

# Change code to accomodate parameter change
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score for NB_EPOCH=40:", score[0])
print('Test accuracy for NB_EPOCH=40:', score[1])

# Final parameter change
N_HIDDEN = 142

model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score for N_HIDDEN=142:", score[0])
print('Test accuracy for N_HIDDEN=142:', score[1])



60000 train samples
10000 test samples
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_4 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_5 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                1290      
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

In [None]:
# Without changing any parameters from the original code, we have a baseline accuracy of around 90%
# Following the texbook and adding improvements to the code shown on pages 22 and 23, we get a better accuracy of almost 95%
# Now we have improved the code further as shown on page 25, the accuracy on training is around 90%, around 95% validation, and around 95% on the test. 
# Testing my own parameters,we get train score for batch size: 0.105, test accuracy almost 97%. The test accuracy became its highest yet, but the score the lowest for batch size. 
# The lower the batch_size, the higher the accuracy, but the lower the score. 
# For the epoch change: test score is 0.082 and accuracy is almost 98%, which is another improvement on accuracy but disprovement on score. 
# For the final parameter n_hidden is score 0.079 and accuracy 0.98. 
# The common occurence here is that as the variables get lower the accuracy gets higher and the score gets lower. 