# Neural network for classification on MNIST


## (1) Preparing the dataset

In [None]:
# 1. Fetch the MNIST images and labels bundled with Keras.
from keras.datasets import mnist

# load_data() yields a (train, test) pair; each element is an (images, labels) pair.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
del train_split, test_split  # keep only the four arrays in the notebook namespace

In [None]:
# 2. Report the shape of every array in the train and test splits.
for shape_label, split_array in (
    ('x_train shape:', x_train),
    ('y_train shape:', y_train),
    ('x_test shape:', x_test),
    ('y_test shape:', y_test),
):
    print(shape_label, split_array.shape)

In [None]:
# Render a couple of training images, one figure per image.
import matplotlib.pyplot as plt

for sample_idx in (1, 2):
    plt.imshow(x_train[sample_idx])
    plt.show()

In [None]:

# Preprocess the images and labels for a fully connected network.
# NOTE: this cell rebinds x_train / x_test / y_train / y_test, so it is not
# idempotent -- re-run the data-loading cell before running it a second time.

# Flatten every 28x28 image into a 784-element vector. The number of samples
# is read from the array itself rather than hard-coded (60000 / 10000), so the
# cell keeps working if only a subset of the data is loaded.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

# Cast to float32 and scale by 255 so the values lie in [0, 1].
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Turn the integer labels into one-hot class vectors: a single 1 marks the
# class, every other entry is 0.
# `keras.utils.np_utils` has been removed from recent Keras releases;
# `to_categorical` is exported from `keras.utils` in old and new versions alike.
from keras.utils import to_categorical
n_classes = 10
print("Shape before one-hot encoding: ", y_train.shape)
y_train = to_categorical(y_train, n_classes)
y_test = to_categorical(y_test, n_classes)
print("Shape after one-hot encoding: ", y_train.shape)

## (2) Building the network

In [None]:
# First network: a single sigmoid hidden layer feeding a softmax classifier.
from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential([
    # hidden layer: 784 input features -> 256 units
    Dense(256, input_shape=(784,)),
    Activation('sigmoid'),
    # output layer: 10 units, softmax because the targets are multi-class
    Dense(10),
    Activation('softmax'),
])
       

In [None]:
#To visualize the architecture of neural network
!pip install pydot
!pip install GraphViz

In [None]:
from keras.utils import plot_model

# Draw the layer graph, annotated with tensor shapes, and write it to model.png.
plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
)

In [None]:
from IPython.display import display, Image

# Load the architecture diagram from model.png and show it inline.
architecture_png = Image(filename='model.png')
display(architecture_png)

##### Number_of_weights = 784 x 256 + 256 x 10 = 203264
##### Number_of_biases = 256 + 10 = 266
##### Number of learnable parameters = 203264 + 266 = 203530

## (3) Compiling and training the model

In [None]:
# Configure training: categorical cross-entropy loss, the 'sgd' optimizer,
# and accuracy tracked as an extra metric.
model.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs with mini-batches of 64 samples. 10% of the training data
# is held out as a validation set; the object returned by fit() is kept in
# `history` so the curves can be plotted afterwards.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.1,
)


In [None]:
# Plot the train/validation accuracy curves in one figure and the
# train/validation loss curves in a second figure.

# Keras < 2.3 records the accuracy metric under 'acc'; later versions use the
# name passed to compile() ('accuracy'). Use whichever key is actually present
# so the cell does not fail with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

for metric_key, axis_label, legend_loc in (
    (acc_key, 'accuracy', 'lower right'),
    ('loss', 'loss', 'upper right'),
):
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title('model ' + axis_label)
    plt.ylabel(axis_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc=legend_loc)
    plt.show()

The figures show that the training loss decreases steadily, which means the network is learning to classify the digits. However, the training loss is still larger than the validation loss, and the loss curves were still decreasing when training stopped. Therefore, more epochs and a bigger network are needed.

## (4) Evaluating and saving the model

In [None]:
# Score the trained model on the test set, using batches of 128 samples.
# The first entry of the result is reported as the loss, the second as the accuracy.
score = model.evaluate(x_test, y_test, batch_size=128)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Persist the trained model in two parts.
# 1) Architecture: serialized to a JSON string and written to model.json.
model_json = model.to_json()
with open("model.json", "w") as architecture_file:
    architecture_file.write(model_json)

# 2) Learned weights: written to an HDF5 file.
model.save_weights('weights.hdf5')

## (6) Experiment
#### a) Compare sigmoid vs ReLU activations

In [None]:
# Second network: same layout as the first model, but the hidden layer uses
# a ReLU activation.
model2 = Sequential([
    # hidden layer: 784 input features -> 256 units
    Dense(256, input_shape=(784,)),
    Activation('relu'),
    # output layer: 10 units with softmax
    Dense(10),
    Activation('softmax'),
])

# Compile with categorical cross-entropy, the 'sgd' optimizer and accuracy.
model2.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, batch size 64, with 10% of the data held out for validation.
history2 = model2.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.1,
)


In [None]:
# Overlay the sigmoid and ReLU runs: one figure for accuracy, one for loss.
# Curves are drawn in the order train/validation (sigmoid), then
# train/validation (ReLU), which matches the legend entries below.
for metric_name, legend_loc in (('accuracy', 'lower right'),
                                ('loss', 'upper right')):
    for run_logs in (history.history, history2.history):
        # Keras < 2.3 records accuracy under 'acc'; later versions use
        # 'accuracy'. Resolve the key per run to avoid a KeyError.
        if metric_name == 'accuracy' and 'acc' in run_logs:
            curve_key = 'acc'
        else:
            curve_key = metric_name
        plt.plot(run_logs[curve_key])
        plt.plot(run_logs['val_' + curve_key])
    plt.title('model ' + metric_name)
    plt.ylabel(metric_name)
    plt.xlabel('epoch')
    plt.legend(['train_sigmoid', 'validation_sigmoid','train_relu','validation_relu'], loc=legend_loc)
    plt.show()

The curves show that the ReLU network reaches a higher accuracy and a smaller loss than the sigmoid network.