In [1]:
import numpy as np
np.random.seed(1337) # for reproducibility

import os
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.utils import np_utils

# Training hyper-parameters (read by the model-building/training cell below).
batch_size = 128  # Number of images used in each optimization step
nb_classes = 10   # One class per digit
nb_epoch = 20     # Number of times the whole data is used to learn

# Load the MNIST train/test splits as (images, labels) pairs.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten the data: the MLP doesn't use the 2D structure of the images
# (784 = 28*28 features per image). The sample count is taken from the array
# itself and the feature width is inferred with -1, so nothing breaks if a
# subset of the data is used instead of the full 60000/10000 split.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Both splits must expose the same number of features to the network.
assert X_train.shape[1] == X_test.shape[1], "train/test feature width mismatch"

# Make the values floats in [0;1] instead of ints in [0;255].
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Display the sample counts to check that everything is OK. A single formatted
# string is used so that the output reads "60000 train samples" on both
# Python 2 and Python 3 (print(a, b) shows a tuple on Python 2).
print('%d train samples' % X_train.shape[0])
print('%d test samples' % X_test.shape[0])


Using TensorFlow backend.


(60000, 'train samples')
(10000, 'test samples')


In [2]:

# Convert class vectors to binary class matrices (i.e. one-hot vectors),
# which is the label format expected by the categorical cross-entropy loss.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# Define the model architecture: two hidden layers of 512 ReLU units, each
# followed by dropout (rate 0.2), then a softmax output layer.
model = Sequential()
# The input width is read from the flattened data instead of hard-coding 784.
model.add(Dense(512, input_shape=(X_train.shape[1],)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(nb_classes))  # Last layer with one output per class
model.add(Activation('softmax'))  # We want a score similar to a probability for each class

# Use RMSprop to do the gradient descent, see
# http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
# and http://cs231n.github.io/neural-networks-3/#ada
rms = RMSprop()
# The function to optimize is the cross entropy between the true label and
# the output (softmax) of the model.
model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=["accuracy"])

# Make the model learn.
# `epochs` is the Keras 2 name of the former `nb_epoch` argument.
# NOTE(review): the test split is also passed as validation data, so the
# val_* figures logged during training are computed on the same samples as
# the final evaluation below.
model.fit(X_train, Y_train,
          batch_size=batch_size, epochs=nb_epoch,
          verbose=2,
          validation_data=(X_test, Y_test))

# Evaluate how the model does on the test set; with metrics=["accuracy"]
# the result is indexed as [loss, accuracy].
score = model.evaluate(X_test, Y_test, verbose=0)

# Single formatted strings so the output is not rendered as a tuple on Python 2.
print('Test score: %s' % score[0])
print('Test accuracy: %s' % score[1])



Train on 60000 samples, validate on 10000 samples
Epoch 1/20
 - 15s - loss: 0.2427 - acc: 0.9252 - val_loss: 0.1204 - val_acc: 0.9628
Epoch 2/20
 - 14s - loss: 0.1014 - acc: 0.9688 - val_loss: 0.0793 - val_acc: 0.9761
Epoch 3/20
 - 15s - loss: 0.0762 - acc: 0.9768 - val_loss: 0.0828 - val_acc: 0.9752
Epoch 4/20
 - 17s - loss: 0.0615 - acc: 0.9818 - val_loss: 0.0993 - val_acc: 0.9738
Epoch 5/20
 - 17s - loss: 0.0501 - acc: 0.9851 - val_loss: 0.0770 - val_acc: 0.9796
Epoch 6/20
 - 18s - loss: 0.0436 - acc: 0.9868 - val_loss: 0.0797 - val_acc: 0.9794
Epoch 7/20
 - 16s - loss: 0.0388 - acc: 0.9887 - val_loss: 0.0714 - val_acc: 0.9828
Epoch 8/20
 - 15s - loss: 0.0347 - acc: 0.9899 - val_loss: 0.0818 - val_acc: 0.9832
Epoch 9/20
 - 15s - loss: 0.0320 - acc: 0.9905 - val_loss: 0.0781 - val_acc: 0.9837
Epoch 10/20
 - 14s - loss: 0.0271 - acc: 0.9924 - val_loss: 0.0874 - val_acc: 0.9829
Epoch 11/20
 - 15s - loss: 0.0238 - acc: 0.9930 - val_loss: 0.0854 - val_acc: 0.9832
Epoch 12/20
 - 14s - los