In [1]:
import numpy
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Dense

# Use a constant seed for NumPy's global random generator so that the
# notebook can be re-run later with the same NumPy random draws.
seed = 8
numpy.random.seed(seed=seed)

In [2]:
# Load the MNIST train / test splits: image arrays plus integer class labels
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Reshape the 28x28 images to [samples][rows][cols][channels] (one channel)
# and divide by 255 to rescale the pixel values
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32') / 255

# Work out the number of classes once, from the largest label seen in either
# split, and pass it explicitly to both encodings. Letting to_categorical
# infer the width separately per split could produce one hot matrices of
# different widths if one split happened to lack the highest class.
numberClasses = int(max(y_train.max(), y_test.max())) + 1

# Introduce one hot encoding for each class
y_train = np_utils.to_categorical(y_train, num_classes = numberClasses)
y_test = np_utils.to_categorical(y_test, num_classes = numberClasses)

In [3]:
# Define CNN
def CNN(input_shape = (28, 28, 1), num_classes = None, dropout_rate = 0.2):
    """Build and compile a small convolutional classifier.

    Layer stack: Conv2D (32 feature maps, 5x5 window, relu) -> MaxPooling2D
    (2x2) -> Dropout -> Flatten -> Dense (100 units, relu) -> Dense
    (softmax over the classes).

    Args:
        input_shape: shape of a single input sample passed to the first
            layer. Defaults to (28, 28, 1), the shape the images are
            reshaped to earlier in this notebook.
        num_classes: number of units in the softmax output layer. When
            None (the default) the notebook-level `numberClasses` value is
            used, so a plain `CNN()` call behaves as it always did.
        dropout_rate: fraction of activations the dropout layer sets to 0
            during training. Defaults to 0.2.

    Returns:
        A Sequential model compiled with categorical cross-entropy loss,
        the adam optimizer and the accuracy metric.
    """
    if num_classes is None:
        # Fall back to the class count computed when the labels were encoded
        num_classes = numberClasses

    # Create the model
    model = Sequential()
    # 1st layer is a convolution layer with 32 feature maps and a 5x5
    # sliding window. The expected input shape is declared here
    model.add(Conv2D(32, (5, 5), input_shape = input_shape, activation = 'relu'))
    # 2nd layer is a pooling layer that reduces each 2x2 window to its max value
    model.add(MaxPooling2D(pool_size = (2, 2)))
    # 3rd layer is a dropout layer that zeroes a fraction of the
    # activations (20% by default) in order to reduce overfitting
    model.add(Dropout(dropout_rate))
    # 4th layer is a flatten layer that turns the multi-dimensional feature
    # maps into a vector so that a fully connected network can work on it
    model.add(Flatten())
    # 5th layer is a fully connected layer that takes in the provided vector
    model.add(Dense(100, activation = 'relu'))
    # 6th layer is the output softmax layer that gives the probability over
    # the classes
    model.add(Dense(num_classes, activation = 'softmax'))
    # Compile the above model
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
    return model

In [4]:
# Build the network, train it for 10 epochs in batches of 200 samples while
# reporting metrics on the test split after every epoch, then score it on
# the test split and print the resulting error rate.
modelInstance = CNN()
modelInstance.fit(
    X_train,
    y_train,
    validation_data = (X_test, y_test),
    epochs = 10,
    batch_size = 200,
    verbose = 2,
)
scores = modelInstance.evaluate(X_test, y_test, verbose = 0)
# scores[1] is the accuracy metric requested at compile time (scores[0] is the loss)
error_percent = 100 - scores[1] * 100
print("Error : %.2f%%" % error_percent)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 46s - loss: 0.2682 - acc: 0.9231 - val_loss: 0.0835 - val_acc: 0.9756
Epoch 2/10
 - 50s - loss: 0.0780 - acc: 0.9764 - val_loss: 0.0566 - val_acc: 0.9809
Epoch 3/10
 - 49s - loss: 0.0562 - acc: 0.9828 - val_loss: 0.0458 - val_acc: 0.9842
Epoch 4/10
 - 49s - loss: 0.0445 - acc: 0.9865 - val_loss: 0.0389 - val_acc: 0.9866
Epoch 5/10
 - 49s - loss: 0.0359 - acc: 0.9888 - val_loss: 0.0416 - val_acc: 0.9863
Epoch 6/10
 - 52s - loss: 0.0312 - acc: 0.9901 - val_loss: 0.0317 - val_acc: 0.9889
Epoch 7/10
 - 59s - loss: 0.0257 - acc: 0.9918 - val_loss: 0.0298 - val_acc: 0.9896
Epoch 8/10
 - 50s - loss: 0.0208 - acc: 0.9936 - val_loss: 0.0319 - val_acc: 0.9896
Epoch 9/10
 - 46s - loss: 0.0188 - acc: 0.9941 - val_loss: 0.0303 - val_acc: 0.9897
Epoch 10/10
 - 46s - loss: 0.0155 - acc: 0.9950 - val_loss: 0.0333 - val_acc: 0.9901
Error : 0.99%
