# Simple CNN for MNIST 

Using the MNIST dataset (70,000 pictures of hand-written digits), we will train a simple CNN that can predict the digit shown in a picture of a hand-written digit with 99% accuracy. 

In [1]:
import numpy as np
np.random.seed(1337)  # for reproducibility
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Input, Conv2D, MaxPool2D
from keras.utils import np_utils
import matplotlib.pyplot as plt

Using TensorFlow backend.


Network parameters:

In [None]:
# --- training schedule ---
nb_epoch = 15      # number of epochs handed to model.fit
batch_size = 32    # mini-batch size handed to model.fit
nb_classes = 10    # number of target classes (one per digit)

# --- input images ---
img_rows = img_cols = 28   # input image dimensions

# --- convolutional layers ---
nb_filters = 32        # number of convolutional filters to use
kernel_size = (3, 3)   # convolution kernel size
pool_size = (2, 2)     # size of pooling area for max pooling

Load the data, which comes already split into a training set and a test set.

In [None]:
# MNIST as shipped with Keras: already shuffled and divided into a
# training part and a test part, each an (images, labels) pair.
train_set, test_set = mnist.load_data()
X_train, y_train = train_set
X_test, y_test = test_set

In [None]:
# Keep only the first `n_train_samples` training examples (presumably to
# keep training fast -- confirm). One shared constant guarantees that the
# images and their labels are always cut to the same length.
n_train_samples = 1000
X_train = X_train[:n_train_samples]
y_train = y_train[:n_train_samples]

In [None]:
# Per-sample shape in channels-last order (rows, cols, channels), which is
# the layout the data is reshaped to here and the one the model's Input
# layer is declared with. MNIST is greyscale, hence a single channel.
input_shape = (img_rows, img_cols, 1)

# Add the explicit channel axis: (n_samples, rows, cols) -> (n_samples, rows, cols, 1)
X_train = X_train.reshape(X_train.shape[0], *input_shape)
X_test = X_test.reshape(X_test.shape[0], *input_shape)

In [None]:
# Convert the pixel values to float32 and divide by 255.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Report the resulting array shape and the number of samples per set.
print('X_train shape:', X_train.shape)
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

In [None]:
# One-hot encode the integer class labels: each label vector becomes a
# binary matrix with `nb_classes` columns.
Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes)
    for labels in (y_train, y_test)
)

In [None]:
# build validation data
# `split` is the fraction of the samples kept for training; everything after
# the cut-off index becomes the validation set.
split = 0.7
split_value = int(split * len(X_train))

# The right-hand sides are evaluated before any name is rebound, so the
# validation slices are still taken from the full, un-truncated arrays.
X_train, X_val = X_train[:split_value], X_train[split_value:]
Y_train, Y_val = Y_train[:split_value], Y_train[split_value:]

In [None]:
# Show the first nine training images in a 3x3 grid; channel 0 is the
# only channel, so it is selected to obtain a 2-D greyscale picture.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(16, 9))
for idx, panel in enumerate(ax.ravel()):
    digit_image = X_train[idx, :, :, 0]
    panel.imshow(digit_image, cmap='gray')

Build the CNN. 

In [None]:
# Architecture: two (convolution -> max-pooling) stages, then a dense
# classifier head with a softmax over the `nb_classes` outputs.
# All sizes come from the network-parameter cell so that changing a value
# there actually changes the model.

# Channels-last greyscale input: (rows, cols, 1).
inp = Input(shape=(img_rows, img_cols, 1))

# Stage 1: convolution + pooling.
conv1 = Conv2D(filters=nb_filters,
               kernel_size=kernel_size,
               strides=(1, 1),
               padding='same',
               activation='relu')(inp)
pool1 = MaxPool2D(pool_size=pool_size, padding='same')(conv1)

# Stage 2: convolution + pooling.
conv2 = Conv2D(filters=nb_filters,
               kernel_size=kernel_size,
               strides=(1, 1),
               padding='same',
               activation='relu')(pool1)
pool2 = MaxPool2D(pool_size=pool_size, padding='same')(conv2)

# Classifier head: flatten the feature maps, one hidden dense layer, then
# one softmax output unit per class.
flat = Flatten()(pool2)
hidden = Dense(units=128, activation='relu')(flat)
probabilities = Dense(units=nb_classes, activation='softmax')(hidden)

model = Model(inputs=inp, outputs=probabilities)

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

Show a summary of the model parameters. 

In [None]:
# Print Keras' summary of the compiled model.
model.summary()

Now train the model and evaluate it on the test set. 

In [None]:
# Train on the training split and monitor the held-out validation split
# after every epoch. `epochs` is the Keras 2 name of the argument; the old
# `nb_epoch` keyword is deprecated and rejected by later releases.
history = model.fit(X_train, Y_train,
                    batch_size=batch_size,
                    epochs=nb_epoch,
                    verbose=1,
                    validation_data=(X_val, Y_val))

In [None]:
# Evaluate on the test set; the first two entries of `score` are printed
# as the test score and the test accuracy, in that order.
score = model.evaluate(X_test, Y_test, verbose=0)
for caption, value in zip(('Test score:', 'Test accuracy:'), score):
    print(caption, value)

In [None]:
# Plot training and validation accuracy per epoch.
# Older Keras releases record the metric under 'acc', newer ones under
# 'accuracy'; pick whichever key this run's history actually contains.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve comes from `validation_data`, not from the test set.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()