This notebook follows the code from the tutorial at https://elitedatascience.com/keras-tutorial-deep-learning-in-python

# Import libraries and modules

In [1]:
import numpy as np

# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(seed=123)

In [2]:
from keras.models import Sequential                                 # Keras model module
from keras.layers import Dense, Dropout, Activation, Flatten        # Keras core layers
from keras.layers import Convolution2D, MaxPooling2D                # Keras CNN layers
from keras.utils import np_utils                                    # Utilities

  from ._conv import register_converters as _register_converters
Using Theano backend.


# Load pre-shuffled MNIST data into train and test sets

In [3]:
from keras.datasets import mnist

# Load the pre-shuffled MNIST data. load_data() hands back a (train, test)
# pair, each of which is an (images, labels) tuple; unpack it in two steps.
mnist_train, mnist_test = mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

In [4]:
# Sanity-check the dimensions of the freshly loaded training images.
print(X_train.shape)
# expected: (60000, 28, 28)

(60000, 28, 28)


In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
plt.imshow(X_train[0])

# Preprocess input data

In [6]:
# Insert an explicit depth (channel) axis of size 1 right after the sample
# axis; a colour image would use depth 3 (RGB) instead.
X_train = np.reshape(X_train, (len(X_train), 1, 28, 28))
X_test = np.reshape(X_test, (len(X_test), 1, 28, 28))

In [7]:
# Confirm the depth axis was inserted: expect (60000, 1, 28, 28).
print(X_train.shape)

(60000, 1, 28, 28)


In [8]:
# Cast the pixel arrays to float32 and scale them by 1/255 so the values are
# normalized to the range [0, 1].
# Note: running this cell a second time divides the already-scaled data again.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Preprocess class labels

In [9]:
# Inspect the raw training labels: a 1-D array with one entry per image.
print(y_train.shape)
# expected: (60000,)

(60000,)


In [10]:
# Peek at the first ten labels; they are plain digit class indices, not yet
# one-hot encoded.
print(y_train[:10])

[5 0 4 1 9 2 1 3 1 4]


We want the labels to be one-hot encoded, i.e. represented as a matrix of zeros and ones with one row per sample and one column per class.

In [11]:
# One-hot encode the labels: every class index in the 1-D label arrays becomes
# a 10-element row of zeros with a single 1 at that index.
Y_train, Y_test = (
    np_utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

In [12]:
# After encoding there is one row per sample and one column per class.
print(Y_train.shape)

(60000, 10)


In [13]:
# Display the first six encoded rows; each has a single 1 at the index of its
# original label.
Y_train[0:6,]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

# Define model architecture

When you're just starting out, you can just replicate proven architectures from academic papers or use existing examples. Here's a list of example implementations in Keras.

In [14]:
# Declare an empty Sequential model; layers are appended to it with
# model.add() in the cells that follow.
model = Sequential()

In [15]:
# CNN input layer: 32 filters with a 3x3 kernel and ReLU activation.
# The default is data_format="channels_last", but the images were reshaped to
# (1, 28, 28) with the depth axis first, so "channels_first" is set explicitly.
model.add(
    Convolution2D(
        32,
        (3, 3),
        input_shape=(1, 28, 28),
        data_format="channels_first",
        activation='relu',
    )
)

In [16]:
# Check the shape produced by the first layer; in channels_first layout this
# reads (batch, filters, height, width).
print(model.output_shape)

(None, 32, 26, 26)


In [17]:
# Second convolution + pooling stage.
# The input layer emits channels-first feature maps of shape
# (None, 32, 26, 26), so these layers must be told to use
# data_format="channels_first" as well. Left unset they fall back to the
# configured Keras default (noted in this notebook as "channels_last") and
# would treat the 32-filter axis as a spatial dimension.
model.add(Convolution2D(32, (3, 3), activation='relu', data_format="channels_first"))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))
model.add(Dropout(0.25))  # drop 25% of activations: regularization against overfitting

# Classifier head: flatten the feature maps, pass them through one hidden
# dense layer with dropout, and finish with a 10-way softmax that matches the
# 10 one-hot label columns.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Compile model

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# labels are one-hot encoded), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the full training set for 10 epochs in mini-batches of 32 samples,
# printing per-epoch progress (verbose=1).
model.fit(
    X_train,
    Y_train,
    epochs=10,
    batch_size=32,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

# Evaluate on test set

In [None]:
# Evaluate the trained model on the held-out test set, with progress output
# silenced (verbose=0).
# NOTE(review): given the loss plus the single 'accuracy' metric passed to
# compile(), score should be [test loss, test accuracy] — confirm; the value
# is stored but not displayed by this cell.
score = model.evaluate(X_test, Y_test, verbose=0)