In [2]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.datasets import mnist
from keras.callbacks import ReduceLROnPlateau

Using TensorFlow backend.


### Loading the Dataset from Keras

In [3]:
# mnist.load_data() returns a (train, test) pair, each an (images, labels) tuple.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

### Preprocessing the Data:

Preprocessing subtracts the mean pixel value (giving the data zero mean) and divides by 255, so every value lies within [-1, 1]. Also, as Conv2D requires the number of channels as a dimension, we reshape the dataset to include the one and only grayscale channel.

In [4]:
def preprocess(dataset, mean=None):
    """Centre and scale 28x28 images, then add a trailing channel axis.

    Parameters
    ----------
    dataset : array-like
        Image data whose total size is a multiple of 28 * 28.
    mean : scalar, optional
        Value subtracted from every pixel before scaling. When omitted, the
        mean of ``dataset`` itself is used. Pass the training-set mean here
        to apply exactly the same transform to a held-out split.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(-1, 28, 28, 1)`` holding ``(dataset - mean) / 255``.
    """
    dataset = np.asarray(dataset)
    if mean is None:
        mean = np.mean(dataset)
    else:
        # Force a float so an integer mean cannot wrap around when it is
        # subtracted from unsigned 8-bit pixel data.
        mean = float(mean)
    centred = (dataset - mean) / 255
    return centred.reshape(-1, 28, 28, 1)
# Each split is centred on its own mean; both names are rebound to the
# normalised (N, 28, 28, 1) arrays, so this cell must not be run twice.
x_train, x_test = preprocess(x_train), preprocess(x_test)

### 'One-hot'-ing the Labels

The labels are converted to one-hot vectors with a small helper function, although the built-in `to_categorical` function from `keras.utils` could be used instead.

In [5]:
def onehot(labels, num_classes=10):
    """Return the one-hot encoding of integer class labels.

    Parameters
    ----------
    labels : array-like
        Class indices; flattened to one dimension before encoding.
    num_classes : int, optional
        Width of each one-hot row (defaults to 10, one per digit).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), num_classes)`` with a 1.0 in the
        column matching each label and 0.0 elsewhere. A label outside
        ``range(num_classes)`` produces an all-zero row.
    """
    flat_labels = np.asarray(labels).reshape(-1)
    class_ids = np.arange(num_classes)
    # Broadcasting an (N, 1) column against a (num_classes,) row compares
    # every label with every class index in a single vectorised step.
    return (flat_labels[:, np.newaxis] == class_ids).astype(float)
# Rebind both label vectors to their one-hot matrices.
y_train, y_test = onehot(y_train), onehot(y_test)

### Defining the Model and its Architecture:

The model defined is a very simple one with two convolutional layers (each followed by max pooling) and two dense layers. A single Dropout layer is used for regularization. Since there are 10 labels (0 to 9), the last layer has 10 output nodes. Softmax activation gives us the probabilities directly.

In [6]:
# Feature extractor: two 5x5 convolutions with 32 filters each, every one
# followed by max pooling, then dropout and a flatten so that dense layers
# can be attached afterwards.
model = Sequential([
    Conv2D(filters=32, kernel_size=5, padding='SAME', activation='relu',
           input_shape=(28,28,1)),
    MaxPool2D(padding='SAME'),
    Conv2D(filters=32, kernel_size=5, padding='SAME', activation='relu'),
    MaxPool2D(),
    Dropout(0.25),
    Flatten(),
])

In [7]:
# Classifier head: a 256-unit hidden layer and a 10-way softmax output.
# This mutates the existing `model`, so re-running the cell appends the
# layers a second time.
for head_layer in (
    Dense(units=256, activation='relu'),
    Dense(units=10, activation='softmax'),
):
    model.add(head_layer)

In [10]:
# Halve the learning rate (never going below 1e-5) once the training
# accuracy has stopped improving for 3 consecutive epochs.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

### Compiling the Model and Training:

In [12]:
optimizer = RMSprop(
    lr=0.001,
    rho=0.9,
    epsilon=1e-08,
    decay=0.005,
)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)
# NOTE: the test split is passed as validation data, so the per-epoch
# validation figures are computed on the test set.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=3,
    verbose=2,
    validation_data=(x_test, y_test),
    callbacks=[learning_rate_reduction],
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
 - 117s - loss: 0.0345 - acc: 0.9885 - val_loss: 0.0228 - val_acc: 0.9919
Epoch 2/3
 - 117s - loss: 0.0217 - acc: 0.9931 - val_loss: 0.0199 - val_acc: 0.9935
Epoch 3/3
 - 117s - loss: 0.0188 - acc: 0.9945 - val_loss: 0.0204 - val_acc: 0.9939


<keras.callbacks.History at 0x115c77d30>

### Using the Model on a test dataset and Calculating the Accuracy:

In [13]:
# Returns [loss, accuracy], matching the loss and metric given to compile().
model.evaluate(x=x_test, y=y_test, verbose=1)



[0.020391063250977823, 0.99390000000000001]