## Imports

In [1]:
import keras
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

from sklearn.model_selection import train_test_split

Using TensorFlow backend.


## Load, split and preprocess the dataset

In [0]:
# Load MNIST. The second tuple returned by load_data() is divided evenly into
# a validation half and a test half, with a fixed seed so the split is stable.
(x_train, y_train), (x_tv, y_tv) = mnist.load_data()
x_val, x_test, y_val, y_test = train_test_split(
    x_tv, y_tv, test_size=0.5, random_state=42)

# Image geometry and label count shared by the cells below.
rows, cols = 28, 28
num_classes = 10
input_shape = (rows, cols, 1)


def _to_model_input(images):
    """Reshape images to (n, rows, cols, 1), cast to float32 and divide by 255."""
    with_channel = images.reshape(images.shape[0], rows, cols, 1)
    return with_channel.astype('float32') / 255


def _to_one_hot(labels):
    """Convert integer class labels to categorical vectors of length num_classes."""
    return keras.utils.to_categorical(labels, num_classes)


# Apply the same preprocessing to every split.
x_train, x_val, x_test = (
    _to_model_input(images) for images in (x_train, x_val, x_test))
y_train, y_val, y_test = (
    _to_one_hot(labels) for labels in (y_train, y_val, y_test))

## Define basic CNN

In [0]:
def _conv_stage(filters):
    """Return the layers of one Conv -> BatchNorm -> ReLU -> MaxPool -> Dropout stage.

    `filters` is the number of 3x3 convolution filters used by the stage.
    """
    return [
        Conv2D(filters, (3, 3)),
        BatchNormalization(),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.3),
    ]


# Stem convolution: ReLU applied directly, no batch normalisation.
stem = [Conv2D(32, (3, 3), activation='relu', input_shape=input_shape)]

# Classifier head: flatten, one hidden dense layer with dropout, softmax output.
head = [
    Flatten(),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
]

# Layers are created in the same order in which they are stacked.
model = Sequential(stem + _conv_stage(64) + _conv_stage(128) + head)

model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

## Training process

In [4]:
# Validation accuracy drives all three callbacks; higher is better (mode='max').
# NOTE(review): the Keras version used here logs this metric as 'val_acc';
# newer releases may name it 'val_accuracy' - confirm when upgrading.
MONITOR = 'val_acc'

# Stop training after 5 epochs without improvement of the monitored metric.
early_stopping = EarlyStopping(monitor=MONITOR, mode='max', patience=5, verbose=1)

# Keep only the best-scoring model on disk; the evaluation cell reloads it
# from 'best.model'.
model_checkpoint = ModelCheckpoint('best.model', monitor=MONITOR, mode='max',
                                   save_best_only=True, verbose=1)

# Multiply the learning rate by 0.7 after 3 epochs without improvement.
reduce_lr_on_plateau = ReduceLROnPlateau(monitor=MONITOR, mode='max',
                                         factor=0.7, patience=3, verbose=1)

# Keep the returned History object so the per-epoch metrics stay available
# for inspection or plotting, instead of discarding it (which also left a
# bare object repr as the cell output).
history = model.fit(x_train, y_train,
                    batch_size=512,
                    epochs=100,
                    callbacks=[early_stopping, model_checkpoint, reduce_lr_on_plateau],
                    verbose=1,
                    validation_data=(x_val, y_val))

Train on 60000 samples, validate on 5000 samples
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.96800, saving model to best.model
Epoch 2/100

Epoch 00002: val_acc improved from 0.96800 to 0.98420, saving model to best.model
Epoch 3/100

Epoch 00003: val_acc did not improve from 0.98420
Epoch 4/100

Epoch 00004: val_acc did not improve from 0.98420
Epoch 5/100

Epoch 00005: val_acc improved from 0.98420 to 0.98860, saving model to best.model
Epoch 6/100

Epoch 00006: val_acc improved from 0.98860 to 0.99100, saving model to best.model
Epoch 7/100

Epoch 00007: val_acc improved from 0.99100 to 0.99320, saving model to best.model
Epoch 8/100

Epoch 00008: val_acc improved from 0.99320 to 0.99400, saving model to best.model
Epoch 9/100

Epoch 00009: val_acc did not improve from 0.99400
Epoch 10/100

Epoch 00010: val_acc did not improve from 0.99400
Epoch 11/100

Epoch 00011: val_acc did not improve from 0.99400

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.7.
Epoc

<keras.callbacks.History at 0x7f53fa29ab70>

## Results

In [6]:
# Reload the checkpoint written during training and score it on the held-out
# test split, which was not used for fitting or for the callbacks.
model = load_model('best.model')
score = model.evaluate(x_test, y_test, verbose=0)

# evaluate() returns the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = score[0], score[1]
print(f'Test loss: {test_loss: 0.5f}')
print(f'Test accuracy:{test_accuracy: 0.5f}')

Test loss:  0.02150
Test accuracy: 0.99220
