In [None]:
import os.path
from IPython.display import Image

from util import Util
u = Util()

import random
import numpy as np
# Explicit random seeds for reproducibility.
# NumPy's global generator is seeded here; the stdlib generator is seeded too
# because fit() draws its epoch window with random.randint.
random.seed(1337)
np.random.seed(1337)

In [None]:
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Merge
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils

In [None]:
from keras.datasets import mnist

In [None]:
# training setup: mini-batch size, number of target classes, total epoch budget
batch_size, nb_classes, nb_epoch = 512, 10, 800
# directory that holds the weight checkpoints
checkpoints_dir = "checkpoints"

In [None]:
# input image dimensions (rows, columns)
img_rows = img_cols = 28
# filter counts of the first and second convolutional layers
nb_filters1, nb_filters2 = 20, 40
# convolution kernel sizes of the first and second convolutional layers
kernel_size1, kernel_size2 = (4, 4), (5, 5)
# max-pooling window sizes following each convolution
pool_size1, pool_size2 = (2, 2), (3, 3)
# number of units in the fully connected layer
dense_layer_size1 = 200
# default dropout rate used by the network
dropout = 0.15
# activation applied after the convolutional and dense layers
activation = 'relu'

In [None]:
# Load MNIST through keras.datasets. The result is unpacked into an
# (inputs, labels) pair for the training split and another for the test split;
# the split itself is whatever load_data() provides (described upstream as
# shuffled and split between train and test sets).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
# Visual sanity check: show the first nine training images with their labels.
u.plot_images(X_train[:9], y_train[:9])

In [None]:
# Add the single channel axis where the backend expects it:
# 'th' ordering puts channels first, anything else puts them last.
input_shape = (1, img_rows, img_cols) if K.image_dim_ordering() == 'th' else (img_rows, img_cols, 1)
X_train = X_train.reshape((X_train.shape[0],) + input_shape)
X_test = X_test.reshape((X_test.shape[0],) + input_shape)

In [None]:
# Cast to float32 and divide every value by 255.
# NOTE: the result is written back to X_train / X_test, so re-running this
# cell divides the already-scaled data by 255 again.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
print('X_train shape:', X_train.shape)
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

In [None]:
# One-hot encode the integer class labels of both splits (nb_classes columns each)
Y_train, Y_test = (np_utils.to_categorical(labels, nb_classes)
                   for labels in (y_train, y_test))

In [None]:
# Real-time augmentation: rotations of up to 30 degrees, shifts and zoom of up
# to 10%, and no horizontal mirroring.
datagen = ImageDataGenerator(
    horizontal_flip=False,
    rotation_range=30,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1)

# fit() computes the statistics needed for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied).
# NOTE(review): none of those options is enabled above, so this call looks
# redundant -- confirm before removing.
datagen.fit(X_train)

In [None]:
def initialize_network(model, dropout1=dropout, dropout2=dropout, loss='categorical_crossentropy',
                       checkpoints_filepath=None):
    """Add the convolutional classifier layers to ``model``, compile it and
    optionally restore weights from a checkpoint.

    Layers are appended in place: (convolution -> activation -> max-pooling)
    twice, dropout, flatten, a dense layer with activation and dropout, and a
    softmax output with ``nb_classes`` units. Layer sizes are read from the
    notebook-level hyperparameters (``nb_filters1``, ``kernel_size1``,
    ``pool_size1``, ``dense_layer_size1``, ...).

    Args:
        model: model to populate (the notebook passes empty ``Sequential`` instances).
        dropout1: dropout rate applied after the second pooling layer.
        dropout2: dropout rate applied after the dense layer.
        loss: loss identifier handed to ``model.compile``.
        checkpoints_filepath: optional path of a weights file to load after
            compiling. When omitted, a notebook-level ``checkpoints_filepath``
            variable is used if one exists; otherwise no weights are loaded.

    Returns:
        None. ``model`` is modified in place.
    """
    # First convolutional stage. The 'covolution' spelling in the layer name is
    # kept unchanged so the name of this weighted layer stays as it was.
    model.add(Convolution2D(nb_filters1, kernel_size1[0], kernel_size1[1],
                            border_mode='valid',
                            input_shape=input_shape, name='covolution_1_' + str(nb_filters1) + '_filters'))
    model.add(Activation(activation, name='activation_1_' + activation))
    model.add(MaxPooling2D(pool_size=pool_size1, name='max_pooling_1_' + str(pool_size1) + '_pool_size'))

    # Second convolutional stage (its pooling layer was previously mislabelled
    # 'max_pooling_1_').
    model.add(Convolution2D(nb_filters2, kernel_size2[0], kernel_size2[1]))
    model.add(Activation(activation, name='activation_2_' + activation))
    model.add(MaxPooling2D(pool_size=pool_size2, name='max_pooling_2_' + str(pool_size2) + '_pool_size'))
    # Honour the dropout1 argument instead of the global default rate.
    model.add(Dropout(dropout1))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(dense_layer_size1, name='fully_connected_1_' + str(dense_layer_size1) + '_neurons'))
    model.add(Activation(activation, name='activation_3_' + activation))
    # Honour the dropout2 argument instead of the global default rate.
    model.add(Dropout(dropout2))
    model.add(Dense(nb_classes, name='output_' + str(nb_classes) + '_neurons'))
    model.add(Activation('softmax', name='softmax'))

    model.compile(loss=loss,
                  optimizer='adadelta',
                  metrics=['accuracy', 'precision', 'recall', 'mean_absolute_error'])

    # Loading weights from checkpoints. The path used to be read from an
    # undefined name, which raised NameError on a fresh kernel; fall back to a
    # notebook-level variable only if one has actually been defined.
    if checkpoints_filepath is None:
        checkpoints_filepath = globals().get('checkpoints_filepath')

    if checkpoints_filepath is None:
        print('Warning: no checkpoint path given, weights were not loaded')
    elif os.path.exists(checkpoints_filepath):
        model.load_weights(checkpoints_filepath)
    else:
        print('Warning: ' + checkpoints_filepath + ' could not be loaded')

In [None]:
# Five independent, initially empty networks that will form the ensemble.
models = [Sequential() for _ in range(5)]
model1, model2, model3, model4, model5 = models

# One seed per ensemble member (same order as `models`).
seeds = [1337, 22, 73, 42, 1990]

# Build and compile every member with the default settings.
for model in models:
    initialize_network(model)

In [None]:
def fit(model, checkpoints_name='test', seed=1337, verbose=1, window_size=(-1)):
    """Train ``model`` in two phases and return both Keras history objects.

    Phase 1 runs ``nb_epoch - window`` epochs on augmented batches produced by
    ``datagen``; phase 2 runs ``window`` epochs on the un-augmented training
    set. After each phase the weights stored by the ``ModelCheckpoint``
    callback (best ``val_precision``) are loaded back into the model.

    Args:
        model: compiled Keras model to train.
        checkpoints_name: suffix identifying this model's checkpoint file
            inside ``checkpoints_dir``.
        seed: seed passed to ``datagen.flow`` for the augmentation stream.
        verbose: verbosity passed to both training calls.
        window_size: number of epochs of phase 2; ``-1`` draws a value
            from 1 to 15 (inclusive) with ``random.randint``.

    Returns:
        Tuple ``(history, history_cont)`` with the histories of phase 1 and
        phase 2 respectively.
    """
    if window_size == (-1):
        window = random.randint(1, 15)
    else:
        window = window_size
    print("Window: " + str(window))

    # ModelCheckpoint writes into this directory, so make sure it exists.
    if not os.path.isdir(checkpoints_dir):
        os.makedirs(checkpoints_dir)
    # The file name is built from the checkpoints_name argument (the code used
    # to reference an undefined `checkpoints_file`).
    checkpoints_filepath = os.path.join(checkpoints_dir,
                                        '04_MNIST_weights.best_' + str(checkpoints_name) + '.hdf5')

    # Checkpoint: keep only the weights with the highest val_precision.
    # NOTE(review): validation_data below is the test set, so the "best"
    # weights are selected on test data.
    checkpoint = ModelCheckpoint(checkpoints_filepath, monitor='val_precision', verbose=1, save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

    # Phase 1: real-time data augmentation for (nb_epoch - window) epochs.
    history = model.fit_generator(datagen.flow(X_train, Y_train,
                                               batch_size=batch_size,
                                               # save_to_dir='distorted_data',
                                               # save_format='png'
                                               seed=seed),
                                  samples_per_epoch=len(X_train), nb_epoch=(nb_epoch - window), verbose=verbose,
                                  validation_data=(X_test, Y_test), callbacks=callbacks_list)

    # Continue from the best val_precision reached during phase 1.
    model.load_weights(checkpoints_filepath)

    # Phase 2: un-augmented training set for `window` epochs, with the same
    # callback instance as phase 1.
    history_cont = model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=window,
                             verbose=verbose, validation_data=(X_test, Y_test), callbacks=callbacks_list)

    # Leave the model holding the best val_precision weights overall.
    model.load_weights(checkpoints_filepath)
    return history, history_cont

In [None]:
# Train every ensemble member, plot both training phases and report test metrics.
for index, model in enumerate(models):
    print("Training model " + str(index) + " ...")
    # Model 0 uses a fixed 20-epoch un-augmented phase; for the others fit()
    # draws the window length at random (window_size == -1).
    if index == 0:
        window_size = 20
    else:
        window_size = (-1)
    history, history_cont = fit(model, str(index), seed=seeds[index], verbose=0, window_size=window_size)
    print("Done.")
    print("History: ")
    u.plot_history(history)
    u.plot_history(history, 'precision')
    print("Continuation of training with no pre-processing:")
    u.plot_history(history_cont)
    u.plot_history(history_cont, 'precision')
    print('evaluating model ' + str(index))
    # evaluate() returns the loss followed by the compiled metrics, so with
    # metrics=['accuracy', 'precision', ...] score[1] is accuracy and score[2]
    # is precision.
    score = model.evaluate(X_test, Y_test, verbose=0)
    print('Test accuracy:', score[1]*100, '%')
    # Test error is the complement of accuracy (it was computed from precision).
    print('Test error:', (1-score[1])*100, '%')

In [None]:
# Ensemble: a Sequential model whose first layer averages ('ave') the outputs
# of the individual networks.
merged_model = Sequential()
merged_model.add(Merge(models, mode='ave'))
# A model must be compiled before evaluate() can be called on it; reuse the
# loss, optimizer and metrics of the member networks so the score list has the
# same layout.
merged_model.compile(loss='categorical_crossentropy',
                     optimizer='adadelta',
                     metrics=['accuracy', 'precision', 'recall', 'mean_absolute_error'])

In [None]:
print('Evaluating ensemble')
# The merged model has one input per member network, so it is fed one copy of
# the test inputs per branch.
score = merged_model.evaluate([X_test] * len(models), Y_test, verbose=0)
print('Test accuracy:', score[1]*100, '%')
# Test error is the complement of accuracy; score[2] is precision, not accuracy.
print('Test error:', (1-score[1])*100, '%')