In [None]:
%matplotlib inline
from __future__ import division, print_function

import os

import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Lambda
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.utils import np_utils
from keras import backend as K

## Setup

In [None]:
# --- Training hyper-parameters ---
batch_size = 128   # mini-batch size handed to the first pair of data generators
nb_classes = 10    # width of the one-hot labels and of the softmax output
np_epoch = 12      # NOTE(review): probably meant `nb_epoch`; not referenced in the visible cells

# --- Input geometry (rows x cols of one image) ---
img_rows = 28
img_cols = 28

# --- Convolution settings ---
# NOTE(review): the model builders in the visible cells spell these values out
# literally rather than reading the three names below.
nb_filters = 32
kernel_size = (3, 3)
pool_size = (2, 2)

In [None]:
# Load the MNIST train/test splits through keras.datasets and display the
# shapes of the four resulting arrays.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

In [None]:
# Size of the first axis, i.e. the number of training samples.
X_train.shape[0]

In [None]:
# Choose the per-sample tensor layout for the active dim ordering: the single
# channel axis goes first for 'th' and last otherwise.
if K.image_dim_ordering() == 'th':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Insert the channel axis into both splits; the sample count is unchanged.
X_train = X_train.reshape((X_train.shape[0],) + input_shape)
X_test = X_test.reshape((X_test.shape[0],) + input_shape)

In [None]:
# Shape after the channel axis was added.
X_train.shape

In [None]:
# First five labels before encoding.
y_train[:5]

In [None]:
# One-hot encode both label arrays into nb_classes columns.
# This rebinds y_train / y_test in place: re-running the cell would pass the
# already-encoded arrays back into to_categorical.
y_train = np_utils.to_categorical(y_train, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)

In [None]:
# The same five labels after encoding.
y_train[:5]

In [None]:
# Global pixel mean and standard deviation of the training split, stored as
# float32 scalars.
mean_px = np.float32(X_train.mean())
std_px = np.float32(X_train.std())

In [None]:
def norm_input(x):
    """Standardize `x` with the module-level training-set statistics.

    Subtracts `mean_px` and divides by `std_px`; works on anything that
    supports those two arithmetic operations.
    """
    centered = x - mean_px
    return centered / std_px

## Linear model

In [None]:
def get_lin_model():
    """Build and compile the linear baseline.

    Layers: input standardization, flatten, and a single softmax Dense layer
    with `nb_classes` outputs.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    model.add(Lambda(norm_input, input_shape=input_shape))
    model.add(Flatten())
    model.add(Dense(nb_classes, activation='softmax'))
    model.compile(optimizer=Adam(), loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Instantiate the linear baseline.
lm = get_lin_model()

In [None]:
# An ImageDataGenerator built with no arguments: used here only to turn the
# in-memory arrays into batch iterators for fit_generator.
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)

In [None]:
lm.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
lm.optimizer.lr=0.1

In [None]:
lm.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
lm.optimizer.lr=0.01

In [None]:
lm.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

## Single dense layer

In [None]:
def get_fc_model():
    """Build and compile a network with one fully connected hidden layer.

    Layers: input standardization, flatten, a 512-unit hidden Dense layer and
    a softmax Dense output with `nb_classes` units.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential([
        Lambda(norm_input, input_shape=input_shape),
        Flatten(),
        # Hidden layer uses ReLU, like the 512-unit hidden layers of the CNN
        # builders; softmax belongs on the output layer only.
        Dense(512, activation='relu'),
        Dense(nb_classes, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
fc = get_fc_model()

In [None]:
fc.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
fc.optimizer.lr=0.1

In [None]:
fc.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
fc.optimizer.lr=0.01

In [None]:
fc.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

## Basic 'VGG-style' CNN

In [None]:
def get_model():
    """Build and compile the basic VGG-style CNN.

    Two stages of (3x3 conv, 3x3 conv, max-pool) with 32 and then 64 filters,
    followed by a 512-unit ReLU Dense layer and a softmax output with
    `nb_classes` units. Inputs are standardized by `norm_input` first.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    model.add(Lambda(norm_input, input_shape=input_shape))

    # One pass per convolution stage; only the filter count differs.
    for n_filters in (32, 64):
        model.add(Convolution2D(n_filters, 3, 3, activation='relu'))
        model.add(Convolution2D(n_filters, 3, 3, activation='relu'))
        model.add(MaxPooling2D())

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(nb_classes, activation='softmax'))

    model.compile(optimizer=Adam(), loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
model = get_model()

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.1

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.01

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

## Data augmentation

In [None]:
model = get_model()

In [None]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.1

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.01

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.001

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=14, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.0001

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=10, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

## Batchnorm + data augmentation

In [None]:
def get_model_bn():
    """Build and compile the VGG-style CNN with batch normalization.

    Two stages of 3x3 convolutions (32 then 64 filters) with max-pooling,
    a 512-unit ReLU Dense layer and a softmax output with `nb_classes` units.
    BatchNormalization layers sit between the convolutions and around the
    dense layer.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    # Normalize over the channel axis wherever the active dim ordering puts it:
    # axis 1 for 'th' (channels first), the last axis otherwise.
    bn_axis = 1 if K.image_dim_ordering() == 'th' else -1
    model = Sequential([
        Lambda(norm_input, input_shape=input_shape),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=bn_axis),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=bn_axis),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=bn_axis),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        # After Flatten the features are on the last axis, the layer's default.
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(nb_classes, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
model = get_model_bn()

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.1

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.01

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.001

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

## Batchnorm + dropout + data augmentation

In [None]:
def get_model_bn_do():
    """Build and compile the batch-normalized CNN with dropout.

    Two stages of 3x3 convolutions (32 then 64 filters) with max-pooling and
    BatchNormalization, a 512-unit ReLU Dense layer followed by
    BatchNormalization and Dropout(0.5), and a softmax output with
    `nb_classes` units.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    # Normalize over the channel axis wherever the active dim ordering puts it:
    # axis 1 for 'th' (channels first), the last axis otherwise.
    bn_axis = 1 if K.image_dim_ordering() == 'th' else -1
    model = Sequential([
        # Use the layout chosen when the data was reshaped instead of a
        # hard-coded channels-first shape, so both dim orderings work.
        Lambda(norm_input, input_shape=input_shape),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=bn_axis),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=bn_axis),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=bn_axis),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        # After Flatten the features are on the last axis, the layer's default.
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(nb_classes, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
model = get_model_bn_do()

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.1

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.01

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

In [None]:
model.optimizer.lr=0.001

In [None]:
model.fit_generator(batches, batches.n, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.n)

## Ensembling

In [None]:
def fit_model():
    """Train one batchnorm + dropout CNN through the full learning-rate schedule.

    Builds a fresh model with `get_model_bn_do()` and trains it silently
    (verbose=0) on the module-level `batches`, validating on `test_batches`.

    Returns:
        The trained model.
    """
    model = get_model_bn_do()
    # (learning rate, epochs) per phase; None keeps the optimizer's initial
    # rate for the warm-up epoch.
    schedule = [(None, 1), (0.1, 4), (0.01, 12), (0.001, 18)]
    for lr, epochs in schedule:
        if lr is not None:
            # Update the backend variable in place. Rebinding `optimizer.lr`
            # to a float after the first fit would leave the already-built
            # training function running on the old rate.
            K.set_value(model.optimizer.lr, lr)
        model.fit_generator(batches, batches.n, nb_epoch=epochs, verbose=0,
                            validation_data=test_batches, nb_val_samples=test_batches.n)
    return model

In [None]:
# Train six independent models for the ensemble; each call builds and fits a
# new network.
models = [fit_model() for i in range(6)]

In [None]:
# Relative directory layout for the saved weights.
path = "data/mnist/"
model_path = path + 'models/'

In [None]:
# Make sure the target directory exists before writing into it.
if not os.path.isdir(model_path):
    os.makedirs(model_path)

# One weights file per ensemble member, numbered by position in `models`.
for i, m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')

In [None]:
# Evaluate every ensemble member directly on the test arrays (no generator);
# one row of results per model.
evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])

In [None]:
# Column-wise mean of the per-model evaluation results.
evals.mean(axis=0)

In [None]:
# Stack each model's predictions along a new leading (model) axis.
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])

In [None]:
all_preds.shape

In [None]:
# Ensemble prediction: average over the model axis.
avg_preds = all_preds.mean(axis=0)

In [None]:
# Ensemble accuracy: the fraction of test samples whose highest-scoring class
# in the averaged predictions matches the one-hot label. Computed with NumPy
# because the notebook never binds the name `keras` (only `from keras... import`
# forms are used) and so that no backend-specific tensor evaluation is needed.
np.mean(avg_preds.argmax(axis=-1) == y_test.argmax(axis=-1))