In [1]:
import keras
import numpy as np
from keras.preprocessing import image
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Lambda, Flatten
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, BatchNormalization
from keras.optimizers import Adam

Using TensorFlow backend.


In [2]:
def basic_model():
    """Create a Sequential model whose only layer standardizes its input.

    The single ``Lambda`` layer applies ``normalize`` and declares the
    model's input shape as ``(28, 28, 1)``.

    Returns:
        The new, uncompiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Lambda(normalize, input_shape=(28, 28, 1)))
    return model

def add_output_layer(model):
    """Append the 10-unit softmax classification layer to ``model`` in place.

    Args:
        model: Model exposing ``add``; it is mutated and nothing is returned.
    """
    softmax_head = Dense(10, activation="softmax")
    model.add(softmax_head)

def convolutional_model(conv_blocks=2, hidden_dense_activation="relu", pool_size=2, num_dense_neurons=512, starting_filter_size=32, batchnorm=False, zero_padding=True, dropout=False):
    """Build an uncompiled convolutional classifier on top of ``basic_model``.

    Each convolutional block adds, in order: optional ``ZeroPadding2D``, a
    3x3 ReLU convolution, optional batch normalization, a second 3x3 ReLU
    convolution, max pooling, and (for every block except the last) optional
    batch normalization. The blocks are followed by ``Flatten``, one hidden
    ``Dense`` layer (optionally wrapped in batch normalization), optional
    ``Dropout`` and the output layer from ``add_output_layer``.

    Args:
        conv_blocks: Number of convolutional blocks.
        hidden_dense_activation: Activation of the hidden ``Dense`` layer.
        pool_size: Max-pooling window; an int ``n`` is expanded to ``(n, n)``.
        num_dense_neurons: Width of the hidden ``Dense`` layer.
        starting_filter_size: Filter count of the first block; block ``i``
            (0-based) uses ``starting_filter_size * (i + 1)`` filters.
        batchnorm: Whether to insert ``BatchNormalization`` layers.
        zero_padding: Whether each block starts with ``ZeroPadding2D``.
        dropout: Falsy to disable; otherwise passed unchanged to ``Dropout``
            as its rate.

    Returns:
        The assembled, uncompiled model.
    """
    # basic_model declares channels-last input (28, 28, 1), so the feature
    # axis of every conv feature map is the last one. axis=1 would normalize
    # over image rows instead. For the 2-D tensors after Flatten, -1 == 1.
    feature_axis = -1

    # Some Keras releases call tuple(pool_size) and reject a bare int.
    if isinstance(pool_size, int):
        pool_size = (pool_size, pool_size)

    model = basic_model()
    last_block = conv_blocks - 1
    for block in range(conv_blocks):
        num_filters = starting_filter_size * (block + 1)
        if zero_padding:
            model.add(ZeroPadding2D())
        model.add(Convolution2D(num_filters, 3, 3, activation="relu"))
        if batchnorm:
            model.add(BatchNormalization(axis=feature_axis))
        model.add(Convolution2D(num_filters, 3, 3, activation="relu"))
        model.add(MaxPooling2D(pool_size=pool_size))
        # Compare by value: identity (`is not`) only happens to work for
        # small cached ints. The last block skips this normalization because
        # one is added right after Flatten.
        if batchnorm and block != last_block:
            model.add(BatchNormalization(axis=feature_axis))

    model.add(Flatten())
    if batchnorm:
        model.add(BatchNormalization(axis=feature_axis))
    model.add(Dense(num_dense_neurons, activation=hidden_dense_activation))
    if batchnorm:
        model.add(BatchNormalization(axis=feature_axis))
    if dropout:
        model.add(Dropout(dropout))
    add_output_layer(model)
    return model

def normalize(x):
    """Standardize ``x`` with the mean and standard deviation of ``x_train``.

    Both statistics are taken over the whole notebook-level ``x_train`` array
    and cast to ``float32`` before use.

    Returns:
        ``(x - mean) / std``.
    """
    center, scale = (
        stat.astype(np.float32) for stat in (x_train.mean(), x_train.std())
    )
    centered = x - center
    return centered / scale

def onehot(y):
    """Convert the labels ``y`` with Keras' ``to_categorical`` and return the result."""
    to_categorical = keras.utils.np_utils.to_categorical
    return to_categorical(y)

def get_data(sample=False):
    """Return the notebook-level training and validation arrays.

    Args:
        sample: When truthy, return the ``*_sample`` subsets instead of the
            full arrays.

    Returns:
        A ``(x, y, val_x, val_y)`` tuple; the validation pair is the test split.
    """
    if not sample:
        return x_train, y_train, x_test, y_test
    return x_train_sample, y_train_sample, x_test_sample, y_test_sample

def compile_model(model):
    """Compile ``model`` with Adam, categorical cross-entropy and accuracy.

    Returns:
        Whatever ``model.compile`` returns.
    """
    # Categorical cross-entropy is the loss for more than two classes;
    # MNIST has ten (digits 0-9).
    compile_options = dict(
        optimizer=Adam(),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model.compile(**compile_options)

def fit(model, learning_rate=None, nb_epoch=1, sample=False):
    """Train ``model`` on the data from ``get_data`` with a batch size of 64.

    Args:
        model: Compiled Keras model.
        learning_rate: New optimizer learning rate, or ``None`` to keep the
            current one.
        nb_epoch: Number of epochs, forwarded to ``model.fit``.
        sample: Forwarded to ``get_data`` to pick the sample or full arrays.

    Returns:
        The value returned by ``model.fit`` (the Keras training history).
    """
    if learning_rate is not None:
        # Update the optimizer's learning-rate variable in place. Rebinding
        # `model.optimizer.lr` to a Python float replaces the variable, and a
        # training function that has already been built keeps using the old
        # one, so the new rate would be silently ignored.
        keras.backend.set_value(model.optimizer.lr, learning_rate)
    x, y, val_x, val_y = get_data(sample=sample)
    return model.fit(x, y, batch_size=64, nb_epoch=nb_epoch, validation_data=(val_x, val_y))
    
def multi_fit(model, reset=True, augmentation=False, sample=False, runs=1):
    """Fit ``model`` once at each of several learning rates, ``runs`` times over.

    Args:
        model: Compiled Keras model.
        reset: When true, the weights captured on entry are restored after
            every fit so each learning rate starts from the same point.
        augmentation: Accepted for interface compatibility; currently unused.
        sample: Forwarded to ``fit`` to train on the sample arrays.
        runs: Number of times the whole learning-rate sweep is repeated.
    """
    learning_rates = [0.001, 0.01, 0.1]
    # Snapshot the starting weights. `reset_states()` alone only clears the
    # state of stateful recurrent layers and leaves trained weights untouched,
    # so it cannot undo a fit.
    initial_weights = model.get_weights() if reset else None
    for _ in range(runs):
        for learning_rate in learning_rates:
            print("Fitting with learning rate of: ", learning_rate)
            fit(model, learning_rate=learning_rate, sample=sample)
            if reset:
                # Resetting lets learning rates be compared without the
                # compounding effect of earlier fits.
                # NOTE(review): optimizer state (e.g. Adam moment estimates)
                # is not restored here; confirm whether that matters.
                model.set_weights(initial_weights)
                model.reset_states()



In [3]:
# Load MNIST and keep the test split for validation.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Append a trailing axis to the images so they match the model's
# input_shape of (28, 28, 1).
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

# One-hot encode the labels for the categorical cross-entropy loss.
y_train, y_test = onehot(y_train), onehot(y_test)

# Small leading subsets of each array for quick experiments.
sample_size = 2000
x_train_sample, y_train_sample = x_train[:sample_size], y_train[:sample_size]
x_test_sample, y_test_sample = x_test[:sample_size], y_test[:sample_size]

In [4]:
# Sanity check: the training sample should hold 2000 images of shape (28, 28, 1).
x_train_sample.shape

(2000, 28, 28, 1)

In [5]:
# A deliberately tiny network: one conv block with 3 filters, 4x4 pooling
# and an 8-unit hidden layer.
model = convolutional_model(
    conv_blocks=1,
    starting_filter_size=3,
    pool_size=(4, 4),
    hidden_dense_activation="relu",
    num_dense_neurons=8,
)
compile_model(model)

In [6]:
# Train on the 2000-example sample: 10 runs, each fitting one epoch at every
# learning rate in multi_fit's sweep.
multi_fit(model, sample=True, runs=10)

Fitting with learning rate of:  0.001
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.01
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.1
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.001
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.01
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.1
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.001
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.01
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.1
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.001
Train on 2000 samples, validate on 2000 samples
Epoch 1/1
Fitting with learning rate of:  0.01
Train on 200