In [1]:
from theano import gpuarray

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function
import os
from keras import backend as K

Using Theano backend.


## Setup

In [3]:
# Mini-batch size handed to the `gen.flow(...)` data iterators further down.
batch_size=64

In [4]:
from keras.datasets import mnist
# Load MNIST through keras.datasets as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Show all four array shapes as a quick sanity check.
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [5]:
# Insert a singleton channel axis at position 1 -> (N, 1, 28, 28), matching the
# channels-first `input_shape=(1,28,28)` declared by the models below.
# The ndim guard keeps this cell idempotent: re-running it must not stack a
# second extra axis onto arrays that were already expanded.
if X_test.ndim == 3:
    X_test = np.expand_dims(X_test, 1)
if X_train.ndim == 3:
    X_train = np.expand_dims(X_train, 1)

In [6]:
X_train.shape

(60000, 1, 28, 28)

In [7]:
y_train[:5]

array([5, 0, 4, 1, 9], dtype=uint8)

In [8]:
# One-hot encode the integer digit labels for the categorical cross-entropy loss.
# Only encode while the labels are still a 1-D vector, so that re-running the
# cell does not feed already-encoded 2-D labels back into `onehot`.
if y_train.ndim == 1:
    y_train = onehot(y_train)
if y_test.ndim == 1:
    y_test = onehot(y_test)

In [9]:
y_train[:5]

array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [10]:
# Scalar mean and standard deviation over every training pixel, cast to float32.
# `norm_input` reads these two globals to standardise the model inputs.
mean_px = np.float32(X_train.mean())
std_px = np.float32(X_train.std())

In [11]:
def norm_input(x):
    """Standardise `x` with the training-set pixel statistics.

    Reads the module-level `mean_px` and `std_px` and returns
    `(x - mean_px) / std_px`.
    """
    centered = x - mean_px
    return centered / std_px

## Linear model

In [None]:
def get_lin_model():
    """Build and compile the linear (softmax-regression) baseline.

    Layers, in order: input normalisation via `norm_input`, Flatten, and a
    10-way softmax Dense layer. Compiled with a default-constructed Adam
    optimizer, categorical cross-entropy loss and accuracy as the metric.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Lambda(norm_input, input_shape=(1,28,28)))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
lm = get_lin_model()

In [None]:
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)

In [None]:
lm.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
lm.optimizer.lr=0.1

In [None]:
lm.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
lm.optimizer.lr=0.01

In [None]:
lm.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

## Single dense layer

In [None]:
def get_fc_model():
    """Build and compile a network with a single 512-unit hidden dense layer.

    Layers, in order: input normalisation via `norm_input`, Flatten, a
    512-unit ReLU hidden layer, and a 10-way softmax output layer. Compiled
    with a default-constructed Adam optimizer, categorical cross-entropy loss
    and accuracy as the metric.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Flatten(),
        # Hidden layer uses ReLU, like every other hidden Dense layer in this
        # notebook. Softmax belongs only on the output: on a hidden layer it
        # forces the 512 activations to sum to 1 and starves the gradient.
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
fc = get_fc_model()

In [None]:
fc.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
fc.optimizer.lr=0.1

In [None]:
fc.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
fc.optimizer.lr=0.01

In [None]:
fc.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

## Basic 'VGG-style' CNN

In [None]:
def get_model():
    """Build and compile the basic VGG-style CNN.

    Two blocks of (3x3 conv, 3x3 conv, max-pool) with 32 and then 64 filters,
    followed by Flatten, a 512-unit ReLU dense layer and a 10-way softmax.
    Compiled with a default-constructed Adam optimizer, categorical
    cross-entropy loss and accuracy as the metric.

    Returns:
        The compiled Sequential model.
    """
    layers = [Lambda(norm_input, input_shape=(1,28,28))]

    # Convolutional block 1: 32 filters.
    layers.append(Convolution2D(32,3,3, activation='relu'))
    layers.append(Convolution2D(32,3,3, activation='relu'))
    layers.append(MaxPooling2D())

    # Convolutional block 2: 64 filters.
    layers.append(Convolution2D(64,3,3, activation='relu'))
    layers.append(Convolution2D(64,3,3, activation='relu'))
    layers.append(MaxPooling2D())

    # Classifier head.
    layers.append(Flatten())
    layers.append(Dense(512, activation='relu'))
    layers.append(Dense(10, activation='softmax'))

    model = Sequential(layers)
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
model = get_model()

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.1

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.01

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

## Data augmentation

In [None]:
model = get_model()

In [12]:
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.1

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.01

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=8, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.001

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=14, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.0001

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=10, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

## Batchnorm + data augmentation

In [None]:
def get_model_bn():
    """Build and compile the VGG-style CNN with batch normalisation.

    Same convolutional layout as `get_model` (two conv/conv/pool blocks with
    32 and 64 filters), with BatchNormalization inserted between the layers:
    `axis=1` after convolutional stages, default axis after Flatten and after
    the 512-unit dense layer. Compiled with a default-constructed Adam
    optimizer, categorical cross-entropy loss and accuracy as the metric.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    for layer in (
        Lambda(norm_input, input_shape=(1,28,28)),
        # Block 1: 32 filters.
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        # Block 2: 64 filters.
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        # Classifier head.
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax'),
    ):
        model.add(layer)
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
model = get_model_bn()

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.1

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.01

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

In [None]:
model.optimizer.lr=0.001

In [None]:
model.fit_generator(batches, batches.N, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.N)

## Batchnorm + dropout + data augmentation

In [13]:
def get_model_bn_do():
    """Build and compile the CNN with batch normalisation and dropout.

    Three convolutional stages (32, 64 and 64 filters) in which every 3x3
    convolution is preceded by one pixel of zero padding, followed by a dense
    head of 512 and 128 ReLU units with dropout rates 0.1 and 0.25 and a
    10-way softmax output. Compiled with a default-constructed Adam optimizer,
    categorical cross-entropy loss and accuracy as the metric.

    Returns:
        The compiled Sequential model.
    """
    def padded_conv(n_filters):
        # One pixel of zero padding on each side, then a 3x3 ReLU convolution.
        return [ZeroPadding2D((1,1)), Conv2D(n_filters, (3,3), activation='relu')]

    stack = [Lambda(norm_input, input_shape=(1,28,28), output_shape=(1,28,28))]

    # Stage 1: two padded 32-filter convolutions.
    stack += padded_conv(32)
    stack.append(BatchNormalization(axis=1))
    stack += padded_conv(32)
    stack += [MaxPooling2D(), BatchNormalization(axis=1)]

    # Stage 2: two padded 64-filter convolutions.
    stack += padded_conv(64)
    stack.append(BatchNormalization(axis=1))
    stack += padded_conv(64)
    stack += [MaxPooling2D(), BatchNormalization(axis=1)]

    # Stage 3: a single padded 64-filter convolution.
    stack += padded_conv(64)
    stack.append(MaxPooling2D())

    # Dense head with dropout.
    stack += [Flatten(), BatchNormalization()]
    stack += [Dense(512, activation='relu'), Dropout(0.1), BatchNormalization()]
    stack += [Dense(128, activation='relu'), Dropout(0.25), BatchNormalization()]
    stack.append(Dense(10, activation='softmax'))

    model = Sequential(stack)
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [15]:
# Instantiate the batchnorm + dropout CNN and print its layer-by-layer summary.
model = get_model_bn_do()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_1 (Lambda)            (None, 1, 28, 28)         0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 1, 30, 30)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 28, 28)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 28, 28)        128       
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 32, 30, 30)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 28, 28)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 14, 14)        0         
__________

In [16]:
# Read the optimizer's current learning rate as a plain number.
# `model.optimizer.lr` is a backend variable, not a NumPy array: calling
# `.astype(np.float32)` on it raised `KeyError: <type 'numpy.float32'>`.
K.get_value(model.optimizer.lr)

In [42]:
# Both step arguments count *batches*, not samples. Passing the raw sample
# count as `validation_steps` made every validation pass draw
# `test_batches.x.shape[0]` batches, i.e. batch_size times too many.
# ceil keeps the final partial batch so each pass covers the data exactly once.
train_steps = int(np.ceil(batches.x.shape[0] / float(batches.batch_size)))
val_steps = int(np.ceil(test_batches.x.shape[0] / float(test_batches.batch_size)))
model.fit_generator(batches, train_steps, epochs=1,
                    validation_data=test_batches, validation_steps=val_steps)

Epoch 1/1

KeyboardInterrupt: 

In [None]:
model.optimizer.lr=0.1
model.fit_generator(batches, batches.x.shape[0]/batches.batch_size, nb_epoch=4, 
                    validation_data=test_batches, nb_val_samples=test_batches.x.shape[0])

In [None]:
model.optimizer.lr=0.01

In [None]:
model.fit_generator(batches, batches.x.shape[0]/batches.batch_size, nb_epoch=12, 
                    validation_data=test_batches, nb_val_samples=test_batches.x.shape[0])

In [None]:
model.optimizer.lr=0.001

In [None]:
model.fit_generator(batches, batches.x.shape[0]/batches.batch_size, nb_epoch=1, 
                    validation_data=test_batches, nb_val_samples=test_batches.x.shape[0])

## Ensembling

In [None]:
def fit_model():
    """Train one fresh batchnorm+dropout CNN through the staged learning-rate schedule.

    Builds a new model with `get_model_bn_do()` and trains it silently
    (`verbose=0`) on the module-level `batches` generator, validating on
    `test_batches`, in four stages: 1 epoch at the optimizer's initial rate,
    then 4 epochs at 0.1, 12 epochs at 0.01 and 18 epochs at 0.001.

    Returns:
        The trained model.
    """
    model = get_model_bn_do()

    # fit_generator counts batches, not samples (Keras 2 API, matching the
    # Conv2D layers used by get_model_bn_do); ceil keeps the final partial batch.
    train_steps = int(np.ceil(batches.x.shape[0] / float(batches.batch_size)))
    val_steps = int(np.ceil(test_batches.x.shape[0] / float(test_batches.batch_size)))

    # (learning rate, epochs); None keeps the optimizer's initial rate.
    schedule = [(None, 1), (0.1, 4), (0.01, 12), (0.001, 18)]
    for lr, n_epochs in schedule:
        if lr is not None:
            # Set the backend variable in place. Rebinding `model.optimizer.lr`
            # to a float is ignored once the training function is compiled.
            # NOTE(review): 0.1 now really takes effect and is aggressive for
            # Adam; lower it if the loss diverges.
            K.set_value(model.optimizer.lr, lr)
        model.fit_generator(batches, train_steps, epochs=n_epochs, verbose=0,
                            validation_data=test_batches, validation_steps=val_steps)
    return model

In [None]:
# Full ensemble (disabled): trains six independent networks via fit_model().
#models = [fit_model() for i in range(6)]
# Active path: treat the single `model` trained above as a one-member ensemble.
models = [model]

In [None]:
path = "data/mnist/"
model_path = path + 'models/'

In [None]:
for i,m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')

In [None]:
evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])

In [None]:
evals.mean(axis=0)

In [None]:
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])

In [None]:
all_preds.shape

In [None]:
avg_preds = all_preds.mean(axis=0)

In [None]:
# Ensemble accuracy computed directly in NumPy: the fraction of test samples
# whose highest averaged probability matches the one-hot label. Both operands
# are already NumPy arrays, so there is no need to build and evaluate a
# backend graph, and the result is a single scalar regardless of whether the
# installed Keras metric returns a mean or per-sample values.
(avg_preds.argmax(axis=1) == y_test.argmax(axis=1)).mean()

In [None]:
y_test.shape

In [None]:
y_test.classes

In [None]:
y_test[0]

In [None]:
all_preds[0][0]

In [None]:
from keras.preprocessing import image

#Helper function to plot images by index in the validation set 
#Plots is a helper function in utils.py
def plots_idx(idx, titles=None):
    """Load the images at the given indices and display them with `plots`.

    Args:
        idx: iterable of integer positions into `filenames`.
        titles: optional titles, forwarded unchanged to `plots`.

    NOTE(review): reads the globals `valid_path` and `filenames`, which no
    visible cell of this notebook defines; confirm they are provided
    elsewhere before calling.
    """
    loaded = []
    for i in idx:
        loaded.append(image.load_img(valid_path + filenames[i]))
    plots(loaded, titles=titles)
    
# Number of images to view for each visualization task.
# NOTE(review): no visible cell reads `n_view`; presumably consumed by cells beyond this view. Confirm.
n_view = 4