# MNIST digit classifier example

Turn of cuDNN as its convolution operations are *not reproducible*.

In [1]:
import os
os.environ['THEANO_FLAGS'] = 'dnn.enabled=False, optimizer_including='

#### Imports

In [2]:
import numpy as np

from matplotlib import pyplot as plt

import sklearn.cross_validation

import lasagne
import theano.tensor as T
from britefury_lasagne import basic_dnn, trainer, image_window_extractor

from fuel.datasets.mnist import MNIST
import fuel

Using gpu device 0: GeForce GTX 970 (CNMeM is enabled with initial size: 25.0% of memory, cuDNN not available)


### Define network architecture

We define the `build_network` function that takes the input variables as an optional argument and build the network using the Lasagne API.

NOTE that the final dense layer does *NOT* use the `softmax` nonlinearity as it is supplied by the classifier builder (see below).

In [3]:
def build_network(input_vars=None):
    # Input layer
    x_var = input_vars[0] if input_vars is not None else None
    net = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=x_var)

    # Two 32 unit 3x3 conv layers, followed by 2x2 max-pool
    net = lasagne.layers.Conv2DLayer(net, num_filters=32, filter_size=(5, 5), W=lasagne.init.HeUniform())
    net = lasagne.layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Two 32 unit 3x3 conv layers, followed by 2x2 max-pool
    net = lasagne.layers.Conv2DLayer(net, num_filters=32, filter_size=(3, 3), W=lasagne.init.HeUniform())
    net = lasagne.layers.Conv2DLayer(net, num_filters=32, filter_size=(3, 3), W=lasagne.init.HeUniform())
    net = lasagne.layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # A fully-connected layer of 64 units followed by 50% dropout
    net = lasagne.layers.DenseLayer(net, num_units=64, W=lasagne.init.HeUniform())
    net = lasagne.layers.DropoutLayer(net, p=0.5)

    # Final 10-unit dense layer, with no nonlinearity
    net = lasagne.layers.DenseLayer(net, num_units=10, nonlinearity=None)

    return net

Define a function for resetting the random number generator seeds of the noise layers (e.g. dropout):

In [4]:
def reset_noise_seeds(final_layer, rng):
    for l in lasagne.layers.get_all_layers(final_layer):
        if isinstance(l, (lasagne.layers.DropoutLayer, lasagne.layers.GaussianNoiseLayer)):
            l._srng.set_rstate(rng.randint(1, 2147462579))

#### Load the train, validation and test sets

In [5]:
mnist_train = MNIST(which_sets=['train'], load_in_memory=True, subset=slice(0, 50000))
mnist_val = MNIST(which_sets=['train'], load_in_memory=True, subset=slice(50000, None))
mnist_test = MNIST(which_sets=['test'], load_in_memory=True)

### Train the classifier

In [6]:
# Create SEPARATE random number generators for generating weights, noise seeds and shuffling,
# with specific seeds so that we can re-try the experiment below to check that we get the same result
# Creating a network's layers draws from an RNG in order to create randomly initialised network weights.
# Since the third experiment does not create new layers, we need separate RNGs for the noise and shuffling
# steps so that we can ensure that they operate the same way each time
weight_rng1 = np.random.RandomState(12345)
noise_rng1 = np.random.RandomState(67890)
shuffle_rng1 = np.random.RandomState(24680)

# Set Lasagne's RNG
lasagne.random.set_rng(weight_rng1)

# Build the image classifier for the given model builder
print 'Building network'
clf = basic_dnn.simple_classifier(build_network, n_input_spatial_dims=2, target_channel_index=0,
            updates_fn=lambda loss, params: lasagne.updates.sgd(loss, params, learning_rate=0.001))

reset_noise_seeds(clf.final_layers, noise_rng1)

# Get the randomly initialised parameter values so that we can try to re-start training without building
# the net from scratch
blank_state = clf.get_param_values(include_updates=True)

# Set verbosity
clf.trainer.report(verbosity=trainer.VERBOSITY_EPOCH)

# Set training length
clf.trainer.train_for(num_epochs=3)

# Train
print 'Training'
clf.trainer.train(mnist_train, mnist_val, mnist_test, batchsize=128, shuffle_rng=shuffle_rng1)

clf_state = clf.get_param_values()

Building network


DEBUG: nvcc STDOUT mod.cu
   Creating library D:/temp/theano/compiledir_Windows-10-10.0.10586-Intel64_Family_6_Model_58_Stepping_9_GenuineIntel-2.7.11-64/tmp6fcseg/81125e2c9964bf5b883ec60c08118404.lib and object D:/temp/theano/compiledir_Windows-10-10.0.10586-Intel64_Family_6_Model_58_Stepping_9_GenuineIntel-2.7.11-64/tmp6fcseg/81125e2c9964bf5b883ec60c08118404.exp

DEBUG: nvcc STDOUT mod.cu
   Creating library D:/temp/theano/compiledir_Windows-10-10.0.10586-Intel64_Family_6_Model_58_Stepping_9_GenuineIntel-2.7.11-64/tmplhg_jc/378931d0d188abc1eb9a51d3938507cc.lib and object D:/temp/theano/compiledir_Windows-10-10.0.10586-Intel64_Family_6_Model_58_Stepping_9_GenuineIntel-2.7.11-64/tmplhg_jc/378931d0d188abc1eb9a51d3938507cc.exp

DEBUG: nvcc STDOUT mod.cu
   Creating library D:/temp/theano/compiledir_Windows-10-10.0.10586-Intel64_Family_6_Model_58_Stepping_9_GenuineIntel-2.7.11-64/tmpf0kaco/b6437dbe3a70d5ad2b089b5e10d3d274.lib and object D:/temp/theano/compiledir_Windows-10-10.0.10586-Inte

Training
Epoch 1/3 took 9.65s:  TRAIN y loss=2.257261  VAL y loss=2.145790 err=64.02%  TEST y loss=2.151802 err=65.19%
Epoch 2/3 took 9.87s:  TRAIN y loss=2.074777  VAL y loss=1.878938 err=40.17%  TEST y loss=1.885863 err=40.87%
Epoch 3/3 took 9.74s:  TRAIN y loss=1.766350  VAL y loss=1.388147 err=22.81%  TEST y loss=1.395608 err=23.42%
Final result:
Epoch 3/3 took 29.26s:  TRAIN y loss=1.766350  VAL y loss=1.388147 err=22.81%  TEST y loss=1.395608 err=23.42%


Once more:

In [7]:
weight_rng2 = np.random.RandomState(12345)
noise_rng2 = np.random.RandomState(67890)
shuffle_rng2 = np.random.RandomState(24680)

lasagne.random.set_rng(weight_rng2)

# Build the image classifier for the given model builder
print 'Building network'
clf2 = basic_dnn.simple_classifier(build_network, n_input_spatial_dims=2, target_channel_index=0,
            updates_fn=lambda loss, params: lasagne.updates.sgd(loss, params, learning_rate=0.001))

reset_noise_seeds(clf2.final_layers, noise_rng2)

# Get the randomly initialised parameter values so that we can try to re-start training without building
# the net from scratch
blank_state2 = clf2.get_param_values(include_updates=True)

# Set verbosity
clf2.trainer.report(verbosity=trainer.VERBOSITY_EPOCH)

# Set training length
clf2.trainer.train_for(num_epochs=3)

# Train
print 'Training'
clf2.trainer.train(mnist_train, mnist_val, mnist_test, batchsize=128, shuffle_rng=shuffle_rng2)

clf2_state = clf2.get_param_values()

Building network
Training
Epoch 1/3 took 10.04s:  TRAIN y loss=2.257261  VAL y loss=2.145790 err=64.02%  TEST y loss=2.151802 err=65.19%
Epoch 2/3 took 10.26s:  TRAIN y loss=2.074777  VAL y loss=1.878938 err=40.17%  TEST y loss=1.885863 err=40.87%
Epoch 3/3 took 9.91s:  TRAIN y loss=1.766350  VAL y loss=1.388147 err=22.81%  TEST y loss=1.395608 err=23.42%
Final result:
Epoch 3/3 took 30.21s:  TRAIN y loss=1.766350  VAL y loss=1.388147 err=22.81%  TEST y loss=1.395608 err=23.42%


Reset the state of the first classifier and train again:

In [8]:
weight_rng3 = np.random.RandomState(12345)
noise_rng3 = np.random.RandomState(67890)
shuffle_rng3 = np.random.RandomState(24680)

# Reset parameter state
clf.set_param_values(blank_state, include_updates=True)

# Reset noise seeds
reset_noise_seeds(clf.final_layers, noise_rng3)

# Train
print 'Training'
clf.trainer.train(mnist_train, mnist_val, mnist_test, batchsize=128, shuffle_rng=shuffle_rng3)

clf_state_b = clf.get_param_values()

Training
Epoch 1/3 took 9.77s:  TRAIN y loss=2.255214  VAL y loss=2.146062 err=62.93%  TEST y loss=2.152184 err=64.46%
Epoch 2/3 took 9.75s:  TRAIN y loss=2.079298  VAL y loss=1.881940 err=39.97%  TEST y loss=1.889157 err=40.48%
Epoch 3/3 took 9.80s:  TRAIN y loss=1.773211  VAL y loss=1.399116 err=22.60%  TEST y loss=1.407584 err=23.61%
Final result:
Epoch 3/3 took 29.32s:  TRAIN y loss=1.773211  VAL y loss=1.399116 err=22.60%  TEST y loss=1.407584 err=23.61%


Check parameters for equality:

In [9]:
def compare_states(s1, s2):
    for i, (a, b) in enumerate(zip(s1, s2)):
        if (a != b).any():
            print 'FAIL at index {}/{}'.format(i, len(s1))

In [10]:
compare_states(blank_state, blank_state2)

In [11]:
compare_states(clf_state, clf2_state)

In [12]:
compare_states(clf_state, clf2_state)

In [13]:
compare_states(clf_state, clf_state_b)

FAIL at index 0/10
FAIL at index 1/10
FAIL at index 2/10
FAIL at index 3/10
FAIL at index 4/10
FAIL at index 5/10
FAIL at index 6/10
FAIL at index 7/10
FAIL at index 8/10
FAIL at index 9/10
