# MNIST digit classifier example

#### Imports

In [1]:
%matplotlib inline

import numpy as np

from matplotlib import pyplot as plt

import sklearn.cross_validation

import lasagne
import theano.tensor as T
from britefury_lasagne import basic_dnn, trainer, image_window_extractor

from fuel.datasets.mnist import MNIST
import fuel

EVIL HACK: Disable cuDNN check


DEBUG: nvcc STDOUT mod.cu
   Creating library D:/temp/theano/compiledir_Windows-10-10.0.10586-Intel64_Family_6_Model_58_Stepping_9_GenuineIntel-2.7.11-64/tmptrun7i/265abc51f7c376c224983485238ff1a5.lib and object D:/temp/theano/compiledir_Windows-10-10.0.10586-Intel64_Family_6_Model_58_Stepping_9_GenuineIntel-2.7.11-64/tmptrun7i/265abc51f7c376c224983485238ff1a5.exp

Using gpu device 0: GeForce GTX 970 (CNMeM is enabled with initial size: 25.0% of memory, cuDNN 5103)


### Define network architecture

We define the `build_network` function, which takes the input variables as an optional argument and builds the network using the Lasagne API.

NOTE that the final dense layer does *NOT* use the `softmax` nonlinearity as it is supplied by the classifier builder (see below).

In [2]:
def build_network(input_vars=None):
    """Construct the convolutional digit-classifier network with Lasagne.

    Parameters
    ----------
    input_vars : sequence or None, optional
        When supplied, its first element is handed to the input layer as
        ``input_var``. When ``None``, the input layer gets ``input_var=None``.

    Returns
    -------
    The last layer of the stack: a 10-unit dense layer created with
    ``nonlinearity=None`` (no softmax is applied inside this function).
    """
    layers = lasagne.layers
    he_uniform = lasagne.init.HeUniform

    # Pick the first supplied input variable, if any were given
    if input_vars is None:
        image_var = None
    else:
        image_var = input_vars[0]

    # Input layer, declared with shape (None, 1, 28, 28)
    network = layers.InputLayer(shape=(None, 1, 28, 28), input_var=image_var)

    # One 32-filter 5x5 conv layer, followed by 2x2 max-pool
    network = layers.Conv2DLayer(network, num_filters=32, filter_size=(5, 5), W=he_uniform())
    network = layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Two 32-filter 3x3 conv layers, followed by 2x2 max-pool
    for _ in range(2):
        network = layers.Conv2DLayer(network, num_filters=32, filter_size=(3, 3), W=he_uniform())
    network = layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # A fully-connected layer of 64 units followed by 50% dropout
    network = layers.DenseLayer(network, num_units=64, W=he_uniform())
    network = layers.DropoutLayer(network, p=0.5)

    # Final 10-unit dense layer, with no nonlinearity
    return layers.DenseLayer(network, num_units=10, nonlinearity=None)

#### Load the train, validation and test sets

In [3]:
# Index at which the 'train' set is split: samples before it are used for
# training, samples from it onwards for validation.
_VAL_START = 50000

mnist_train = MNIST(which_sets=['train'], subset=slice(0, _VAL_START), load_in_memory=True)
mnist_val = MNIST(which_sets=['train'], subset=slice(_VAL_START, None), load_in_memory=True)
# The 'test' set is loaded in full
mnist_test = MNIST(which_sets=['test'], load_in_memory=True)

### Train the classifier

In [5]:
# Training hyper-parameters, gathered here so they are easy to find and tune
LEARNING_RATE = 0.001
NUM_EPOCHS = 10
BATCH_SIZE = 128

# Build the image classifier for the given model builder; parameter updates
# are computed with Adam at the learning rate above.
# (Single-argument parenthesised print works under both Python 2 and 3.)
print('Building network')
clf = basic_dnn.simple_classifier(build_network, n_input_spatial_dims=2, target_channel_index=0,
            updates_fn=lambda loss, params: lasagne.updates.adam(loss, params, learning_rate=LEARNING_RATE))

# Set verbosity (presumably one report line per epoch, going by the constant's name)
clf.trainer.report(verbosity=trainer.VERBOSITY_EPOCH)

# Set training length
clf.trainer.train_for(num_epochs=NUM_EPOCHS)

# Train on the training set, passing the validation and test sets as well.
# This is deliberately left as the cell's last expression so that the returned
# results object is displayed.
print('Training')
clf.trainer.train(mnist_train, mnist_val, mnist_test, batchsize=BATCH_SIZE)

Building network
Training
Epoch 1/10 took 5.27s:  TRAIN y loss=0.424349  VAL y loss=0.078494 err=2.31%  TEST y loss=0.069636 err=2.09%
Epoch 2/10 took 5.92s:  TRAIN y loss=0.141408  VAL y loss=0.057037 err=1.56%  TEST y loss=0.052428 err=1.66%
Epoch 3/10 took 5.68s:  TRAIN y loss=0.101619  VAL y loss=0.044065 err=1.32%  TEST y loss=0.035886 err=1.16%
Epoch 4/10 took 5.71s:  TRAIN y loss=0.079419  VAL y loss=0.047646 err=1.30%  TEST y loss=0.032739 err=1.04%
Epoch 5/10 took 5.48s:  TRAIN y loss=0.071349  VAL y loss=0.044890 err=1.34%
Epoch 6/10 took 5.32s:  TRAIN y loss=0.060336  VAL y loss=0.040141 err=1.08%  TEST y loss=0.027523 err=0.92%
Epoch 7/10 took 6.08s:  TRAIN y loss=0.053391  VAL y loss=0.036406 err=1.01%  TEST y loss=0.022415 err=0.73%
Epoch 8/10 took 5.51s:  TRAIN y loss=0.047946  VAL y loss=0.033787 err=0.84%  TEST y loss=0.025142 err=0.85%
Epoch 9/10 took 5.58s:  TRAIN y loss=0.045110  VAL y loss=0.032317 err=0.88%
Epoch 10/10 took 5.14s:  TRAIN y loss=0.039169  VAL y los

<britefury_lasagne.trainer.TrainingResults at 0x7d6056d8>

### Use the classifier to predict on the test set

In [15]:
# Get a Fuel dataset for the MNIST test set, features only (no targets)
mnist_test_features = MNIST(which_sets=['test'], sources=['features'], load_in_memory=True)

# Predict probabilities for the test samples; the first element of the
# value returned by `clf.predict` is used as the probability array
test_y_pred_prob = clf.predict(mnist_test_features)[0]
# Use `np.argmax` over axis 1 to get class predictions
test_y_pred = np.argmax(test_y_pred_prob, axis=1)

# Get the ground truths. The state returned by `open()` is handed back to
# `close()` even if `get_data` raises, so the dataset is always released.
# Element [1] of the returned data is taken as the targets
# (assumes the source order is (features, targets) -- TODO confirm).
state = mnist_test.open()
try:
    test_y = mnist_test.get_data(state, request=slice(None))[1]
finally:
    mnist_test.close(state)

# Show the error rate: the fraction of predictions that differ from the
# first column of the targets array.
# (Single-argument parenthesised print works under both Python 2 and 3.)
print('Test error rate={:.2%}'.format(np.mean(test_y_pred != test_y[:, 0])))

Test error rate=0.85%
