# MNIST digit classifier example

#### Imports

In [1]:
%matplotlib inline

import numpy as np

from matplotlib import pyplot as plt

import sklearn.cross_validation

import lasagne
import theano.tensor as T
from britefury_lasagne import basic_dnn, trainer, image_window_extractor, mnist



EVIL HACK: Disabled cuDNN check


DEBUG: nvcc STDOUT mod.cu
   Creating library c:/Geoff/theano_gpu_temp/compiledir_Windows-7-6.1.7601-SP1-Intel64_Family_6_Model_60_Stepping_3_GenuineIntel-2.7.12-64/tmpr6qlpp/265abc51f7c376c224983485238ff1a5.lib and object c:/Geoff/theano_gpu_temp/compiledir_Windows-7-6.1.7601-SP1-Intel64_Family_6_Model_60_Stepping_3_GenuineIntel-2.7.12-64/tmpr6qlpp/265abc51f7c376c224983485238ff1a5.exp

Using gpu device 0: GeForce GTX 980 (CNMeM is enabled with initial size: 25.0% of memory, cuDNN 4007)


### Define network architecture

We define the `build_network` function, which takes the input variables as an optional argument and builds the network using the Lasagne API.

NOTE: the final dense layer does *NOT* apply the `softmax` nonlinearity, because the classifier builder supplies it (see below).

In [2]:
def build_network(input_vars=None):
    """Build the MNIST convolutional network with the Lasagne API.

    Layer stack, in order: input of shape (None, 1, 28, 28); a 5x5 conv with
    32 filters; 2x2 max-pool; two 3x3 convs with 32 filters each; 2x2
    max-pool; a 64-unit dense layer; 50% dropout; a 10-unit dense layer with
    ``nonlinearity=None``.

    :param input_vars: optional sequence whose first element is passed to the
        input layer as ``input_var``; when ``None`` the input layer receives
        ``input_var=None``.
    :return: the final (10-unit, no nonlinearity) dense layer.
    """
    layers = lasagne.layers
    he_uniform = lasagne.init.HeUniform

    # Pick the symbolic input variable, if the caller supplied any
    if input_vars is None:
        x_var = None
    else:
        x_var = input_vars[0]

    net = layers.InputLayer(shape=(None, 1, 28, 28), input_var=x_var)

    # First stage: one 5x5 conv (32 filters), then 2x2 max-pooling
    net = layers.Conv2DLayer(net, num_filters=32, filter_size=(5, 5),
                             W=he_uniform())
    net = layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Second stage: two stacked 3x3 convs (32 filters each), then 2x2 max-pooling
    for _ in range(2):
        net = layers.Conv2DLayer(net, num_filters=32, filter_size=(3, 3),
                                 W=he_uniform())
    net = layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Fully-connected stage: 64 units followed by 50% dropout
    net = layers.DenseLayer(net, num_units=64, W=he_uniform())
    net = layers.DropoutLayer(net, p=0.5)

    # Output stage: 10 units, deliberately left without a nonlinearity
    return layers.DenseLayer(net, num_units=10, nonlinearity=None)

#### Load the train, validation and test sets

In [3]:
# Load MNIST via the `mnist` helper module. The cells below read the splits
# from this object's `train_X`/`train_y`, `val_X`/`val_y` and
# `test_X`/`test_y` attributes.
dataset = mnist.MNIST()


### Train the classifier

In [4]:
print('Building network')


def adam_updates(loss, params):
    """Return Adam updates for `params` w.r.t. `loss`, learning rate 0.001."""
    return lasagne.updates.adam(loss, params, learning_rate=0.001)


# Wrap the `build_network` model builder in an image classifier
clf = basic_dnn.simple_classifier(
    build_network,
    n_input_spatial_dims=2,
    updates_fn=adam_updates,
)

# Configure the trainer: reporting verbosity `VERBOSITY_EPOCH`, 10 epochs
clf.trainer.report(verbosity=trainer.VERBOSITY_EPOCH)
clf.trainer.train_for(num_epochs=10)

# Train; the datasets are passed in the order train, validation, test
print('Training')
train_set = [dataset.train_X, dataset.train_y]
val_set = [dataset.val_X, dataset.val_y]
test_set = [dataset.test_X, dataset.test_y]
clf.trainer.train(train_set, val_set, test_set, batchsize=128)

Building network
Training
Epoch 1/10 took 3.75s:  TRAIN y loss=0.441689  VAL y loss=0.084613 err=2.61%  TEST y loss=0.084419 err=2.65%
Epoch 2/10 took 1.78s:  TRAIN y loss=0.140706  VAL y loss=0.060268 err=1.85%  TEST y loss=0.052837 err=1.75%
Epoch 3/10 took 1.80s:  TRAIN y loss=0.106317  VAL y loss=0.048532 err=1.39%  TEST y loss=0.042325 err=1.30%
Epoch 4/10 took 1.76s:  TRAIN y loss=0.082908  VAL y loss=0.043994 err=1.17%  TEST y loss=0.034813 err=1.17%
Epoch 5/10 took 1.75s:  TRAIN y loss=0.073172  VAL y loss=0.041307 err=1.06%  TEST y loss=0.033449 err=1.07%
Epoch 6/10 took 1.68s:  TRAIN y loss=0.064170  VAL y loss=0.042681 err=1.10%
Epoch 7/10 took 1.76s:  TRAIN y loss=0.055373  VAL y loss=0.042626 err=0.98%  TEST y loss=0.031913 err=0.95%
Epoch 8/10 took 1.67s:  TRAIN y loss=0.048138  VAL y loss=0.040988 err=1.01%
Epoch 9/10 took 2.11s:  TRAIN y loss=0.045882  VAL y loss=0.038105 err=0.99%
Epoch 10/10 took 1.68s:  TRAIN y loss=0.040990  VAL y loss=0.040317 err=0.98%
Final resul

<britefury_lasagne.trainer.TrainingResults at 0x44aa2eb8>

### Use the classifier to predict on the test set

In [5]:
# Run the trained classifier on the test images; the first element of the
# returned sequence holds the predicted probabilities for each sample
predictions = clf.predict([dataset.test_X])
test_y_pred_prob = predictions[0]

# The predicted class is the index of the largest entry along axis 1
test_y_pred = np.argmax(test_y_pred_prob, axis=1)

# Error rate: fraction of samples whose predicted class differs from the label
test_error_rate = np.mean(test_y_pred != dataset.test_y)
print('Test error rate={:.2%}'.format(test_error_rate))

Test error rate=0.95%
