# MNIST digit classifier example

#### Imports

In [None]:
%matplotlib inline

import numpy as np

from matplotlib import pyplot as plt

try:
    from tqdm import tqdm_notebook
except ImportError:
    tqdm_notebook = None

import sklearn.cross_validation

import lasagne
import theano.tensor as T
from britefury_lasagne import basic_dnn, trainer, image_window_extractor, mnist



### Define network architecture

We define the `build_network` function, which takes the input variables as an optional argument and builds the network using the Lasagne API.

NOTE that the final dense layer does *NOT* use the `softmax` nonlinearity, because the softmax is supplied by the classifier builder (see below).

In [None]:
def build_network(input_vars=None):
    """Assemble the convolutional network and return its final layer.

    `input_vars` is an optional sequence; when given, its first element is
    passed to the input layer as `input_var`. When it is None the input
    layer is created with `input_var=None`.

    The returned layer is a 10-unit dense layer with `nonlinearity=None`;
    no softmax is applied inside this function.
    """
    layers = lasagne.layers
    he_uniform = lasagne.init.HeUniform

    if input_vars is None:
        image_var = None
    else:
        image_var = input_vars[0]

    # Input layer with shape (None, 1, 28, 28)
    network = layers.InputLayer(shape=(None, 1, 28, 28), input_var=image_var)

    # Stage 1: a single 5x5 convolution with 32 filters, then 2x2 max-pooling
    network = layers.Conv2DLayer(
        network, num_filters=32, filter_size=(5, 5), W=he_uniform())
    network = layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Stage 2: two stacked 3x3 convolutions with 32 filters each, then
    # 2x2 max-pooling
    for _ in range(2):
        network = layers.Conv2DLayer(
            network, num_filters=32, filter_size=(3, 3), W=he_uniform())
    network = layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Stage 3: 64-unit fully-connected layer followed by dropout with p=0.5
    network = layers.DenseLayer(network, num_units=64, W=he_uniform())
    network = layers.DropoutLayer(network, p=0.5)

    # Output: 10 units, deliberately left without a nonlinearity
    return layers.DenseLayer(network, num_units=10, nonlinearity=None)

#### Load the train, validation and test sets

In [None]:
# Create the MNIST dataset object from `britefury_lasagne.mnist`. The cells
# below read its `train_X`/`train_y`, `val_X`/`val_y` and `test_X`/`test_y`
# attributes.
# NOTE(review): presumably the data is loaded when the object is constructed
# -- confirm against the `mnist` module.
dataset = mnist.MNIST()


### Train the classifier

In [None]:
# Build the image classifier for the given model builder.
# `build_network` is passed as the network builder and the parameter updates
# come from Adam with a learning rate of 0.001.
# NOTE(review): `n_input_spatial_dims=2` presumably declares the input as
# 2D images -- confirm against `basic_dnn.simple_classifier`.
print('Building network')
clf = basic_dnn.simple_classifier(build_network, n_input_spatial_dims=2,
            updates_fn=lambda loss, params: lasagne.updates.adam(loss, params, learning_rate=0.001))

# Train with a batch size of 128, for 10 epochs, reporting after each epoch.
# Three [X, y] pairs are passed: train, validation and test, in that order.
# Only the first 2048 samples of each split are used here.
# `tqdm_notebook` is None when the `tqdm` import at the top of the notebook
# failed.
# NOTE(review): presumably `progress_iter_func=None` just disables the
# progress bar -- confirm against the trainer.
print('Training')
clf.train([dataset.train_X[:2048], dataset.train_y[:2048]],
          [dataset.val_X[:2048], dataset.val_y[:2048]],
          [dataset.test_X[:2048], dataset.test_y[:2048]], batchsize=128,
          num_epochs=10, verbosity=trainer.VERBOSITY_EPOCH,
          progress_iter_func=tqdm_notebook)

### Use the classifier to predict on the test set

In [None]:
# Predict probabilities for test samples.
# `clf.predict` is given a one-element list of inputs and the first element
# of its result is taken. The whole of `dataset.test_X` is used here, not
# just the 2048-sample slice passed to `clf.train` above.
test_y_pred_prob = clf.predict([dataset.test_X])[0]
# Use `np.argmax` along axis 1 to get class predictions
# (assumes one row per sample and one column per class -- TODO confirm).
test_y_pred = np.argmax(test_y_pred_prob, axis=1)

# Show the error rate: the fraction of predictions that differ from
# `dataset.test_y`, formatted as a percentage with two decimals.
print('Test error rate={:.2%}'.format(np.mean(test_y_pred != dataset.test_y)))