# Computational Cognitive Neuroscience Practical Assignment 1
## Training an MLP on MNIST
### Tommy Clausner (s4836219) and Steven Smits (s4237263)

In [1]:
from __future__ import print_function
import numpy as np
import chainer
from chainer.functions.evaluation import accuracy
from chainer.functions.loss import softmax_cross_entropy
from chainer import link
from chainer import reporter
from chainer import optimizers
import chainer.functions as F
import chainer.links as L
from chainer.datasets import TupleDataset
import matplotlib.pyplot as plt

ImportError: No module named chainer

## First we define all the functions given by the teacher.

In [None]:
def _select_per_class(labels, classes, n):
    """Pick the first ``n`` sample indices of every requested class.

    :param labels: 1-D array holding the original label of every sample
    :param classes: sequence of original label values to keep
    :param n: maximum nr of samples to keep per class
    :return: (idx, new_labels) where ``idx`` indexes the selected samples
        (grouped by class, in the order of ``classes``) and ``new_labels``
        holds, for every selected sample, the position of its class inside
        ``classes`` as int32
    """
    per_class = [np.where(labels == c)[0][:n] for c in classes]
    idx = np.concatenate(per_class)
    # Size the labels from what was actually found, so a class with fewer
    # than n samples cannot desynchronise data and labels.
    new_labels = np.concatenate(
        [np.full(len(sel), k, dtype='int32') for k, sel in enumerate(per_class)])
    return idx, new_labels


def get_mnist(n_train=100, n_test=100, n_dim=1, with_label=True, classes = None):
    """Load a per-class subset of the MNIST train and test sets via chainer.

    :param n_train: nr of training examples per class
    :param n_test: nr of test examples per class
    :param n_dim: 1 or 3 (for convolutional input)
    :param with_label: whether or not to also provide labels
    :param classes: if not None, then it selects only those classes, e.g. [0, 1];
        None (or an empty sequence) selects all ten digits
    :return: (train_data, test_data). With labels these are TupleDatasets of
        (images, labels) where the labels are re-indexed to
        0..len(classes)-1 following the order of ``classes``; without labels
        they are the selected image arrays only
    """

    # Labels are needed for the per-class selection even when the caller
    # does not want them back, so always load the labelled variant once.
    full_train, full_test = chainer.datasets.get_mnist(ndim=n_dim, withlabel=True)

    # Explicit None/empty test: truth-testing a numpy array would raise.
    if classes is None or len(classes) == 0:
        classes = np.arange(10)

    subsets = []
    for dataset, n in ((full_train, n_train), (full_test, n_test)):
        data = dataset._datasets[0]
        labels = dataset._datasets[1]
        idx, new_labels = _select_per_class(labels, classes, n)

        if with_label:
            subsets.append(TupleDataset(data[idx], new_labels))
        else:
            subsets.append(data[idx])

    train_data, test_data = subsets
    return train_data, test_data

class RandomIterator(object):
    """
    Generates random subsets of data

    Every pass (every call to ``iter``) draws a fresh random permutation of
    the sample indices and yields ``n_batches`` batches of ``batch_size``
    samples each; samples that do not fill a complete batch are dropped.
    """

    def __init__(self, data, batch_size=1):
        """

        Args:
            data (TupleDataset or numpy.ndarray): samples to draw from; must
                support ``len`` and indexing with an integer index array
            batch_size (int): number of samples per batch

        Returns:
            list of batches consisting of (input, output) pairs
        """

        self.data = data

        self.batch_size = batch_size
        self.n_batches = len(self.data) // batch_size

    def __iter__(self):
        """Start a new pass over the data with a fresh random order."""

        self.idx = -1
        self._order = np.random.permutation(len(self.data))[:(self.n_batches * self.batch_size)]

        return self

    def __next__(self):
        """Return the next batch, or raise StopIteration when the pass is over."""

        self.idx += 1

        # ">=" rather than "==": once exhausted, every further call must keep
        # raising StopIteration instead of returning an empty batch.
        if self.idx >= self.n_batches:
            raise StopIteration

        i = self.idx * self.batch_size
        batch_idx = self._order[i:(i + self.batch_size)]

        # handles unlabeled and labeled data
        if isinstance(self.data, np.ndarray):
            return self.data[batch_idx]
        else:
            return list(self.data[batch_idx])

    # Python 2 iterator protocol looks for ``next``; keep it as an alias.
    next = __next__

class Classifier(link.Chain):

    """Chain that wraps a predictor and scores it against labels.

    Calling the classifier runs the wrapped predictor on the features,
    evaluates the loss function on the prediction and the labels, and
    (optionally) the accuracy function as well. The latest results are kept
    as attributes and reported through ``reporter.report``.

    Args:
        predictor (~chainer.Link): Predictor network.
        lossfun (function): Loss function.
        accfun (function): Function that computes accuracy.
    Attributes:
        predictor (~chainer.Link): Predictor network.
        lossfun (function): Loss function.
        accfun (function): Function that computes accuracy.
        y (~chainer.Variable): Prediction for the last minibatch.
        loss (~chainer.Variable): Loss value for the last minibatch.
        accuracy (~chainer.Variable): Accuracy for the last minibatch.
        compute_accuracy (bool): If ``True``, compute accuracy on the forward
            computation. The default value is ``True``.
    """

    compute_accuracy = True

    def __init__(self, predictor,
                 lossfun=softmax_cross_entropy.softmax_cross_entropy,
                 accfun=accuracy.accuracy):
        super(Classifier, self).__init__()
        self.lossfun = lossfun
        self.accfun = accfun
        # No minibatch has been processed yet.
        self.y = self.loss = self.accuracy = None

        # Register the wrapped network as a child link of this chain.
        with self.init_scope():
            self.predictor = predictor

    def __call__(self, *args):
        """Run the predictor on a minibatch and return its loss.

        Args:
            args (list of ~chainer.Variable): Input minibatch. Every element
                except the last is passed to the predictor as a feature; the
                last element holds the ground truth labels.
        Returns:
            ~chainer.Variable: Loss value.
        """

        assert len(args) >= 2
        features, labels = args[:-1], args[-1]

        # Drop the results of the previous minibatch before computing new
        # ones, so stale values never survive a failing forward pass.
        self.y = self.loss = self.accuracy = None

        prediction = self.predictor(*features)
        self.y = prediction

        loss_value = self.lossfun(prediction, labels)
        self.loss = loss_value
        reporter.report({'loss': loss_value}, self)

        if not self.compute_accuracy:
            return self.loss

        self.accuracy = self.accfun(self.y, labels)
        reporter.report({'accuracy': self.accuracy}, self)
        return self.loss

## We define an MLP with one hidden layer

In [None]:
class MLP(chainer.Chain):
    """Multilayer perceptron with one hidden layer.

    Args:
        n_units (int): number of units in the hidden layer.
        n_out (int): number of output units, i.e. the number of classes
            (10 for the ten MNIST digits).

    The input size is not specified; both linear layers are created with
    ``None`` as input size so it is inferred from the first batch.
    """

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_units)  # input -> hidden layer
            self.l2 = L.Linear(None, n_out)  # hidden layer -> output

    def __call__(self, x):
        """Return the raw (pre-softmax) output scores for the batch ``x``."""
        h1 = F.relu(self.l1(x))
        y = self.l2(h1)
        return y

## The main function that ties together all the aforementioned functions

In [None]:
def DoMLP(batchsize=32, n_epoch=20, n_units=10):
    """Train an MLP on MNIST with chainer and plot loss and accuracy.

    Trains the MLP on the MNIST subset returned by ``get_mnist()`` using
    stochastic gradient descent. After every epoch the model is evaluated on
    the test set; the test loss and accuracy are printed, and the per-epoch
    train and test curves are plotted once training has finished.

    Args:
        batchsize (int): training minibatch size (assignment default: 32).
        n_epoch (int): number of training epochs (assignment default: 20).
        n_units (int): number of hidden layer units (assignment default: 10).
    """
    n_out = 10  # one output unit per MNIST digit
    test_batchsize = 100  # evaluation batch size

    # Our model of the neural network
    model = MLP(n_units, n_out)

    # Classifier that calculates the loss and accuracy of the model
    classifier_model = Classifier(model)

    # Setup an optimizer
    optimizer = optimizers.SGD()  # Using Stochastic Gradient Descent
    optimizer.setup(classifier_model)

    # Load the MNIST dataset
    train, test = get_mnist()

    N = len(train)  # training data size
    N_test = len(test)  # test data size

    accplot = np.zeros((n_epoch, 1), dtype=float)  # Store test accuracy for plot
    lossplot = np.zeros((n_epoch, 1), dtype=float)  # Store test loss for plot

    accplot_train = np.zeros((n_epoch, 1), dtype=float)  # Store train accuracy for plot
    lossplot_train = np.zeros((n_epoch, 1), dtype=float)  # Store train loss for plot

    # Training the MLP for epochs
    for epoch in range(1, n_epoch + 1):  # start with epoch 1 (instead of 0)
        print('epoch', epoch)

        # Visit the training samples in a new random order every epoch.
        perm = np.random.permutation(N)

        sum_accuracy_train = 0
        sum_loss_train = 0
        for i in range(0, N, batchsize):
            # Index with an integer array so the dataset returns one
            # (inputs, targets) pair of arrays for the whole batch.
            x_batch, t_batch = train[perm[i:i + batchsize]]
            inputs = chainer.Variable(np.asarray(x_batch))
            target = chainer.Variable(np.asarray(t_batch))

            # update() called with a loss function clears the gradients,
            # runs forward/backward and applies the update; no cleargrads().
            optimizer.update(classifier_model, inputs, target)

            # Weight by the batch length so a smaller last batch counts less.
            sum_loss_train += float(classifier_model.loss.data) * len(target.data)
            sum_accuracy_train += float(classifier_model.accuracy.data) * len(target.data)

        # Testing the MLP
        sum_accuracy = 0
        sum_loss = 0
        # No gradients are needed for evaluation.
        with chainer.no_backprop_mode():
            for i in range(0, N_test, test_batchsize):
                # Clip the last batch so the indices never run past the data.
                idx = np.arange(i, min(i + test_batchsize, N_test))
                x_batch, t_batch = test[idx]  # No shuffling, because it's just a test
                inputs = chainer.Variable(x_batch)
                target = chainer.Variable(t_batch)

                loss = classifier_model(inputs, target)
                sum_loss += float(loss.data) * len(target.data)
                sum_accuracy += float(classifier_model.accuracy.data) * len(target.data)

        print('mean loss =', (sum_loss / N_test), ', Accuracy =', sum_accuracy / N_test)  # To check values during process.
        accplot[epoch - 1] = sum_accuracy / N_test
        lossplot[epoch - 1] = sum_loss / N_test

        # Training statistics are averaged over the training set size.
        accplot_train[epoch - 1] = sum_accuracy_train / N
        lossplot_train[epoch - 1] = sum_loss_train / N

    # Plot the accuracy and loss at the end per epoch
    epochs = np.arange(1, n_epoch + 1)
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()  # second y-axis so loss and accuracy share the x-axis

    # Dotted lines: test set, solid lines: training set.
    ax1.plot(epochs, accplot, 'r:', epochs, accplot_train, 'r-')
    ax1.set_xlabel('Training Epoch')
    ax1.set_xticks(epochs)
    ax1.set_ylabel('accuracy')
    ax1.tick_params(axis='y', colors='red')
    ax2.plot(epochs, lossplot, 'b:', epochs, lossplot_train, 'b-')
    ax2.set_ylabel('loss')
    ax2.tick_params(axis='y', colors='blue')
    ax2.legend(['test set loss', 'training set loss'], loc=2)
    ax1.legend(['test set accuracy', 'training set accuracy'], loc=3)
    plt.title('CCN - Assignment 1: MLP accuracy and loss')
    plt.show()

## Execution

In [None]:
# Run the whole pipeline: train the MLP, evaluate it after every epoch and
# show the accuracy/loss plot at the end.
DoMLP()

# Figure of the results from our end.

![MLP accuracy and loss per training epoch](https://image.ibb.co/c1n3zQ/Figure_1.png)