# Convolutional Neural Networks in ``gluon``

Now let's see how succinctly we can express a convolutional neural network using ``gluon``. You might be relieved to find out that this too requires hardly any more code than logistic regression. 

In [1]:
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
import multiprocessing
mx.random.seed(1)

  import OpenSSL.SSL


## Set the context

In [2]:
# Display the installed MXNet version so the timings recorded below can be tied to a release.
mx.__version__

'1.1.0'

In [3]:
# Base worker count for the data loaders: the number of CPUs reported by multiprocessing.
NUM_WORKERS = multiprocessing.cpu_count()

In [4]:
# Device context used for the parameters, the data batches and the metric accumulators.
# Defaults to the GPU; swap in the commented-out line below to run on the CPU instead.
ctx = mx.gpu()
#ctx = mx.cpu()

## Grab the MNIST dataset

In [18]:
batch_size = 128
num_inputs = 784
num_outputs = 10


def transform(data, label):
    """Prepare one (image, label) sample for the network.

    The image is cast to float32, its last axis is moved to the front
    (axes reordered as (2, 0, 1)) and the values are divided by 255.
    The label is cast to float32.
    Presumably the incoming image is height x width x channel with values
    in 0..255 -- TODO confirm against the MNIST dataset class.
    """
    channels_first = nd.transpose(data.astype(np.float32), (2, 0, 1))
    return channels_first / 255, label.astype(np.float32)


# Training batches are shuffled; test batches keep the dataset order.
train_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=True, transform=transform),
    batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS * 3)
test_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=False, transform=transform),
    batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS * 3)

## Define a convolutional neural network

Again, a few lines here are all we need in order to change the model. Let's add a couple of convolutional layers using ``gluon.nn``.

In [6]:
num_fc = 512
net = gluon.nn.Sequential()
with net.name_scope():
    # Layers are created inside the name scope, in the order they are applied.
    layers = [
        gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
        gluon.nn.MaxPool2D(pool_size=2, strides=2),
        gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'),
        gluon.nn.MaxPool2D(pool_size=2, strides=2),
        # The Flatten layer collapses all axes, except the first one, into one axis.
        gluon.nn.Flatten(),
        gluon.nn.Dense(num_fc, activation="relu"),
        # Output layer: one unit per class, no activation.
        gluon.nn.Dense(num_outputs),
    ]
    for layer in layers:
        net.add(layer)

## Parameter initialization


In [7]:
# Initialize every parameter of the network with the Xavier initializer (magnitude 2.24) on `ctx`.
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

## Softmax cross-entropy Loss

In [8]:
# Loss applied to the network output and the labels in the training loop.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

## Optimizer

In [9]:
# Plain SGD over all network parameters with a learning rate of 0.1.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})

## Write evaluation loop to calculate accuracy

In [10]:
def check_label_shapes(labels, preds, wrap=False, shape=False):
    """Check that labels and predictions agree in size, optionally wrapping them.

    Parameters
    ----------
    labels : `NDArray` or list of `NDArray`
        The labels of the data.
    preds : `NDArray` or list of `NDArray`
        Predicted values.
    wrap : boolean
        If True, a bare NDArray passed as `labels` or `preds` is wrapped
        in a one-element list; anything else is returned untouched.
    shape : boolean
        If True, compare the `.shape` attributes of `labels` and `preds`;
        otherwise compare only `len(labels)` and `len(preds)`.

    Returns
    -------
    tuple
        `(labels, preds)`, wrapped in lists when `wrap` asks for it.

    Raises
    ------
    ValueError
        If the compared sizes differ.
    """
    if shape:
        label_shape, pred_shape = labels.shape, preds.shape
    else:
        label_shape, pred_shape = len(labels), len(preds)

    if label_shape != pred_shape:
        raise ValueError("Shape of labels {} does not match shape of "
                         "predictions {}".format(label_shape, pred_shape))

    if not wrap:
        return labels, preds

    # Only bare NDArrays are wrapped; lists and other containers pass through.
    wrapped_labels = [labels] if isinstance(labels, nd.ndarray.NDArray) else labels
    wrapped_preds = [preds] if isinstance(preds, nd.ndarray.NDArray) else preds
    return wrapped_labels, wrapped_preds

In [11]:
class EvalMetric(object):
    """Common interface for evaluation metrics whose state lives in NDArrays.

    .. note::
        This base class only provides the shared bookkeeping. It is not
        meant to be instantiated directly; subclasses implement `update`.

    Parameters
    ----------
    name : str
        Name of this metric instance for display.
    output_names : list of str, or None
        Names of the predictions picked by `update_dict`.
        None means all predictions are used.
    label_names : list of str, or None
        Names of the labels picked by `update_dict`.
        None means all labels are used.
    ctx : Context
        Context on which the `num_inst` and `sum_metric` accumulators
        are allocated.
    **kwargs
        Extra configuration, stored and reported back by `get_config`.
    """
    def __init__(self, name, output_names=None,
                 label_names=None, ctx=mx.cpu(), **kwargs):
        self._kwargs = kwargs
        self.ctx = ctx
        self.label_names = label_names
        self.output_names = output_names
        self.name = str(name)
        # reset() allocates the accumulators on self.ctx, so it runs last.
        self.reset()

    def __str__(self):
        name_to_value = dict(self.get_name_value())
        return "EvalMetric: {}".format(name_to_value)

    def get_config(self):
        """Return the metric's configuration as a dict.

        The result holds the extra keyword arguments given to the
        constructor plus the class name, `name`, `output_names` and
        `label_names`.
        """
        config = dict(self._kwargs)
        config['metric'] = self.__class__.__name__
        config['name'] = self.name
        config['output_names'] = self.output_names
        config['label_names'] = self.label_names
        return config

    def update_dict(self, label, pred):
        """Update the metric from name-keyed labels and predictions.

        Parameters
        ----------
        label : dict of str -> NDArray
            Name to array mapping for labels.
        pred : dict of str -> NDArray
            Name to array mapping of predicted outputs.
        """
        # Predictions are resolved before labels, as in the original ordering.
        if self.output_names is None:
            pred = list(pred.values())
        else:
            pred = [pred[key] for key in self.output_names]

        if self.label_names is None:
            label = list(label.values())
        else:
            label = [label[key] for key in self.label_names]

        self.update(label, pred)

    def update(self, labels, preds):
        """Updates the internal evaluation result; subclasses must override.

        Parameters
        ----------
        labels : list of `NDArray`
            The labels of the data.
        preds : list of `NDArray`
            Predicted values.
        """
        raise NotImplementedError()

    def reset(self):
        """Reset both accumulators to a one-element int32 zero NDArray on `self.ctx`."""
        self.num_inst = nd.zeros(1, ctx=self.ctx, dtype=np.int32)
        self.sum_metric = nd.zeros(1, ctx=self.ctx, dtype=np.int32)

    def get(self):
        """Gets the current evaluation result.

        Returns
        -------
        tuple
            `(name, value)`, where value is `sum_metric / num_inst`
            computed in float64 and read back as a scalar, or NaN when
            no instance has been counted yet.
        """
        if self.num_inst == 0:
            return (self.name, float('nan'))
        total = self.sum_metric.astype(np.float64)
        count = self.num_inst.astype(np.float64)
        return (self.name, (total / count).asscalar())

    def get_name_value(self):
        """Returns zipped name and value pairs.

        Returns
        -------
        list of tuples
            A (name, value) tuple list.
        """
        name, value = self.get()
        names = name if isinstance(name, list) else [name]
        values = value if isinstance(value, list) else [value]
        return list(zip(names, values))

class Accuracy(EvalMetric):
    """Classification accuracy accumulated in NDArrays on `ctx`.

    NOTE: the next cell of this notebook defines `Accuracy` again; that later
    definition shadows this one.

    Parameters
    ----------
    axis : int
        Axis along which `nd.argmax` is taken when predictions and labels
        differ in shape.
    name : str
        Name of this metric instance for display.
    output_names : list of str, or None
        Names of the predictions picked by `update_dict`.
    label_names : list of str, or None
        Names of the labels picked by `update_dict`.
    ctx : Context
        Context holding the accumulators. The arrays passed to `update`
        are combined with them, so presumably they must live on the same
        context -- TODO confirm.
    wait_to_read : boolean
        If True, `update` calls `wait_to_read()` on every prediction array
        before accumulating.
    """
    def __init__(self, axis=1, name='accuracy',
                 output_names=None, label_names=None, ctx=mx.cpu(), wait_to_read=False):
        super(Accuracy, self).__init__(
            name, axis=axis,
            output_names=output_names, label_names=label_names, ctx=ctx)
        self.axis = axis
        self.wait_to_read = wait_to_read

    def update(self, labels, preds):
        """Updates the internal evaluation result.

        Parameters
        ----------
        labels : `NDArray` or list of `NDArray`
            The labels of the data with class indices as values, one per sample.
        preds : `NDArray` or list of `NDArray`
            Prediction values for samples. Each prediction value can either be the class index,
            or a vector of likelihoods for all classes.

        Raises
        ------
        ValueError
            If `labels` and `preds` differ in length.
        """
        # Wrap first so that both a bare NDArray and a list of NDArrays
        # are handled the same way from here on.
        labels, preds = check_label_shapes(labels, preds, True)

        if self.wait_to_read:
            # Previously this was called on `preds` itself, which fails for a list.
            for pred in preds:
                pred.wait_to_read()

        for label, pred_label in zip(labels, preds):
            if pred_label.shape != label.shape:
                # `nd` is the module this notebook imports; the bare name
                # `ndarray` used here before was never defined.
                pred_label = nd.argmax(pred_label, axis=self.axis)
            pred_label = pred_label.astype('int32')
            label = label.astype('int32')

            # Both accumulators stay NDArrays; nothing is copied to Python here.
            self.sum_metric += (pred_label.flatten() == label.flatten()).sum()
            self.num_inst += len(pred_label.flatten())
        


In [12]:
class Accuracy(EvalMetric):
    """Classification accuracy accumulated in NDArrays on `ctx`.

    NOTE: this cell re-defines `Accuracy`, shadowing the definition in the
    previous cell; this is the class the evaluation helpers below pick up.

    Parameters
    ----------
    axis : int
        Axis along which `nd.argmax` is taken when predictions and labels
        differ in shape.
    name : str
        Name of this metric instance for display.
    output_names : list of str, or None
        Names of the predictions picked by `update_dict`.
    label_names : list of str, or None
        Names of the labels picked by `update_dict`.
    ctx : Context
        Context holding the accumulators. The arrays passed to `update`
        are combined with them, so presumably they must live on the same
        context -- TODO confirm.
    wait_to_read : boolean
        If True, `update` calls `wait_to_read()` on every prediction array
        before accumulating.
    """
    def __init__(self, axis=1, name='accuracy',
                 output_names=None, label_names=None, ctx=mx.cpu(), wait_to_read=False):
        super(Accuracy, self).__init__(
            name, axis=axis,
            output_names=output_names, label_names=label_names, ctx=ctx)
        self.axis = axis
        self.wait_to_read = wait_to_read

    def update(self, labels, preds):
        """Updates the internal evaluation result.

        Parameters
        ----------
        labels : `NDArray` or list of `NDArray`
            The labels of the data with class indices as values, one per sample.
        preds : `NDArray` or list of `NDArray`
            Prediction values for samples. Each prediction value can either be the class index,
            or a vector of likelihoods for all classes.

        Raises
        ------
        ValueError
            If `labels` and `preds` differ in length.
        """
        # Wrap first so that both a bare NDArray and a list of NDArrays
        # are handled the same way from here on.
        labels, preds = check_label_shapes(labels, preds, True)

        if self.wait_to_read:
            # Previously this was called on `preds` itself, which fails for a list.
            for pred in preds:
                pred.wait_to_read()

        for label, pred_label in zip(labels, preds):
            if pred_label.shape != label.shape:
                # `nd` is the module this notebook imports; the bare name
                # `ndarray` used here before was never defined.
                pred_label = nd.argmax(pred_label, axis=self.axis)
            pred_label = pred_label.astype('int32')
            label = label.astype('int32')

            # Both accumulators stay NDArrays; nothing is copied to Python here.
            self.sum_metric += (pred_label.flatten() == label.flatten()).sum()
            self.num_inst += len(pred_label.flatten())
        


In [13]:
def evaluate_accuracy_non_blocking(data_iterator, net):
    """Compute the accuracy of `net` over `data_iterator` with the custom metric.

    Uses the notebook's `Accuracy` class with `wait_to_read=False`, with its
    accumulators allocated on the global `ctx`.

    Parameters
    ----------
    data_iterator : iterable of (data, label) batches
    net : callable
        Maps a data batch to class scores; the predicted class is the
        argmax over axis 1.

    Returns
    -------
    The metric value, i.e. the second element of `Accuracy.get()`.
    """
    metric = Accuracy(ctx=ctx, wait_to_read=False)
    for data, label in data_iterator:
        batch = data.as_in_context(ctx)
        target = label.as_in_context(ctx)
        predicted = nd.argmax(net(batch), axis=1)
        metric.update(preds=predicted, labels=target)
    _, value = metric.get()
    return value

In [14]:
def evaluate_accuracy_wait_to_read(data_iterator, net):
    """Compute the accuracy of `net` over `data_iterator`, waiting on each prediction.

    Uses the notebook's `Accuracy` class with `wait_to_read=True`, with its
    accumulators allocated on the global `ctx`.

    Parameters
    ----------
    data_iterator : iterable of (data, label) batches
    net : callable
        Maps a data batch to class scores; the predicted class is the
        argmax over axis 1.

    Returns
    -------
    The metric value, i.e. the second element of `Accuracy.get()`.
    """
    metric = Accuracy(ctx=ctx, wait_to_read=True)
    for data, label in data_iterator:
        batch = data.as_in_context(ctx)
        target = label.as_in_context(ctx)
        predicted = nd.argmax(net(batch), axis=1)
        metric.update(preds=predicted, labels=target)
    _, value = metric.get()
    return value

In [15]:
def evaluate_accuracy_blocking(data_iterator, net):
    """Compute the accuracy of `net` over `data_iterator` with `mx.metric.Accuracy`.

    This is the baseline that the custom-metric variants are timed against.

    Parameters
    ----------
    data_iterator : iterable of (data, label) batches
    net : callable
        Maps a data batch to class scores; the predicted class is the
        argmax over axis 1.

    Returns
    -------
    The metric value, i.e. the second element of the metric's `get()`.
    """
    metric = mx.metric.Accuracy()
    for data, label in data_iterator:
        batch = data.as_in_context(ctx)
        target = label.as_in_context(ctx)
        predicted = nd.argmax(net(batch), axis=1)
        metric.update(preds=predicted, labels=target)
    return metric.get()[1]

## Training Loop

In [17]:
%%time
epochs = 1
smoothing_constant = .01

for epoch in range(epochs):
    for batch_idx, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)

        # Forward pass under autograd, then backward pass and parameter update.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        # Exponential moving average of the batch loss, seeded with the
        # very first batch. The loss is kept as an NDArray and only read
        # back as a scalar when it is printed.
        curr_loss = nd.mean(loss)
        if epoch == 0 and batch_idx == 0:
            moving_loss = curr_loss
        else:
            moving_loss = ((1 - smoothing_constant) * moving_loss
                           + smoothing_constant * curr_loss)

    # End-of-epoch report: test set first, then training set.
    test_accuracy = evaluate_accuracy_non_blocking(test_data, net)
    train_accuracy = evaluate_accuracy_non_blocking(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (epoch, moving_loss.asscalar(), train_accuracy, test_accuracy))

Epoch 0. Loss: 0.047929, Train_acc 0.985916666667, Test_acc 0.9853
CPU times: user 8.63 s, sys: 2.37 s, total: 11 s
Wall time: 9.54 s


blocking: 43.6s
non-blocking: 

In [92]:
%%time
# Baseline timing: training-set accuracy computed with the stock mx.metric.Accuracy.
evaluate_accuracy_blocking(train_data, net)

CPU times: user 15.6 s, sys: 2.48 s, total: 18.1 s
Wall time: 15.3 s


0.94853333333333334

In [90]:
%%time
# Same evaluation with the custom NDArray-backed Accuracy (wait_to_read=False).
evaluate_accuracy_non_blocking(train_data, net)

CPU times: user 3.32 s, sys: 596 ms, total: 3.92 s
Wall time: 3.2 s


0.94853333333333334

In [91]:
%%time
# Same evaluation with the custom Accuracy, waiting on each prediction (wait_to_read=True).
evaluate_accuracy_wait_to_read(train_data, net)

CPU times: user 3.18 s, sys: 696 ms, total: 3.87 s
Wall time: 3.43 s


0.94853333333333334

## Conclusion

You might notice that by using ``gluon``, we get code that runs much faster whether on CPU or GPU. That's largely because ``gluon`` can call down to highly optimized layers that have been written in C++. 

## Next
[Deep convolutional networks (AlexNet)](../chapter04_convolutional-neural-networks/deep-cnns-alexnet.ipynb)

For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)