In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import time

import mxnet as mx
mx.npx.set_np()

from mxnet import gluon, nd, image, autograd
from mxnet.gluon.data.vision import transforms

In case you don't recognize it, the image is a poorly-drawn airplane :)

Now we define transformations for the image.



In [3]:
batch_size = 128

# Number of worker processes used by each DataLoader below.
NUM_WORKERS = 4

# Per-channel mean and standard deviation used to normalize the images.
# Defined once so the pipelines below cannot drift apart.
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD = [0.2023, 0.1994, 0.2010]

# Plain pipeline: tensor conversion + normalization, no resizing or augmentation.
# NOTE(review): not referenced by any cell visible in this notebook — confirm before removing.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# Training pipeline: random augmentation followed by tensor conversion + normalization.
transform_train = transforms.Compose([
    # Randomly crop an area, and then resize it to be 32x32
    transforms.RandomResizedCrop(32),
    # Randomly flip the image horizontally
    transforms.RandomFlipLeftRight(),
    # Randomly jitter the brightness, contrast and saturation of the image
    transforms.RandomColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0,1]
    transforms.ToTensor(),
    # Normalize the image with mean and standard deviation calculated across all images
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# Evaluation pipeline: deterministic resize, tensor conversion + normalization.
transform_test = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# transform_first applies the pipeline to the first element of each sample (the image),
# leaving the label untouched.
dataset_train = gluon.data.vision.CIFAR10(train=True).transform_first(transform_train)
dataset_test = gluon.data.vision.CIFAR10(train=False).transform_first(transform_test)


# Only the training loader shuffles; the test loader keeps the dataset order.
datagen_train = gluon.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS)
datagen_test = gluon.data.DataLoader(dataset_test, batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS)


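The test-time transformation (`transform_test`) does three things:

- resizes the image to 32x32,
- converts it to a tensor, transposing it to `num_channels x height x width`,
- normalizes it with the mean and standard deviation calculated across all CIFAR10 images.

The training transformation (`transform_train`) additionally applies a random resized crop, a random horizontal flip and a random colour jitter before these steps.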

In [5]:
# Run on the default GPU when at least one is visible, otherwise on the CPU.
if mx.util.get_gpu_count() > 0:
    ctx = mx.gpu()
else:
    ctx = mx.cpu()

In [6]:
# Show which device was selected above.
print(ctx)

gpu(0)


In [37]:
from mxnet.gluon import HybridBlock
from mxnet.gluon import nn as nn

class Net(HybridBlock):
    """Small convolutional classifier: two conv/pool stages and three dense layers.

    The ``in_channels`` / ``in_units`` values noted in the comments below are
    the ones that result from 3x32x32 inputs; they are not passed to the
    layers explicitly.

    Parameters
    ----------
    num_classes : int, default 10
        Number of units of the final dense layer, i.e. the number of classes
        (10 for CIFAR10).
    **kwargs
        Forwarded unchanged to ``HybridBlock.__init__``.
    """

    def __init__(self, num_classes=10, **kwargs):
        super().__init__(**kwargs)

        # Good book keeping practices
        self.conv1 = nn.Conv2D(channels=6, kernel_size=5)  # in_channels=3
        # A single pooling layer is reused after both convolutions.
        self.pool = nn.MaxPool2D(pool_size=(2, 2))
        self.conv2 = nn.Conv2D(channels=16, kernel_size=5)  # in_channels=6

        self.flatten = nn.Flatten()

        self.fc1 = nn.Dense(units=120)  # in_units = 16*5*5
        self.fc2 = nn.Dense(units=84)  # in_units = 120

        # Output layer: one unit per class (10 for CIFAR10 by default).
        self.fc3 = nn.Dense(units=num_classes)  # in_units = 84

    def forward(self, x):
        """Return the un-normalized class scores for a batch of images.

        No softmax is applied here; the output of the last dense layer is
        returned as is.
        """
        x = self.pool(mx.npx.relu(self.conv1(x)))
        x = self.pool(mx.npx.relu(self.conv2(x)))
        x = self.flatten(x)  # transforms to x.shape[0], np.prod(x.shape[1:])

        x = mx.npx.relu(self.fc1(x))
        x = mx.npx.relu(self.fc2(x))
        x = self.fc3(x)
        return x





In [38]:
# Build a fresh network instance.
net = Net()
# In mxnet you need to initialize the model explicitly; the parameters are
# created on the device(s) given by ctx.
net.initialize(ctx=ctx)
# In mxnet you can get a speed-up (~3x according to the original author) if you hybridize
net.hybridize() 



In [39]:
# Nesterov accelerated gradient descent
optimizer = 'Adam'
# Set parameters
optimizer_params = {'learning_rate': 0.001}

# Define our trainer for net
trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

In [40]:
# Training loss. It is called below as loss_fn(output, label) on the raw
# outputs of net (Net.forward applies no softmax itself).
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

In [41]:
# Accuracy accumulator for the training set; it is reset at the start of every
# epoch and shared by both training loops in this notebook.
train_metric = gluon.metric.Accuracy()

In [45]:
def test(ctx, val_data):
    """Compute the accuracy of the global ``net`` on ``val_data`` using one device.

    Parameters
    ----------
    ctx
        Single device context the input batches are moved to.
    val_data
        Iterable yielding ``(data, label)`` batches.

    Returns
    -------
    The result of ``gluon.metric.Accuracy.get()`` after all batches have been
    seen; the callers in this notebook unpack it as ``(name, value)``.
    """
    metric = gluon.metric.Accuracy()
    for batch in val_data:
        data = batch[0].as_in_context(ctx)
        # The labels are handed to the metric as yielded by the loader.
        label = batch[1]
        outputs = net(data)
        metric.update(label, outputs)
    return metric.get()

In [47]:
epochs = 10

for epoch in range(epochs):
    tic = time.time()
    train_metric.reset()
    # Sum of the per-sample losses over the whole epoch (not an average).
    train_loss = 0

    # Loop through each batch of training data
    for data, label in datagen_train:
        # Move the batch to the training device
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)

        # Record the forward pass so that gradients can be computed
        with autograd.record():
            output = net(data)
            loss = loss_fn(output, label)

        # Backpropagation
        loss.backward()

        # Optimize. Normalize by the number of samples actually in this batch:
        # the last batch of an epoch can be smaller than batch_size.
        trainer.step(data.shape[0])

        # Update metrics
        train_loss += loss.sum().item()
        train_metric.update(label, output)

    name, acc = train_metric.get()
    # Evaluate on Validation data
    name, val_acc = test(ctx, datagen_test)

    # Update history and print metrics
    print('[Epoch {}] train accuracy={} val_accuracy={} loss={} time={}'.format(epoch, acc, val_acc, train_loss, time.time()-tic))


[Epoch 0] train accuracy=0.41272 val_accuracy=0.4853 loss=80895.16115570068 time=2.713114023208618
[Epoch 1] train accuracy=0.42654 val_accuracy=0.5019 loss=79397.92932891846 time=2.8134663105010986
[Epoch 2] train accuracy=0.43396 val_accuracy=0.5066 loss=78409.96029663086 time=2.704646110534668
[Epoch 3] train accuracy=0.4417 val_accuracy=0.5109 loss=77225.24891662598 time=2.6612391471862793
[Epoch 4] train accuracy=0.44934 val_accuracy=0.5337 loss=76299.92738342285 time=2.742936134338379
[Epoch 5] train accuracy=0.45842 val_accuracy=0.533 loss=75349.93766784668 time=2.803588390350342
[Epoch 6] train accuracy=0.46418 val_accuracy=0.5222 loss=74930.83156585693 time=2.8833534717559814
[Epoch 7] train accuracy=0.47074 val_accuracy=0.5471 loss=74126.69990539551 time=2.9083962440490723
[Epoch 8] train accuracy=0.47254 val_accuracy=0.5617 loss=73348.30010986328 time=2.9530951976776123
[Epoch 9] train accuracy=0.4772 val_accuracy=0.5452 loss=72672.45819091797 time=2.9021899700164795


# if you want to save/load your network

In [None]:
# Write the current parameters of net to a file in the working directory.
# NOTE(review): the file name mentions "resnet20_v2", but the Net defined in
# this notebook is a small two-convolution network — consider renaming.
net.save_parameters('dive_deep_cifar10_resnet20_v2.params') # save
# To restore the parameters later onto ctx, uncomment:
# net.load_parameters('dive_deep_cifar10_resnet20_v2.params', ctx=ctx) # load 

## MultiGPU example

In [48]:
# Change context to a list of mx contexts

# number of GPUs to use
num_gpus = mx.util.get_gpu_count()
# One context per visible GPU. Without any GPU the list would be empty and
# nothing could be initialized or loaded onto it, so fall back to the CPU.
ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]

# Initialize again net and trainer in a multigpu context

In [49]:
# Start again from a fresh, untrained network.
net = Net()
# In mxnet you need to initialize the model explicitly; here ctx is a list,
# so the parameters are initialized on every listed device.
net.initialize(ctx=ctx)
# In mxnet you can get a speed-up (~3x according to the original author) if you hybridize
net.hybridize() 

# Initialize trainer for the new net, reusing the optimizer name and
# hyper-parameters defined earlier in the notebook
trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)


In [50]:
def test(ctx, val_data):
    """Compute the accuracy of the global ``net`` on ``val_data`` across several devices.

    This definition replaces the single-device ``test`` defined earlier in the
    notebook; from here on ``ctx`` must be a list of contexts.

    Parameters
    ----------
    ctx : list
        Device contexts each batch is split across.
    val_data
        Iterable yielding ``(data, label)`` batches.

    Returns
    -------
    The result of ``gluon.metric.Accuracy.get()`` after all batches have been
    seen; the callers in this notebook unpack it as ``(name, value)``.
    """
    metric = gluon.metric.Accuracy()
    for batch in val_data:
        # Split data and labels the same way, one slice per device.
        # even_split=False tolerates a last batch whose size is not a
        # multiple of the number of devices.
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        outputs = [net(X) for X in data]
        # Hand the per-device lists straight to the metric (as the training
        # loop does) instead of concatenating arrays that live on different
        # devices.
        metric.update(label, outputs)
    return metric.get()

In [53]:
epochs = 3

for epoch in range(epochs):
    tic = time.time()
    train_metric.reset()
    # Sum of the per-sample losses over the whole epoch (not an average).
    train_loss = 0

    # Loop through each batch of training data
    for batch in datagen_train:
        # Split data and label into one slice per device.
        # even_split=False tolerates a last batch whose size is not a
        # multiple of the number of devices.
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)

        # Record one forward pass per device slice
        with autograd.record():
            output = [net(X) for X in data]
            loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]

        # Backpropagation
        for l in loss:
            l.backward()

        # Optimize. Normalize by the number of samples actually in this batch
        # (all devices together): the last batch of an epoch can be smaller
        # than batch_size.
        trainer.step(batch[0].shape[0])

        # Update metrics
        # Now loss is a list of outputs!!!
        train_loss += sum([l.sum().item() for l in loss])
        train_metric.update(label, output)

    name, acc = train_metric.get()
    # Evaluate on Validation data
    name, val_acc = test(ctx, datagen_test)

    # Update history and print metrics
    print('[Epoch %d] train=%f val=%f loss=%f time: %f' %
        (epoch, acc, val_acc, train_loss, time.time()-tic))



[Epoch 0] train=0.274440 val=0.387500 loss=97386.380310 time: 2.744569
[Epoch 1] train=0.346940 val=0.438300 loss=88356.218491 time: 2.793489
[Epoch 2] train=0.376180 val=0.472200 loss=84577.858002 time: 2.752750
