In [1]:
import sys
sys.path.append("/workspace/server")
import warnings

from thera.python.mxnet import mxnet as mx
warnings.filterwarnings('ignore')

import random
import numpy as np
import mxnet as mx
from mxnet import gluon
import gluonnlp as nlp
# https://gluon-nlp.mxnet.io/master/examples/word_embedding/word_embedding.html
import re

In [2]:

def f(a):
    """Double ``a`` until its norm reaches 1000, then select one element.

    The number of doublings and the element that is returned both depend on
    the runtime values of ``a``, which makes this a small example of
    data-dependent control flow.

    Parameters
    ----------
    a : array-like supporting ``*``, ``norm()``, ``sum()`` and indexing
        The input vector; it needs at least two elements because the
        negative-sum branch reads index 1.

    Returns
    -------
    The first element of the scaled vector when the sum of its elements is
    non-negative, otherwise the second element.

    Raises
    ------
    ValueError
        If ``a`` has zero norm: doubling would never reach the threshold and
        the loop would not terminate.
    """
    b = a * 2
    norm = b.norm().asscalar()
    if norm == 0:
        raise ValueError(
            "f(a) requires a non-zero input: doubling a zero vector never reaches norm 1000")
    # Keep doubling until the norm crosses the threshold.
    while norm < 1000:
        b = b * 2
        norm = b.norm().asscalar()
    # The sign of the sum decides which element is returned.
    if b.sum().asscalar() >= 0:
        return b[0]
    return b[1]



In [3]:

# Sample a 2-element input and attach gradient storage to it.
a = mx.nd.random.uniform(shape=2)
a.attach_grad()
# Run f under autograd recording so its data-dependent control flow is traced.
with mx.autograd.record():
    c = f(a)
# Back-propagate from c; the gradient with respect to a is then available as a.grad.
c.backward()



Sometimes we want to write dynamic programs where the execution depends on some real-time values. MXNet will record the execution trace and compute the gradient as well.

Consider the following function f: it doubles the input until its norm reaches 1000. Then it selects one element depending on the sum of its elements.
We know that b is a linear function of a, and c is chosen from b. Then the gradient with respect to a will be either [c/a[0], 0] or [0, c/a[1]], depending on which element from b we picked. Let’s find the results:

In [4]:
# Show the randomly sampled input values.
a


[0.5488135 0.5928446]
<NDArray 2 @cpu(0)>

In [5]:
# Compare the autograd gradient with c/a: they should agree at the index f selected.
[a.grad, c/a]

[
 [2048.    0.]
 <NDArray 2 @cpu(0)>, 
 [2048.     1895.8933]
 <NDArray 2 @cpu(0)>]

# https://mxnet.apache.org/api/python/docs/tutorials/getting-started/gluon_from_experiment_to_deployment.html

In [6]:

# Name of the helper script used to prepare the Oxford 102 flowers data, and the URL template it is fetched from.
data_util_file = "oxford_102_flower_dataset.py"
base_url = "https://raw.githubusercontent.com/apache/incubator-mxnet/master/docs/tutorial_utils/data/{}?raw=true"
# Fetch the helper into the working directory so it can be imported as a module.
mx.test_utils.download(base_url.format(data_util_file), fname=data_util_file)
# Keep this import after the download above: the module file may not exist before it.
import oxford_102_flower_dataset

# download and move data to train, test, valid folders
path = './data'
oxford_102_flower_dataset.get_data(path)

In [7]:
import math
import os
import time

from mxnet import autograd
from mxnet import gluon, init
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms
from mxnet.gluon.model_zoo.vision import resnet50_v2

Next, we define the hyper-parameters that we will use for fine-tuning. We will use the MXNet learning rate scheduler to adjust learning rates during training. Here we set the number of epochs to 1 for a quick demonstration; change it to 40 for actual training.

In [8]:
# number of output classes for the fine-tuned network
classes = 102
epochs = 1
lr = 0.001
per_device_batch_size = 32
momentum = 0.9
# weight decay passed to the optimizer
wd = 0.0001

# factor handed to the learning rate scheduler
lr_factor = 0.75
# learning rate change at following epochs
lr_epochs = [10, 20, 30]

num_gpus = mx.context.num_gpus()
# you can replace num_workers with the number of cores on your device
num_workers = 8
# use every available GPU, falling back to a single CPU context when there is none
ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
# global batch size; max(..., 1) keeps it non-zero when no GPU is present
batch_size = per_device_batch_size * max(num_gpus, 1)

Now we will apply data augmentations to the training images. This makes minor alterations to the training images, and our model will consider them as distinct images. This can be very useful for fine-tuning on a relatively small dataset, and it will help improve the model. We can use the Gluon DataSet API, DataLoader API, and Transform API to load the images and apply the following data augmentations:

1. Randomly crop the image and resize it to 224x224
2. Randomly flip the image horizontally
3. Randomly jitter color and add noise
4. Transpose the data from [height, width, num_channels] to [num_channels, height, width], and map values from [0, 255] to [0, 1]
5. Normalize with the mean and standard deviation from the ImageNet dataset.

For validation and inference, we only need to apply steps 1, 4, and 5. We also need to save the mean and standard deviation values for inference using C++.

In [9]:
# strength of the random brightness / contrast / saturation jitter
jitter_param = 0.4
# strength passed to RandomLighting
lighting_param = 0.1

# mean and std for normalizing image value in range (0,1)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: random augmentations, then tensor conversion and normalization.
training_transformer = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Validation / inference pipeline: fixed resize and center crop, no random augmentation.
validation_transformer = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

def _constant_planes(channel_values):
    """Stack one 224x224 array per channel, each filled with that channel's value."""
    planes = [mx.nd.full((224, 224), value) for value in channel_values]
    return mx.nd.stack(*planes)


# Persist the per-channel mean and std as image-sized NDArrays for use at inference time.
mean_img = _constant_planes(mean)
std_img = _constant_planes(std)
mx.nd.save('mean_std_224.nd', {"mean_img": mean_img, "std_img": std_img})

def _folder_loader(folder, transformer, shuffle):
    """Build a DataLoader over an image folder, applying `transformer` to the images."""
    dataset = gluon.data.vision.ImageFolderDataset(folder).transform_first(transformer)
    return gluon.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)


# One sub-folder per split underneath the dataset root.
train_path = os.path.join(path, 'train')
val_path = os.path.join(path, 'valid')
test_path = os.path.join(path, 'test')

# Only the training split is shuffled and augmented; validation and test
# share the validation transform and keep their order.
train_data = _folder_loader(train_path, training_transformer, shuffle=True)
val_data = _folder_loader(val_path, validation_transformer, shuffle=False)
test_data = _folder_loader(test_path, validation_transformer, shuffle=False)

Process ForkPoolWorker-17:
Process ForkPoolWorker-20:
Process ForkPoolWorker-10:
Process ForkPoolWorker-19:
Process ForkPoolWorker-12:
Process ForkPoolWorker-24:
Process ForkPoolWorker-9:
Traceback (most recent call last):
Process ForkPoolWorker-15:
Traceback (most recent call last):
Process ForkPoolWorker-22:
Traceback (most recent call last):
Process ForkPoolWorker-18:
Process ForkPoolWorker-23:
Process ForkPoolWorker-16:
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Process ForkPoolW

  File "/usr/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/usr/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
KeyboardInterrupt
  File "/usr/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
KeyboardInterrupt
  File "/usr/lib/python3.5/multiprocessing/pool.p

In [10]:
# Quick check of mx.nd.full: builds an array of the given shape filled with one value.
mx.nd.full((2,2), 3)


[[3. 3.]
 [3. 3.]]
<NDArray 2x2 @cpu(0)>

In [11]:
# Inspect the per-channel constant planes that get stacked into mean_img.
[mx.nd.full((224, 224), m) for m in mean]

[
 [[0.485 0.485 0.485 ... 0.485 0.485 0.485]
  [0.485 0.485 0.485 ... 0.485 0.485 0.485]
  [0.485 0.485 0.485 ... 0.485 0.485 0.485]
  ...
  [0.485 0.485 0.485 ... 0.485 0.485 0.485]
  [0.485 0.485 0.485 ... 0.485 0.485 0.485]
  [0.485 0.485 0.485 ... 0.485 0.485 0.485]]
 <NDArray 224x224 @cpu(0)>, 
 [[0.456 0.456 0.456 ... 0.456 0.456 0.456]
  [0.456 0.456 0.456 ... 0.456 0.456 0.456]
  [0.456 0.456 0.456 ... 0.456 0.456 0.456]
  ...
  [0.456 0.456 0.456 ... 0.456 0.456 0.456]
  [0.456 0.456 0.456 ... 0.456 0.456 0.456]
  [0.456 0.456 0.456 ... 0.456 0.456 0.456]]
 <NDArray 224x224 @cpu(0)>, 
 [[0.406 0.406 0.406 ... 0.406 0.406 0.406]
  [0.406 0.406 0.406 ... 0.406 0.406 0.406]
  [0.406 0.406 0.406 ... 0.406 0.406 0.406]
  ...
  [0.406 0.406 0.406 ... 0.406 0.406 0.406]
  [0.406 0.406 0.406 ... 0.406 0.406 0.406]
  [0.406 0.406 0.406 ... 0.406 0.406 0.406]]
 <NDArray 224x224 @cpu(0)>]

# Loading pre-trained model

We will use the pre-trained ResNet50_v2 model, which was trained on the ImageNet dataset with 1000 classes. To match the classes in the Flower dataset, we must redefine the last softmax (output) layer to have 102 outputs, then initialize its parameters.

Before we go to training, one unique Gluon feature you should be aware of is hybridization. It allows you to convert your imperative code to a static symbolic graph, which is much more efficient to execute. There are two main benefits of hybridizing your model: better performance and easier serialization for deployment. The best part is that it’s as simple as just calling net.hybridize(). To know more about Gluon hybridization, please follow the hybridization tutorial.

In [12]:
# load pre-trained resnet50_v2 from model zoo
finetune_net = resnet50_v2(pretrained=True, ctx=ctx)

# change last softmax layer since the number of classes is different
with finetune_net.name_scope():
    finetune_net.output = nn.Dense(classes)
# only the replaced output layer is initialized here; no other layer is re-initialized
finetune_net.output.initialize(init.Xavier(), ctx=ctx)
# hybridize for better performance
finetune_net.hybridize()

# number of batches the training loader yields
num_batch = len(train_data)

# setup learning rate scheduler
# NOTE(review): num_batch comes from len(), so it is already an int and ceil() leaves it unchanged
iterations_per_epoch = math.ceil(num_batch)
# learning rate change at following steps
lr_steps = [epoch * iterations_per_epoch for epoch in lr_epochs]
schedule = mx.lr_scheduler.MultiFactorScheduler(step=lr_steps, factor=lr_factor, base_lr=lr)

# setup optimizer with learning rate scheduler, metric, and loss function
sgd_optimizer = mx.optimizer.SGD(learning_rate=lr, lr_scheduler=schedule, momentum=momentum, wd=wd)
metric = mx.metric.Accuracy()
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

Downloading /root/.mxnet/models/resnet50_v2-ecdde353.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet50_v2-ecdde353.zip...


In [16]:
def test(net, val_data, ctx):
    """Evaluate `net` on every batch of `val_data` and return the accuracy metric.

    Each batch is split across the contexts in `ctx`, run through the network,
    and folded into a fresh accuracy metric whose `get()` result is returned.
    """
    accuracy = mx.metric.Accuracy()
    for batch_data, batch_label in val_data:
        inputs = gluon.utils.split_and_load(batch_data, ctx_list=ctx, even_split=False)
        targets = gluon.utils.split_and_load(batch_label, ctx_list=ctx, even_split=False)
        predictions = [net(x) for x in inputs]
        accuracy.update(targets, predictions)
    return accuracy.get()

trainer = gluon.Trainer(finetune_net.collect_params(), optimizer=sgd_optimizer)
# NOTE: this overrides the `epochs` value set in the hyper-parameter cell.
epochs = 5
# Upper bound on the training batches consumed per epoch (keeps the demo short).
max_train_batches = 51

# start with epoch 1 for easier learning rate calculation
for epoch in range(1, epochs + 1):

    tic = time.time()
    train_loss = 0
    batches_seen = 0
    metric.reset()

    for i, (data, label) in enumerate(train_data):
        # Stop pulling batches once the cap is reached. Using `continue` here
        # would still load and transform every remaining batch only to drop it.
        if i >= max_train_batches:
            break
        # get the images and labels, split across the available devices
        data = gluon.utils.split_and_load(data, ctx_list=ctx, even_split=False)
        label = gluon.utils.split_and_load(label, ctx_list=ctx, even_split=False)
        with autograd.record():
            outputs = [finetune_net(x) for x in data]
            loss = [softmax_cross_entropy(yhat, y) for yhat, y in zip(outputs, label)]
        for l in loss:
            l.backward()

        trainer.step(batch_size)
        # mean loss of this batch, averaged over the device shards
        train_loss += sum(l.mean().asscalar() for l in loss) / len(loss)
        metric.update(label, outputs)
        batches_seen += 1

    _, train_acc = metric.get()
    # Average over the batches actually trained on, not the full loader length;
    # max(..., 1) avoids a division by zero when the loader is empty.
    train_loss /= max(batches_seen, 1)
    _, val_acc = test(finetune_net, val_data, ctx)

    print('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f | learning-rate: %.3E | time: %.1f' %
          (epoch, train_acc, train_loss, val_acc, trainer.learning_rate, time.time() - tic))

_, test_acc = test(finetune_net, test_data, ctx)
print('[Finished] Test-acc: %.3f' % (test_acc))

KeyboardInterrupt: 