In [1]:
import logging
import random
import time

import matplotlib.pyplot as plt
import mxnet as mx
from mxnet import gluon, nd, autograd
import numpy as np

batch_size = 128
epochs = 5
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
lr = 0.01
alpha = .5


In [2]:
train_dataset = gluon.data.vision.MNIST(train=True)
test_dataset = gluon.data.vision.MNIST(train=False)
train_dataset[:5]

(
 [[[[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   ...
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]]
 
 
  [[[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   ...
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]]
 
 
  [[[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   ...
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [

In [3]:
def transform(x, y):
    x = x.transpose((2, 0, 1)).astype('float32') / 255
    y1 = y
    y2 = y % 2
    return x, np.float32(y1), np.float32(y2)

train_dataset_t = train_dataset.transform(transform)
test_dataset_t = test_dataset.transform(transform)
train_dataset[:5]

(
 [[[[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   ...
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]]
 
 
  [[[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   ...
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]]
 
 
  [[[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   ...
 
   [[0]
    [0]
    [0]
    ...
    [0]
    [0]
    [0]]
 
   [[0]
    [0]
    [

In [11]:
train_data = gluon.data.DataLoader(train_dataset_t, shuffle=True, last_batch='rollover', batch_size=batch_size)
test_data = gluon.data.DataLoader(test_dataset_t, shuffle=False, last_batch='rollover', batch_size=batch_size)
print("Input shape: %d, Target Labels: %d" % (train_dataset[0][0].shape[0],
                                              train_dataset_t[0][0].shape[0]))

Input shape: 28, Target Labels: 1


In [12]:
class MultiTaskNetwork(gluon.HybridBlock):
    def __init__(self):
        super(MultiTaskNetwork, self).__init__()

        self.shared = gluon.nn.HybridSequential()
        with self.shared.name_scope():
            self.shared.add(
                gluon.nn.Dense(128, activation='relu'),
                gluon.nn.Dense(64, activation='relu'),
                gluon.nn.Dense(10, activation='relu')
            )

        self.output1 = gluon.nn.Dense(10) # Num class
        self.output2 = gluon.nn.Dense(1) # Odd and Even

    def hybrid_forward(self, F, x, *args, **kwargs):
        y = self.shared(x)
        output1 = self.output1(y)
        output2 = self.output2(y)
        return output1, output2



In [13]:
loss_digits = gluon.loss.SoftmaxCELoss()
loss_odd_even = gluon.loss.SigmoidBCELoss()

loss_digits


SoftmaxCrossEntropyLoss(batch_axis=0, w=None)

In [14]:
# Init network
mx.random.seed(42)
random.seed(42)

net = MultiTaskNetwork()
net.initialize(mx.init.Xavier(), ctx=ctx)
net.hybridize()

trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'learning_rate': lr})
trainer

<mxnet.gluon.trainer.Trainer at 0x18d2970a2b0>

In [15]:
# Evaluate accuracy

def evaluate_accuracy(net, data_iterator):
    acc_digits = mx.metric.Accuracy(name='digits')
    acc_odd_even = mx.metric.Accuracy(name='odd_even')

    for i, (data, label_digit, label_odd_even) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        label_digit = label_digit.as_in_context(ctx)
        label_odd_even = label_odd_even.as_in_context(ctx).reshape(-1, 1)

        output_digit, out_put_odd_even = net(data)

        acc_digits.update(label_digit, output_digit.softmax())
        acc_odd_even.update(label_odd_even, out_put_odd_even.sigmoid() > 0.5)
        return acc_digits.get(), acc_odd_even.get()



In [16]:
for e in range(epochs):
    acc_digits = mx.metric.Accuracy(name='digits')
    acc_odd_even = mx.metric.Accuracy(name='odd_even')

    loss_digits_ = 0.0
    loss_odd_even_ = 0.0

    for i, (data, label_digit, label_odd_even) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label_digit = label_digit.as_in_context(ctx)
        label_odd_even = label_odd_even.as_in_context(ctx).reshape(-1, 1)

        with autograd.record():
            output_digit, output_odd_even = net(data)
            l_digits = loss_digits(output_digit, label_digit)
            l_odd_even = loss_odd_even(output_odd_even, label_odd_even)

            l_combined = (1- alpha) * l_digits + alpha * l_odd_even

        l_combined.backward()
        trainer.step(data.shape[0])

        loss_digits_ += l_digits.mean()
        loss_odd_even_ += l_odd_even.mean()
        acc_digits.update(label_digit, output_digit.softmax())
        acc_odd_even.update(label_odd_even, output_odd_even.sigmoid() > 0.5)

        print("Epoch %d, Acc digits: %.4f, Loss digits: %.4f" % (e, acc_digits.get()[1], loss_digits_.asscalar() /
                                                                 (i+1)))
        print("Epoch %d, Acc odd_even: %.4f, Loss odd_even: %.4f" % (e, acc_odd_even.get()[1], loss_odd_even_.asscalar() /
                                                                 (i+1)))
        print("Epoch %d, Testing Accuracies: %.4f" % (e, evaluate_accuracy(net, test_data)))



Epoch 0, Acc digits: 0.0781, Loss digits: 2.3049
Epoch 0, Acc odd_even: 0.4609, Loss odd_even: 0.7011


TypeError: must be real number, not tuple