In [8]:
import mxnet as mx
from mxnet import nd, autograd, gluon
import numpy as np

In [2]:
# Device context used throughout the notebook: parameters are initialised on it
# and every batch is moved to it with `as_in_context(ctx)` before use.
ctx = mx.cpu()

In [3]:
# Problem dimensions and mini-batch size shared by the cells below.
num_inputs = 28 * 28   # 784 values per flattened image
num_outputs = 10       # width of the final Dense layer (one score per class)
batch_size = 64        # samples per mini-batch produced by the DataLoaders
def transform(data, label):
    """Convert one (image, label) sample to float32 and rescale the image.

    The image is cast to float32 and divided by 255 (so 8-bit pixel
    intensities end up in [0, 1]); the label is cast to float32 with its
    value unchanged.

    Returns
    -------
    tuple
        ``(scaled_image, float_label)``.
    """
    scaled_image = data.astype(np.float32) / 255
    float_label = label.astype(np.float32)
    return scaled_image, float_label
# Build mini-batch loaders over the MNIST train and test splits.  `transform`
# is handed to each dataset as its sample transform; only the training loader
# shuffles.
mnist_train = gluon.data.vision.MNIST(train=True, transform=transform)
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)

mnist_test = gluon.data.vision.MNIST(train=False, transform=transform)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

In [4]:
# Multilayer perceptron: two (Dense + ReLU -> Dropout) stages followed by a
# linear output layer with `num_outputs` units.
num_hidden = 256
net = gluon.nn.Sequential()

with net.name_scope():
    # Both hidden stages are identical, so add them in a loop; the layers are
    # appended in the same order as writing them out by hand.
    for stage in range(2):
        net.add(gluon.nn.Dense(num_hidden, activation="relu"))
        net.add(gluon.nn.Dropout(.5))

    net.add(gluon.nn.Dense(num_outputs))

In [5]:
# Initialise every parameter of the network with Xavier initialisation on `ctx`.
xavier_init = mx.init.Xavier(magnitude=2.24)
net_params = net.collect_params()
net_params.initialize(xavier_init, ctx=ctx)

In [9]:
# Take the first training batch (labels are discarded) and move it to `ctx`.
# `x` is kept at notebook level because the following cells reuse `x[0:1]`.
x, _ = next(iter(train_data))
x = x.as_in_context(ctx)

# Run the same single example through the network twice, outside of any
# autograd scope, to compare the two outputs.
for attempt in range(2):
    print(net(x[0:1]))


[[ 0.06113175  0.39647946  0.0284294  -0.07397448  0.09896602  0.13633069
  -0.38401553 -0.07457183 -0.28846028  0.01900677]]
<NDArray 1x10 @cpu(0)>

[[ 0.06113175  0.39647946  0.0284294  -0.07397448  0.09896602  0.13633069
  -0.38401553 -0.07457183 -0.28846028  0.01900677]]
<NDArray 1x10 @cpu(0)>


In [10]:
# Two forward passes of the same example under an explicit predict-mode scope;
# the recorded output shows both calls produce identical values.
with autograd.predict_mode():
    for attempt in range(2):
        print(net(x[0:1]))


[[ 0.06113175  0.39647946  0.0284294  -0.07397448  0.09896602  0.13633069
  -0.38401553 -0.07457183 -0.28846028  0.01900677]]
<NDArray 1x10 @cpu(0)>

[[ 0.06113175  0.39647946  0.0284294  -0.07397448  0.09896602  0.13633069
  -0.38401553 -0.07457183 -0.28846028  0.01900677]]
<NDArray 1x10 @cpu(0)>


In [11]:
# Same two forward passes under train mode; the recorded output shows the
# results now differ from call to call, i.e. dropout is active here.
with autograd.train_mode():
    for attempt in range(2):
        print(net(x[0:1]))


[[-0.03892522 -0.13739786 -0.09202147 -0.13630009  0.07733364  0.40745163
  -0.08688814  0.17594074 -0.2677747  -0.12195608]]
<NDArray 1x10 @cpu(0)>

[[ 0.33375555  0.06176083  0.53329396 -0.58870173  0.68023777  0.37431058
  -0.27777871  0.40373668  0.27044857 -0.15855731]]
<NDArray 1x10 @cpu(0)>


In [12]:
# Report the value of `autograd.is_training()` inside each mode scope,
# predict mode first and train mode second.
for mode_scope in (autograd.predict_mode, autograd.train_mode):
    with mode_scope():
        print(autograd.is_training())

False
True


In [13]:
# Loss object used by the training loop; it is called as
# `softmax_cross_entropy(output, label)` on the network output and the labels.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [14]:
# Optimiser driving the parameter updates: plain SGD with learning rate 0.1
# over all parameters of `net`.
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    optimizer_params={'learning_rate': 0.1},
)

In [15]:
def evaluate_accuracy(data_iterator, net):
    """Return the classification accuracy of ``net`` over ``data_iterator``.

    Parameters
    ----------
    data_iterator : iterable of (data, label) batches
        Each batch is moved to the notebook-level ``ctx``; the data is
        reshaped to ``(-1, num_inputs)`` before the forward pass.
    net : callable
        Model mapping a data batch to per-class scores along axis 1.

    Returns
    -------
    The value component (index 1) of ``mx.metric.Accuracy().get()``.
    """
    acc = mx.metric.Accuracy()
    for data, label in data_iterator:
        # Flatten with the shared `num_inputs` constant rather than a
        # repeated literal 784, so the input size is defined in one place.
        data = data.as_in_context(ctx).reshape((-1, num_inputs))
        label = label.as_in_context(ctx)
        output = net(data)
        # Predicted class = index of the largest score in each row.
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

In [16]:
# Training loop: `epochs` passes over `train_data` with SGD updates, tracking an
# exponential moving average of the batch loss and reporting train/test
# accuracy after every epoch.
epochs = 10
smoothing_constant = .01
# Initialised explicitly so the average never depends on a value left over from
# a previous run of this cell; it is seeded with the first batch loss below.
moving_loss = None

for e in range(epochs):
    for data, label in train_data:
        # Flatten using the shared `num_inputs` constant instead of a literal 784.
        data = data.as_in_context(ctx).reshape((-1, num_inputs))
        label = label.as_in_context(ctx)

        # Only the forward pass and the loss need to be recorded; the backward
        # pass is triggered once the recording scope has been closed.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()

        # Pass the actual number of samples in this batch (the last batch of
        # an epoch may be smaller than `batch_size`).
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        if moving_loss is None:
            moving_loss = curr_loss
        else:
            moving_loss = ((1 - smoothing_constant) * moving_loss
                           + smoothing_constant * curr_loss)

    # Accuracy is evaluated outside any autograd scope.
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 0.33805566903, Train_acc 0.9215, Test_acc 0.9163
Epoch 1. Loss: 0.24248183529, Train_acc 0.9593, Test_acc 0.9585
Epoch 2. Loss: 0.193205089792, Train_acc 0.967666666667, Test_acc 0.9655
Epoch 3. Loss: 0.189879027747, Train_acc 0.973383333333, Test_acc 0.9687
Epoch 4. Loss: 0.157510939468, Train_acc 0.97735, Test_acc 0.9719
Epoch 5. Loss: 0.15309121974, Train_acc 0.97795, Test_acc 0.9724
Epoch 6. Loss: 0.130983078682, Train_acc 0.982083333333, Test_acc 0.9752
Epoch 7. Loss: 0.125865770676, Train_acc 0.983883333333, Test_acc 0.9767
Epoch 8. Loss: 0.115353022325, Train_acc 0.98445, Test_acc 0.975
Epoch 9. Loss: 0.112265189683, Train_acc 0.986833333333, Test_acc 0.977
