In [None]:
pip install mxnet

In [None]:
pip install horovod

In [None]:
import logging
import os
import zipfile
import time

import mxnet as mx
import horovod.mxnet as hvd
from mxnet import autograd, gluon, nd
from mxnet.test_utils import download

In [None]:
# Training settings
# Batch size handed to both MNIST iterators and to trainer.step().
batch_size = 64
# Dtype the model is cast to; input batches are converted with astype(dtype).
dtype = "float32"
# Number of passes of the training loop.
epochs = 5
# Base learning rate; it is multiplied by hvd.size() when the optimizer is
# created.
lr = 0.01
# Momentum passed to the 'sgd' optimizer.
momentum = 0.9
# When True the context is mx.cpu(local_rank); otherwise mx.gpu(local_rank).
no_cuda = True
# When True the trainer uses hvd.Compression.fp16, otherwise
# hvd.Compression.none.
fp16_allreduce = False
# Forwarded unchanged to hvd.DistributedTrainer(gradient_predivide_factor=...).
gradient_predivide_factor = 1.0


# Configure the root logger at INFO level.
logging.basicConfig(level=logging.INFO)

In [None]:
# Function to get mnist iterator given a rank
def get_mnist_iterator(rank, batch_size):
    """Download MNIST into a per-rank directory and build its data iterators.

    The archive is downloaded to ``data-<rank>`` and extracted there, so each
    rank works on its own copy of the files.

    Args:
        rank: Worker rank. It names the data directory and selects the
            partition of the training set read by this worker.
        batch_size: Batch size passed to both iterators.

    Returns:
        A ``(train_iter, val_iter)`` tuple of ``mx.io.MNISTIter`` objects.
        The training iterator is shuffled and split into ``hvd.size()``
        parts; the validation iterator is created without partitioning
        arguments.
    """
    data_dir = "data-%d" % rank
    # exist_ok=True replaces the isdir()/makedirs() check-then-create pair,
    # which fails if the directory appears between the two calls.
    os.makedirs(data_dir, exist_ok=True)
    zip_file_path = download('http://data.mxnet.io/mxnet/data/mnist.zip',
                             dirname=data_dir)
    with zipfile.ZipFile(zip_file_path) as zf:
        zf.extractall(data_dir)

    input_shape = (1, 28, 28)

    train_iter = mx.io.MNISTIter(
        image="%s/train-images-idx3-ubyte" % data_dir,
        label="%s/train-labels-idx1-ubyte" % data_dir,
        input_shape=input_shape,
        batch_size=batch_size,
        shuffle=True,
        flat=False,
        num_parts=hvd.size(),
        # Use the rank that was passed in so the data directory and the
        # partition index always refer to the same worker.
        part_index=rank
    )

    val_iter = mx.io.MNISTIter(
        image="%s/t10k-images-idx3-ubyte" % data_dir,
        label="%s/t10k-labels-idx1-ubyte" % data_dir,
        input_shape=input_shape,
        batch_size=batch_size,
        flat=False,
    )

    return train_iter, val_iter


# Function to define neural network
def conv_nets():
    """Assemble the convolutional classifier used by this notebook.

    Returns:
        A ``gluon.nn.HybridSequential`` containing, in order: a 20-channel
        5x5 ReLU convolution, 2x2 max pooling with stride 2, a 50-channel
        5x5 ReLU convolution, 2x2 max pooling with stride 2, a flatten
        layer, a 512-unit ReLU dense layer and a 10-unit dense layer.
    """
    network = gluon.nn.HybridSequential()
    with network.name_scope():
        # Layers are created inside the name scope, in the order they are
        # stacked, and then appended one by one.
        layers = [
            gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
            gluon.nn.MaxPool2D(pool_size=2, strides=2),
            gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'),
            gluon.nn.MaxPool2D(pool_size=2, strides=2),
            gluon.nn.Flatten(),
            gluon.nn.Dense(512, activation="relu"),
            gluon.nn.Dense(10),
        ]
        for layer in layers:
            network.add(layer)
    return network


# Function to evaluate accuracy for a model
def evaluate(model, data_iter, context):
    """Run ``model`` over every batch of ``data_iter`` and score its accuracy.

    Args:
        model: Callable applied to each input batch.
        data_iter: Iterator that is reset and then consumed once; each batch
            exposes ``data[0]`` and ``label[0]``.
        context: Device context the data and labels are moved to.

    Returns:
        The result of ``mx.metric.Accuracy().get()`` after all batches have
        been accumulated (the caller unpacks it as ``name, value``).
    """
    data_iter.reset()
    accuracy = mx.metric.Accuracy()
    for batch in data_iter:
        inputs = batch.data[0].as_in_context(context)
        targets = batch.label[0].as_in_context(context)
        # Inputs are converted to the module-level dtype before the forward
        # pass.
        predictions = model(inputs.astype(dtype, copy=False))
        accuracy.update([targets], [predictions])

    return accuracy.get()


# Initialize Horovod
hvd.init()

# Horovod: pin context to local rank
num_workers = hvd.size()
if no_cuda:
    context = mx.cpu(hvd.local_rank())
else:
    context = mx.gpu(hvd.local_rank())

# Load training and validation data
train_data, val_data = get_mnist_iterator(hvd.rank(), batch_size)

# Build model: cast to the configured dtype, then hybridize
model = conv_nets()
model.cast(dtype)
model.hybridize()

# Create optimizer; the base learning rate is multiplied by the worker count
optimizer_params = dict(momentum=momentum,
                        learning_rate=lr * hvd.size())
opt = mx.optimizer.create('sgd', **optimizer_params)

# Initialize parameters on the selected context
initializer = mx.init.Xavier(
    rnd_type='gaussian',
    factor_type="in",
    magnitude=2,
)
model.initialize(initializer, ctx=context)

# Horovod: fetch parameters and broadcast them from rank 0
params = model.collect_params()
if params is not None:
    hvd.broadcast_parameters(params, root_rank=0)

# Horovod: create DistributedTrainer, a subclass of gluon.Trainer
if fp16_allreduce:
    compression = hvd.Compression.fp16
else:
    compression = hvd.Compression.none
trainer = hvd.DistributedTrainer(
    params,
    opt,
    compression=compression,
    gradient_predivide_factor=gradient_predivide_factor,
)

# Create loss function and train metric
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
metric = mx.metric.Accuracy()

# Train model
# Each epoch: reset the iterator and metric, run one optimisation step per
# batch, report throughput on rank 0, then evaluate on the validation set.
for epoch in range(epochs):
    tic = time.time()
    train_data.reset()
    metric.reset()
    # Pre-bind the batch counter so the speed computation below does not
    # raise NameError if the iterator yields no batches.
    nbatch = 0
    for nbatch, batch in enumerate(train_data, start=1):
        data = batch.data[0].as_in_context(context)
        label = batch.label[0].as_in_context(context)
        # Record the forward pass so gradients can be computed from the loss.
        with autograd.record():
            output = model(data.astype(dtype, copy=False))
            loss = loss_fn(output, label)
        loss.backward()
        trainer.step(batch_size)
        metric.update([label], [output])

        # Report the running training accuracy every 100 batches.
        if nbatch % 100 == 0:
            name, acc = metric.get()
            print('[Epoch %d Batch %d] Training: %s=%f' %
                         (epoch, nbatch, name, acc))

    if hvd.rank() == 0:
        elapsed = time.time() - tic
        # Throughput estimate: batches on this worker, times batch size,
        # times the number of workers, per second.
        speed = nbatch * batch_size * hvd.size() / elapsed
        print(f'Epoch {epoch}\tSpeed={speed} samples/s\tTime cost={elapsed}')

    # Evaluate model accuracy
    _, train_acc = metric.get()
    name, val_acc = evaluate(model, val_data, context)
    if hvd.rank() == 0:
        print(f'Epoch{epoch}\tTrain: {name}={train_acc}\tValidation: {name}={val_acc}')

    # After the final epoch, rank 0 checks the validation accuracy.
    if hvd.rank() == 0 and epoch == epochs - 1:
        # The message is a single literal: a backslash continuation inside
        # the string would embed the next line's indentation in the text.
        assert val_acc > 0.96, (
            "Achieved accuracy (%f) is lower than expected (0.96)" % val_acc)

[Epoch 0 Batch 100] Training: accuracy=0.827969
[Epoch 0 Batch 200] Training: accuracy=0.891484
[Epoch 0 Batch 300] Training: accuracy=0.913594
[Epoch 0 Batch 400] Training: accuracy=0.926250
[Epoch 0 Batch 500] Training: accuracy=0.935438
[Epoch 0 Batch 600] Training: accuracy=0.941536
[Epoch 0 Batch 700] Training: accuracy=0.946607
[Epoch 0 Batch 800] Training: accuracy=0.950820
[Epoch 0 Batch 900] Training: accuracy=0.953681
Epoch 0	Speed=1127.3894082024588 samples/s	Time cost=53.19191360473633
Epoch0	Train: accuracy=0.9548926093916755	Validation: accuracy=0.983573717948718
[Epoch 1 Batch 100] Training: accuracy=0.985156
[Epoch 1 Batch 200] Training: accuracy=0.985547
[Epoch 1 Batch 300] Training: accuracy=0.983958
[Epoch 1 Batch 400] Training: accuracy=0.984570
[Epoch 1 Batch 500] Training: accuracy=0.984688
[Epoch 1 Batch 600] Training: accuracy=0.984766
[Epoch 1 Batch 700] Training: accuracy=0.985000
[Epoch 1 Batch 800] Training: accuracy=0.985469
[Epoch 1 Batch 900] Training: ac