In [7]:
# Environment check (IPython shell escape): print the NVIDIA driver version and
# the current per-GPU memory usage and utilisation.
!nvidia-smi

Fri May 25 00:02:08 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.130                Driver Version: 384.130                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:29:00.0  On |                  N/A |
|  0%   43C    P8    19W / 280W |   4443MiB / 11169MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [8]:
from mxnet.gluon import nn

def vgg_block(num_convs, channels):
    """Assemble a single VGG block.

    The block consists of `num_convs` 3x3 convolutions (padding 1, ReLU
    activation), each producing `channels` output channels, followed by one
    2x2 max-pooling layer with stride 2.

    Args:
        num_convs: Number of convolution layers placed before the pooling layer.
        channels: Output channel count used by every convolution in the block.

    Returns:
        An `nn.Sequential` holding the convolutions followed by the pooling layer.
    """
    block = nn.Sequential()
    # Create the convolutions in order and register them with one variadic add().
    block.add(*[
        nn.Conv2D(channels=channels, kernel_size=3, padding=1, activation='relu')
        for _ in range(num_convs)
    ])
    block.add(nn.MaxPool2D(pool_size=2, strides=2))
    return block

In [9]:
from mxnet import nd

# Smoke-test a single block: two 128-channel convolutions applied to a random
# batch of shape (2, 3, 16, 16).
blk = vgg_block(num_convs=2, channels=128)
blk.initialize()
x = nd.random.uniform(shape=(2, 3, 16, 16))
y = blk(x)
# Shown as the cell result: the channel axis becomes 128 and the two trailing
# axes are halved by the 2x2 / stride-2 pooling layer.
y.shape

(2, 128, 8, 8)

In [10]:
def vgg_stack(architecture):
    """Chain several VGG blocks into one sequential container.

    Args:
        architecture: Iterable of `(num_convs, channels)` pairs, one per block,
            listed in the order the blocks are applied.

    Returns:
        An `nn.Sequential` whose children are the blocks built by `vgg_block`,
        in the same order as `architecture`.
    """
    stack = nn.Sequential()
    blocks = [vgg_block(depth, width) for depth, width in architecture]
    stack.add(*blocks)
    return stack

In [11]:
# Size of the final Dense layer (presumably the number of target classes - confirm
# against the dataset loaded for training).
num_output = 10
# One (num_convs, channels) pair per VGG block, in application order.
architecture = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

net = nn.Sequential()
# Build every child inside the outermost Sequential's name scope.
with net.name_scope():
    # Convolutional part, flattened for the dense layers.
    net.add(vgg_stack(architecture))
    net.add(nn.Flatten())
    # Two 4096-unit ReLU layers, each followed by dropout with rate 0.5.
    net.add(nn.Dense(4096, activation='relu'), nn.Dropout(.5))
    net.add(nn.Dense(4096, activation='relu'), nn.Dropout(.5))
    # Output layer without activation.
    net.add(nn.Dense(num_output))

In [12]:
import sys
# Extend the module search path with the parent directory; this must run before
# `import utils` (presumably that is where the local `utils` module lives - confirm).
sys.path.append('..')
import utils
from mxnet import gluon
from mxnet import init

# NOTE(review): `utils.load_data_fashion_mnist` is not visible here; presumably it
# returns train/test data iterators with images resized to 96 pixels - confirm in utils.
train_data, test_data = utils.load_data_fashion_mnist(
    batch_size = 64, resize = 96)

# Pick the compute context (the recorded run below reports gpu(0)) and initialise
# the network's parameters on it with Xavier initialisation.
ctx = utils.try_gpu()
net.initialize(ctx = ctx, init = init.Xavier())

# Softmax cross-entropy loss optimised with SGD at learning rate 0.05.
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 
                       'sgd', {'learning_rate': 0.05})
# NOTE(review): `utils.train` is defined outside this file; per the recorded output it
# reports loss, train/test accuracy and elapsed time once per epoch for 5 epochs.
utils.train(train_data, test_data, net, loss, trainer, ctx, num_epochs = 5)

Start training on  gpu(0)
Epoch 0. Loss: 0.921, Train acc 0.66, Test acc 0.83, Time 57.4 sec
Epoch 1. Loss: 0.399, Train acc 0.85, Test acc 0.88, Time 57.2 sec
Epoch 2. Loss: 0.325, Train acc 0.88, Test acc 0.89, Time 57.2 sec
Epoch 3. Loss: 0.283, Train acc 0.89, Test acc 0.90, Time 57.1 sec
Epoch 4. Loss: 0.250, Train acc 0.91, Test acc 0.91, Time 57.6 sec
