In [1]:
import os
import sys

import mxnet as mx
from mxnet import gluon
from mxnet import init
from mxnet.gluon import nn

# The project-local utils module lives one directory up, so the search path
# must be extended before it can be imported.
sys.path.append('..')
from utils import *
# Device context used for parameter initialisation and passed to train():
# the first GPU (device id 0).
ctx = mx.gpu(0)

In [2]:
# Mini-batch size handed to the data loader.
batch_size = 256
# Directory holding the CIFAR files. Set the CIFAR_DATA_DIR environment
# variable to point at another copy; the original path stays the default so
# existing setups keep working unchanged.
data_dir = os.environ.get('CIFAR_DATA_DIR', '/home/sinyer/Python/data')
# load_cifar comes from the project's utils module; the two positional counts
# are presumably the train/test sample sizes -- TODO confirm against utils.
train_data, test_data = load_cifar(50000, 10000, batch_size, route=data_dir)

In [3]:
class Residual(nn.Block):
    """Residual block: two 3x3 convolutions plus a shortcut connection.

    Main branch: conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.
    The main-branch result is added to the shortcut and passed through ReLU.

    Parameters
    ----------
    channels : int
        Number of output channels of every convolution in the block.
    same_shape : bool, default True
        If True, the first convolution uses stride 1 and the input itself is
        the shortcut. If False, the first convolution uses stride 2 and the
        shortcut is a 1x1 convolution with the same stride.
    **kwargs
        Forwarded unchanged to ``nn.Block.__init__``.

    Notes
    -----
    With ``same_shape=True`` the input is added to the main branch as-is, so
    it presumably must already have ``channels`` channels -- verify at the
    call site.
    """

    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        with self.name_scope():
            # Stride 2 on the first convolution when the block changes shape.
            strides = 1 if same_shape else 2
            self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1, strides=strides)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                # 1x1 projection applied to the shortcut, using the same
                # stride as conv1.
                self.conv3 = nn.Conv2D(channels, kernel_size=1, strides=strides)

    def forward(self, x):
        """Apply the block to ``x`` and return ReLU(main_branch + shortcut)."""
        # `mx.nd` is referenced through the explicit `import mxnet as mx`
        # rather than relying on a name leaked by `from utils import *`.
        out = mx.nd.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if not self.same_shape:
            x = self.conv3(x)
        return mx.nd.relu(out + x)

class ResNet(nn.Block):
    """Small residual network assembled from four sequential stages.

    Stages, in order:
      1. a 3x3 convolution with 16 channels (stride 1, padding 1);
      2. two ``Residual(16)`` blocks;
      3. ``Residual(32, same_shape=False)`` followed by ``Residual(32)``;
      4. 3x3 average pooling followed by a dense layer with ``num_classes``
         outputs.

    Parameters
    ----------
    num_classes : int
        Number of units in the final dense layer.
    **kwargs
        Forwarded unchanged to ``nn.Block.__init__``.
    """

    def __init__(self, num_classes, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        with self.name_scope():
            b1 = nn.Conv2D(16, kernel_size=3, strides=1, padding=1)
            b2 = nn.Sequential()
            b2.add(Residual(16), Residual(16))
            b3 = nn.Sequential()
            b3.add(Residual(32, same_shape=False), Residual(32))
            b4 = nn.Sequential()
            b4.add(nn.AvgPool2D(pool_size=3), nn.Dense(num_classes))
            self.net = nn.Sequential()
            self.net.add(b1, b2, b3, b4)

    def forward(self, x):
        """Pass ``x`` through each stage in order and return the final output."""
        out = x
        # Stages are applied one at a time; the loop index was never used, so
        # the stages are iterated directly.
        for block in self.net:
            out = block(out)
        return out

# Build the 10-output network and initialise its parameters with Xavier
# initialisation on the selected device.
net = ResNet(num_classes=10)
net.initialize(init=init.Xavier(), ctx=ctx)

# Training objective and optimiser: softmax cross-entropy, SGD with momentum.
loss = gluon.loss.SoftmaxCrossEntropyLoss()
sgd_settings = {'learning_rate': 0.05, 'momentum': 0.9}
trainer = gluon.Trainer(net.collect_params(), 'sgd', sgd_settings)

In [4]:
# Launch the training helper provided by utils. Arguments are unchanged:
# 100 epochs, n=10, print_batches=None (presumably no per-batch logging --
# confirm against utils.train).
train(
    ctx, train_data, test_data, net, loss, trainer,
    epochs=100, n=10, print_batches=None
)

Epoch 0, Loss: 1.795772, Train acc 0.377192, Test acc 0.450391, , Time 8.095822
Epoch 1, Loss: 1.321580, Train acc 0.525502, Test acc 0.554980, , Time 6.434900
Epoch 2, Loss: 1.115199, Train acc 0.607510, Test acc 0.595313, , Time 6.414872
Epoch 3, Loss: 0.984597, Train acc 0.654963, Test acc 0.622754, , Time 6.464685
Epoch 4, Loss: 0.898887, Train acc 0.686065, Test acc 0.634668, , Time 6.486274
Epoch 5, Loss: 0.839157, Train acc 0.706649, Test acc 0.682324, , Time 6.434411
Epoch 6, Loss: 0.790974, Train acc 0.722919, Test acc 0.695312, , Time 6.470334
Epoch 7, Loss: 0.752721, Train acc 0.737564, Test acc 0.671777, , Time 6.475589
Epoch 8, Loss: 0.711591, Train acc 0.751012, Test acc 0.676367, , Time 6.452137
Epoch 9, Loss: 0.680597, Train acc 0.762105, Test acc 0.720801, , Time 6.464008
Epoch 10, Loss: 0.661112, Train acc 0.767423, Test acc 0.682520, , Time 6.614798
Epoch 11, Loss: 0.633420, Train acc 0.779321, Test acc 0.695312, , Time 6.471489
Epoch 12, Loss: 0.609999, Train acc 0.