In [1]:
from mxnet.gluon import nn

def alex_net(num_classes = 10):
    """Build an AlexNet-style convolutional network as a Gluon ``nn.Sequential``.

    The stack is five convolutions with three max-pooling layers interleaved,
    followed by two 4096-unit dense layers (each followed by dropout) and a
    final dense output layer.

    Parameters
    ----------
    num_classes : int, optional
        Number of units in the final dense layer. Defaults to 10, which keeps
        ``alex_net()`` identical to the previous hard-coded version.

    Returns
    -------
    nn.Sequential
        The assembled network. This function does not call ``initialize`` on
        it; that is left to the caller.
    """
    net = nn.Sequential()
    # All layers are created inside the name scope so they share the
    # container's name prefix.
    with net.name_scope():
        # stage 1: large-kernel strided convolution + pooling
        net.add(
            nn.Conv2D(channels = 96, kernel_size = 11,
                      strides = 4, activation = 'relu'),
            nn.MaxPool2D(pool_size = 3, strides = 2),
        )
        # stage 2: 5x5 convolution + pooling
        net.add(
            nn.Conv2D(channels = 256, kernel_size = 5,
                      padding = 2, activation = 'relu'),
            nn.MaxPool2D(pool_size = 3, strides = 2),
        )
        # stage 3: three 3x3 convolutions + pooling
        net.add(
            nn.Conv2D(channels = 384, kernel_size = 3,
                      padding = 1, activation = 'relu'),
            nn.Conv2D(channels = 384, kernel_size = 3,
                      padding = 1, activation = 'relu'),
            nn.Conv2D(channels = 256, kernel_size = 3,
                      padding = 1, activation = 'relu'),
            nn.MaxPool2D(pool_size = 3, strides = 2),
        )
        # stage 4: first fully connected layer with dropout
        net.add(
            # Original author's note: the network works with or without
            # this explicit Flatten.
            nn.Flatten(),
            nn.Dense(4096, activation = 'relu'),
            nn.Dropout(.5),
        )
        # stage 5: second fully connected layer with dropout
        net.add(
            nn.Dense(4096, activation = 'relu'),
            nn.Dropout(.5),
        )
        # stage 6: output layer, one unit per class
        net.add(nn.Dense(num_classes))
    return net

  import OpenSSL.SSL


In [2]:
from mxnet import ndarray as nd
# Shape check: build a fresh network, then feed one random single-channel
# 224x224 input through it layer by layer, printing each output shape.
net = alex_net()
net.initialize()

X = nd.random.uniform(shape = (1, 1, 224, 224))
for layer in net:
    # X is rebound to each layer's output so the next layer consumes it.
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

sequential0_conv0 output shape:	 (1, 96, 54, 54)
sequential0_pool0 output shape:	 (1, 96, 26, 26)
sequential0_conv1 output shape:	 (1, 256, 26, 26)
sequential0_pool1 output shape:	 (1, 256, 12, 12)
sequential0_conv2 output shape:	 (1, 384, 12, 12)
sequential0_conv3 output shape:	 (1, 384, 12, 12)
sequential0_conv4 output shape:	 (1, 256, 12, 12)
sequential0_pool2 output shape:	 (1, 256, 5, 5)
sequential0_flatten0 output shape:	 (1, 6400)
sequential0_dense0 output shape:	 (1, 4096)
sequential0_dropout0 output shape:	 (1, 4096)
sequential0_dense1 output shape:	 (1, 4096)
sequential0_dropout1 output shape:	 (1, 4096)
sequential0_dense2 output shape:	 (1, 10)


In [3]:
import sys

# `utils` is a project-local helper module; the parent directory is added to
# the import path before importing it (presumably that is where it lives —
# TODO confirm).
sys.path.append('..')
import utils

# Values forwarded to the data-loading helper. 224 matches the spatial input
# size used for the shape check earlier in the notebook.
BATCH_SIZE = 64
IMAGE_SIZE = 224

train_data, test_data = utils.load_data_fashion_mnist(
    batch_size = BATCH_SIZE, resize = IMAGE_SIZE)

In [4]:
from mxnet import init
from mxnet import gluon

# Device returned by the project helper (gpu(0) in the recorded run below).
ctx = utils.try_gpu()

# Fresh network; its parameters are Xavier-initialized on the chosen device.
net = alex_net()
params = net.collect_params()
params.initialize(init = init.Xavier(), ctx = ctx, force_reinit = True)

# Softmax cross-entropy loss, optimized with plain SGD.
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(params, 'sgd', {'learning_rate': 0.01})

utils.train(train_data, test_data, net, loss, trainer, ctx, num_epochs = 3)

Start training on  gpu(0)
Epoch 0. Loss: 1.017, Train acc 0.62, Test acc 0.80, Time 57.0 sec
Epoch 1. Loss: 0.529, Train acc 0.81, Test acc 0.85, Time 55.9 sec
Epoch 2. Loss: 0.435, Train acc 0.84, Test acc 0.87, Time 56.1 sec
