# MXNet实现 AlexNet

In [1]:
import sys 
import time

In [2]:
# Make modules in the parent directory importable from this notebook.
# NOTE(review): presumably this is where the local `gluonbook` package lives — confirm.
sys.path.append('../')

In [3]:
import gluonbook as gb
import mxnet as mx 
from mxnet import gluon,nd,autograd ,init
from mxnet.gluon import data as gdata,loss as gloss,nn

  from ._conv import register_converters as _register_converters


## AlexNet包含8层变换：其中有五层卷积层、两层全连接隐藏层，以及一个全连接输出层

In [4]:
def try_gpu4():
    """Return the GPU context when it is usable, otherwise the CPU context.

    Usability is probed by creating a one-element NDArray on ``mx.gpu()``.
    Any ``mx.base.MXNetError`` raised by that probe is treated as
    "no usable GPU" and the function falls back to ``mx.cpu()``.

    Returns:
        ``mx.gpu()`` if the probe allocation succeeds, else ``mx.cpu()``.
    """
    try:
        ctx = mx.gpu()
        # Probe allocation; the array itself is discarded.
        _ = nd.ones((1,), ctx=ctx)
    except mx.base.MXNetError:
        # Catch the exception class directly so the CPU fallback is reached.
        ctx = mx.cpu()
    return ctx

In [5]:
# Pick the compute context once; the bare `ctx` on the last line makes the
# notebook display which context was chosen.
ctx = try_gpu4()
ctx

gpu(0)

In [6]:
# AlexNet-style network: five convolutional layers, two fully connected hidden
# layers and a fully connected output layer, added stage by stage.
AlexNet = nn.Sequential()

# Stage 1: large 11x11 window with stride 4, then 3x3 max pooling with stride 2.
AlexNet.add(
    nn.Conv2D(channels=96, kernel_size=11, strides=4, activation='relu'),
    nn.MaxPool2D(pool_size=3, strides=2),
)

# Stage 2: smaller 5x5 window; padding 2 keeps the output height/width equal
# to the input, and the number of output channels is increased.
AlexNet.add(
    nn.Conv2D(channels=256, kernel_size=5, padding=2, activation='relu'),
    nn.MaxPool2D(pool_size=3, strides=2),
)

# Stage 3: three consecutive 3x3 convolutions. The channel count grows further
# except in the last one, and pooling is applied only after the third
# convolution, so the first two leave height and width untouched.
AlexNet.add(
    nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
    nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
    nn.Conv2D(channels=256, kernel_size=3, padding=1, activation='relu'),
    nn.MaxPool2D(pool_size=3, strides=2),
)

# Fully connected hidden layers, each followed by dropout to reduce overfitting.
AlexNet.add(
    nn.Dense(512, activation='relu'),
    nn.Dropout(0.5),
    nn.Dense(512, activation='relu'),
    nn.Dropout(0.5),
)

# Output layer: one unit per class (10 classes).
AlexNet.add(nn.Dense(10))

In [7]:
# Sanity check: feed one random single-channel 224x224 input through the
# network and print the output shape produced by every layer.
X = nd.uniform(shape=(1,1,224,224),ctx=ctx)
AlexNet.initialize(ctx= ctx)

# X is rebound to each layer's output in turn, so after the loop it holds the
# final network output rather than the original input.
for layer in AlexNet:
    X = layer(X)
    print(layer.name,'output shape',X.shape)

conv0 output shape (1, 96, 54, 54)
pool0 output shape (1, 96, 26, 26)
conv1 output shape (1, 256, 26, 26)
pool1 output shape (1, 256, 12, 12)
conv2 output shape (1, 384, 12, 12)
conv3 output shape (1, 384, 12, 12)
conv4 output shape (1, 256, 12, 12)
pool2 output shape (1, 256, 5, 5)
dense0 output shape (1, 512)
dropout0 output shape (1, 512)
dense1 output shape (1, 512)
dropout1 output shape (1, 512)
dense2 output shape (1, 10)


## 读取数据：仍然使用FashionMNIST数据集。读取时将图像的高和宽扩大到AlexNet使用的224，可以通过Resize实现。

In [8]:
def load_data_fashion_mnist(batch_size, resize=None,
                            root='../chapter1_baseKnowledge/FashionMNIST/'):
    """Build training and test data loaders for Fashion-MNIST.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        resize: Optional size passed to ``transforms.Resize``; when falsy the
            images are not resized.
        root: Directory passed to ``gdata.vision.FashionMNIST`` as its
            ``root``. Defaults to the path this notebook has always used.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``gdata.DataLoader`` objects.
        The training loader shuffles its samples; the test loader does not.
    """
    transforms = gdata.vision.transforms

    # Resize (if requested) is applied before ToTensor.
    steps = []
    if resize:
        steps.append(transforms.Resize(resize))
    steps.append(transforms.ToTensor())
    transformer = transforms.Compose(steps)

    mnist_train = gdata.vision.FashionMNIST(root=root, train=True)
    mnist_test = gdata.vision.FashionMNIST(root=root, train=False)

    # transform_first applies the transform to the first element of each
    # sample only (presumably the image, leaving the label as is — confirm).
    train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
                                  batch_size, shuffle=True)
    test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
                                 batch_size, shuffle=False)

    return train_iter, test_iter

In [9]:
# Mini-batch size shared by the data loaders and the training call below.
batch_size = 128
# Images are resized to 224 before being converted to tensors.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

In [12]:
# Training hyper-parameters: SGD learning rate and number of epochs.
lr = 0.01
num_epochs = 20

# force_reinit=True re-initialises the parameters (with Xavier) even if the
# network has already been initialised.
AlexNet.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)

# Plain SGD over all network parameters.
trainer = gluon.Trainer(
    AlexNet.collect_params(),
    'sgd',
    {'learning_rate': lr},
)

# Run the training loop provided by the gluonbook helper.
gb.train_ch5(AlexNet, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on gpu(0)
epoch 1, loss 1.4745, train acc 0.453, test acc 0.708, time 152.3 sec
epoch 2, loss 0.7608, train acc 0.716, test acc 0.790, time 155.5 sec
epoch 3, loss 0.6219, train acc 0.769, test acc 0.814, time 152.0 sec
epoch 4, loss 0.5521, train acc 0.794, test acc 0.837, time 137.2 sec
epoch 5, loss 0.5035, train acc 0.816, test acc 0.845, time 136.9 sec
epoch 6, loss 0.4643, train acc 0.830, test acc 0.859, time 137.3 sec
epoch 7, loss 0.4370, train acc 0.843, test acc 0.870, time 140.0 sec
epoch 8, loss 0.4134, train acc 0.850, test acc 0.870, time 138.7 sec
epoch 9, loss 0.3935, train acc 0.858, test acc 0.878, time 137.3 sec
epoch 10, loss 0.3800, train acc 0.863, test acc 0.881, time 137.3 sec
epoch 11, loss 0.3657, train acc 0.868, test acc 0.882, time 138.9 sec
epoch 12, loss 0.3530, train acc 0.872, test acc 0.891, time 136.7 sec
epoch 13, loss 0.3418, train acc 0.876, test acc 0.892, time 136.8 sec
epoch 14, loss 0.3346, train acc 0.879, test acc 0.888, time 139.4 

KeyboardInterrupt: 