# 定义模型

In [10]:
from mxnet.gluon import nn
# AlexNet-style network assembled from two groups of layers: a convolutional
# feature extractor followed by a fully connected classifier head.
net = nn.Sequential()

conv_stack = [
    # First convolution: 96 output channels, 11x11 window, stride 4, padding 2.
    nn.Conv2D(channels=96, kernel_size=11, strides=4, padding=2,
              activation='relu'),
    nn.MaxPool2D(pool_size=3, strides=2),
    # Smaller convolution window; padding of 2 keeps the output height and
    # width equal to the input, while the number of output channels grows.
    nn.Conv2D(channels=256, kernel_size=5, padding=2, activation='relu'),
    nn.MaxPool2D(pool_size=3, strides=2),
    # Three consecutive convolutions with an even smaller window. All but the
    # last one further increase the number of output channels. No pooling
    # follows the first two, so height and width are not reduced there.
    nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
    nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
    nn.Conv2D(channels=256, kernel_size=3, padding=1, activation='relu'),
    nn.MaxPool2D(pool_size=3, strides=2),
]

dense_head = [
    # The fully connected layers have several times more outputs than in
    # LeNet; dropout layers are used to mitigate overfitting.
    nn.Dense(units=4096, activation="relu"),
    nn.Dropout(rate=0.5),
    nn.Dense(units=4096, activation="relu"),
    nn.Dropout(rate=0.5),
    # Output layer: 10 classes because Fashion-MNIST is used here, rather
    # than the 1000 classes of the original paper.
    nn.Dense(units=10),
]

net.add(*conv_stack, *dense_head)

In [11]:
from mxnet import nd 
# Sanity check of the architecture: push one random single-channel 224x224
# input through the network one layer at a time and report each output shape.
X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    layer_name, out_shape = layer.name, X.shape
    print(f"{layer_name}'s output shape: {out_shape}")

conv5's output shape: (1, 96, 55, 55)
pool3's output shape: (1, 96, 27, 27)
conv6's output shape: (1, 256, 27, 27)
pool4's output shape: (1, 256, 13, 13)
conv7's output shape: (1, 384, 13, 13)
conv8's output shape: (1, 384, 13, 13)
conv9's output shape: (1, 256, 13, 13)
pool5's output shape: (1, 256, 6, 6)
dense3's output shape: (1, 4096)
dropout2's output shape: (1, 4096)
dense4's output shape: (1, 4096)
dropout3's output shape: (1, 4096)
dense5's output shape: (1, 10)


# 加载Fashion-MNIST作为训练数据集

In [14]:
from utils import load_data_fashion_mnist as load 
# Loader settings: mini-batch size, the `resize` value handed to the loader
# (presumably the target image side length -- confirm against the helper),
# and the dataset directory.
batch_size = 128
resize = 224
root = 'fashion-mnist/'

train_iter, test_iter = load.load_data_fashion_mnist(batch_size, root, resize)

# Print the shape of the first training batch only, then stop iterating.
for t, l in train_iter:
    print(t.shape)
    break

(128, 1, 224, 224)


# 训练

In [17]:
from mxnet import autograd,nd 
from mxnet.gluon import loss,Trainer
from mxnet import init
from utils.try_gpu import try_gpu
from utils.evaluate_acc import evaluate_acc
# Device returned by the project helper `try_gpu`; the network is
# (re)initialised on it and it is later handed to `evaluate_acc`.
ctx = try_gpu()

# Discard any previously initialised parameters and re-create them on `ctx`
# with the Xavier initialiser.
net.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)

# NOTE: this rebinds the name `loss` from the imported `mxnet.gluon.loss`
# module to a loss instance; the module is no longer reachable under that
# name until the import is executed again.
loss = loss.SoftmaxCrossEntropyLoss()

def train(train_iter,test_iter,batch_size,net,epochs,lr,loss,evaluate_acc):
    """Train ``net`` with mini-batch SGD and print metrics after every epoch.

    Args:
        train_iter: Iterable of ``(X, y)`` training mini-batches. Must support
            ``len()``; the length is used as the number of batches when
            averaging the running loss and accuracy.
        test_iter: Passed unchanged to ``evaluate_acc`` after each epoch.
        batch_size: Value given to ``trainer.step`` on every update.
        net: Gluon block to optimise. Its parameters are expected to be
            initialised on the module-level ``ctx`` before calling.
        epochs: Number of passes over ``train_iter``.
        lr: Learning rate for the ``'sgd'`` optimiser.
        loss: Callable invoked as ``loss(out, y)``; its result must support
            ``backward()`` and ``mean()``.
        evaluate_acc: Callable invoked as ``evaluate_acc(test_iter, net, ctx)``;
            its return value is printed as the test accuracy.

    Returns:
        None. One summary line is printed per epoch.

    Note:
        Relies on the module-level ``ctx`` both for placing the data and for
        the ``evaluate_acc`` call.
    """
    import time

    trainer = Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    # Number of mini-batches per epoch; used to average the per-batch metrics.
    n = len(train_iter)
    for e in range(epochs):
        train_loss, train_acc, start = .0, .0, time.time()
        for X, y in train_iter:
            # The parameters live on `ctx`; the batch has to be on the same
            # device or the forward pass fails with a context mismatch when
            # `ctx` is a GPU. This is a no-op if the data is already there.
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                out = net(X)
                l = loss(out, y)
            l.backward()
            trainer.step(batch_size)
            # Convert to a Python float so the running sum is a plain number
            # (consistent with train_acc) instead of an NDArray.
            train_loss += l.mean().asscalar()
            # Cast labels so they can be compared with the float argmax result.
            y = y.astype('float32')
            train_acc += (out.argmax(axis=1) == y).mean().asscalar()
        test_acc = evaluate_acc(test_iter, net, ctx)
        print('epoechs:%d, loss: %.4f, train_acc:%.3f, test_acc:%.3f ,used time: %.1f sec'%(e+1,train_loss/n,train_acc/n,test_acc,time.time()-start))
    

In [18]:
# Training hyper-parameters: number of epochs and SGD learning rate.
epochs = 5
lr = 0.1

# Start training with the iterators, network, loss and accuracy helper
# defined in the earlier cells.
train(train_iter, test_iter, batch_size, net, epochs, lr, loss, evaluate_acc)

KeyboardInterrupt: 