# 使用mxnet的gluon接口来实现多层感知机

In [1]:
import mxnet as mx 
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from mxnet import gluon,nd,autograd
from mxnet.gluon import data as gdata,loss as gloss,nn

  from ._conv import register_converters as _register_converters


## 首先还是准备小批量数据

In [2]:
def ToTensor(data,label):
    return data.astype(np.float32)/255,label.astype(np.float32)
ctx = mx.gpu()

batch_size = 256
with mx.Context(ctx):
    trainData = gdata.vision.FashionMNIST(root="./FashionMNIST",train = True,transform=ToTensor)
    testData = gdata.vision.FashionMNIST(root="./FashionMNIST",train = False,transform=ToTensor)
    
    train_iter = gdata.DataLoader(trainData,batch_size,shuffle=True)
    test_iter = gdata.DataLoader(testData,batch_size,shuffle=False)

In [3]:
for X,y in train_iter:
    break
print(y)


[9. 0. 5. 8. 7. 9. 3. 2. 2. 8. 8. 0. 6. 8. 0. 5. 9. 9. 2. 4. 2. 9. 8. 5.
 5. 8. 0. 4. 6. 5. 3. 7. 1. 9. 0. 4. 2. 5. 4. 6. 1. 1. 1. 9. 6. 9. 9. 1.
 4. 8. 9. 7. 7. 9. 2. 4. 0. 0. 4. 0. 4. 8. 1. 6. 2. 1. 0. 7. 6. 8. 4. 1.
 3. 5. 3. 8. 4. 1. 3. 9. 4. 2. 1. 5. 3. 7. 3. 1. 3. 7. 7. 3. 3. 0. 7. 4.
 7. 8. 0. 5. 5. 2. 3. 2. 1. 2. 2. 2. 3. 1. 2. 7. 6. 0. 4. 6. 0. 5. 5. 5.
 4. 0. 2. 5. 5. 3. 9. 3. 0. 3. 4. 3. 9. 0. 3. 4. 3. 5. 0. 4. 9. 8. 8. 9.
 2. 5. 6. 1. 2. 8. 9. 6. 4. 7. 5. 8. 3. 0. 2. 6. 8. 5. 3. 2. 5. 8. 9. 9.
 1. 5. 4. 0. 6. 8. 9. 8. 8. 7. 8. 6. 2. 8. 2. 1. 3. 1. 5. 1. 3. 9. 9. 4.
 9. 6. 3. 8. 4. 4. 4. 3. 0. 8. 3. 3. 6. 0. 7. 9. 2. 8. 8. 4. 4. 7. 7. 9.
 0. 8. 8. 2. 4. 8. 9. 4. 8. 1. 8. 4. 7. 5. 5. 9. 7. 5. 6. 5. 8. 9. 3. 6.
 2. 3. 4. 0. 9. 5. 2. 9. 2. 6. 5. 5. 5. 0. 4. 1.]
<NDArray 256 @cpu(0)>


## 使用gluon搭建模型

In [4]:
from mxnet import init

net = nn.Sequential()  #定义一个容器
net.add(nn.Dense(1000,activation='relu'),
       nn.Dense(10))                      #为模型中添加层
net.initialize(init.Normal(sigma=0.01),ctx=ctx)   #初始化模型参数

## 定义损失函数

In [5]:
loss = gloss.SoftmaxCrossEntropyLoss()

## 定义优化器

In [6]:
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.5})

## 训练模型

In [7]:
def accuracy(y_hat,y):
    return (y_hat.argmax(axis = 1)==y).mean().asscalar()

In [8]:
def evaluate_accuracy(net,datas):
    acc = 0
    for X,y in datas:
        acc+=accuracy(net(X),y)
    return acc/len(datas)

In [12]:
with mx.Context(ctx):
    print(evaluate_accuracy(net,test_iter))

0.1787109375


In [15]:
def train_mlp(net,train_iter,test_iter,num_epochs,batch_size,loss,trainer):
    for epoch in range(0,num_epochs+1):
        train_l_sum = 0
        train_acc_sum = 0
        
        for X,y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat,y)
            l.backward()
            trainer.step(batch_size)
            
            train_l_sum += l.mean().asscalar()
            train_acc_sum += accuracy(y_hat,y)
        
        test_acc = evaluate_accuracy(net,test_iter)
        
        print('epoch %d train_loss %.4f train_acc %.3f test_acc %.3f'
             %(epoch+1,train_l_sum/len(train_iter),train_acc_sum/len(train_iter),test_acc))

In [16]:
num_epochs = 5
with mx.Context(ctx):
    train_mlp(net,train_iter,test_iter,num_epochs,batch_size,loss,trainer)

epoch 1 train_loss 0.4780 train_acc 0.834 test_acc 0.836
epoch 2 train_loss 0.4690 train_acc 0.835 test_acc 0.843
epoch 3 train_loss 0.4521 train_acc 0.843 test_acc 0.847
epoch 4 train_loss 0.4455 train_acc 0.846 test_acc 0.854
epoch 5 train_loss 0.4352 train_acc 0.849 test_acc 0.855
epoch 6 train_loss 0.4266 train_acc 0.852 test_acc 0.857
