# 多层感知机实现

In [1]:
import mxnet as mx
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from mxnet import nd,gluon

  from ._conv import register_converters as _register_converters


In [2]:
import mxnet.gluon.data as gdata
import mxnet.gluon.loss as gloss

## 首先加载数据

In [37]:
#将图像数据进行归一化
def ToTensor(X,y):
    return X.astype(np.float32)/255,y

ctx = mx.gpu()
batch_size = 256
with mx.Context(ctx):
    trainData = gdata.vision.FashionMNIST(root="./FashionMNIST",train=True,transform=ToTensor)
    testData  = gdata.vision.FashionMNIST(root="./FashionMNIST",train=False,transform=ToTensor)
    train_iter = gdata.DataLoader(trainData,batch_size,shuffle = True)
    test_iter = gdata.DataLoader(testData,batch_size,shuffle = False)

## 下面进行模型的搭建

## 首先定义 $Relu()$函数

In [27]:
def relu(X):
    return nd.maximum(X,0)

## 定义模型参数 

In [8]:
num_inputs = 784
num_outputs = 10
num_hiddens = 256
with ctx:
    W1 = nd.random.normal(scale=0.01,shape=(num_inputs,num_hiddens))
    b1 = nd.zeros(num_hiddens)

    W2 = nd.random.normal(scale=0.01,shape=(num_hiddens,num_outputs))
    b2 = nd.zeros(num_outputs)

    params = [W1,b1,W2,b2]

In [9]:
for param in params:
    param.attach_grad()

## 定义模型   输入层、隐藏层、输出层

In [12]:
def net(X):
    X = X.reshape((-1,num_inputs))
    H = relu(nd.dot(X,W1)+b1)
    return nd.dot(H,W2)+b2

## 定义损失函数

In [13]:
loss = gloss.SoftmaxCrossEntropyLoss()    

In [23]:
#随机梯度下降算法
def sgd(params,lr,batch_size):
    for param in params:
        param[:] = param[:]-lr*param[:].grad/batch_size

## 进行模型的训练

In [24]:
#评估模型的准确度
def accuracy(y_hat,y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()

def evaluate_accuracy(data_iter,net):
    acc = 0
    for X,y in data_iter:
        acc+=accuracy(net(X),y)
    return acc/len(data_iter)

In [29]:
from mxnet import autograd
def train_mlp(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr):
    #
    for epoch in range(num_epochs+1):
        train_l = 0
        train_acc = 0
        #首先取出小批量数据
        for X,y in train_iter:
            with autograd.record():
                y_hat = net(X)      #前向运算
                l = loss(y_hat,y)   #计算损失函数
            l.backward()           #反向传播、进行求导运算
            
            #使用sgd对参数进行优化
            sgd(params,lr,batch_size)
            
            #记录损失
            train_l += l.mean().asscalar()
            train_acc += accuracy(y_hat,y)
        
        #完成了一个epoch训练，进行一次测试并且输出结果
        
        test_acc = evaluate_accuracy(test_iter,net)
        
        #打印日志
        print('epoch %d train loss %.4f train acc %.3f test acc %.3f'
              %(epoch+1,train_l/len(train_iter),train_acc/len(train_iter),test_acc))

In [33]:
num_epochs = 5
lr =0.5
with ctx:
    train_mlp(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr)

epoch 1 train loss 0.8761 train acc 0.715 test acc 0.829
epoch 2 train loss 0.4946 train acc 0.818 test acc 0.848
epoch 3 train loss 0.4452 train acc 0.835 test acc 0.855
epoch 4 train loss 0.4090 train acc 0.849 test acc 0.862
epoch 5 train loss 0.3870 train acc 0.857 test acc 0.872
epoch 6 train loss 0.3713 train acc 0.863 test acc 0.870


In [38]:
for X,y in test_iter:
    print (y)
    break


[0 1 2 2 3 2 8 6 5 0 3 4 4 6 8 5 6 3 6 4 4 4 2 1 5 7 8 4 4 1 5 7 7 8 1 0 9
 8 0 8 2 0 4 6 2 0 3 3 2 3 2 2 9 3 0 9 9 4 6 0 4 5 4 6 1 1 0 9 5 2 7 3 4 6
 5 7 1 6 1 4 9 8 1 2 4 8 9 4 1 6 3 4 2 2 2 6 4 7 7 3 9 3 9 0 8 2 3 8 2 7 5
 5 3 2 7 5 0 2 7 1 0 5 4 4 7 0 8 5 0 3 1 7 9 4 9 6 4 4 2 4 3 3 3 2 2 6 0 0
 1 3 4 3 3 1 9 3 3 3 9 5 6 7 7 3 2 4 0 8 7 2 2 8 9 0 2 4 4 5 7 9 9 1 3 9 1
 5 5 6 0 7 4 9 1 6 0 0 0 4 0 9 0 4 2 5 5 8 6 2 1 9 0 4 7 1 9 5 9 0 2 8 5 7
 7 3 2 4 5 7 8 1 9 5 6 2 9 7 4 0 9 2 1 5 7 7 0 2 4 5 3 3 8 1 6 2 4 8]
<NDArray 256 @cpu(0)>
