# 使用``gluon``重新看待多层感知机

在``gluon``中有两种方式来定义多层神经网络

1.使用``gluon.Block``，所有的模块都继承自``Block``.

2.使用``gluon.nn.Sequential()``来堆叠网络.

In [5]:
import numpy as np
import mxnet as mx

from mxnet import nd
from mxnet import gluon
from mxnet import autograd

import utils

In [6]:
# Device context for the notebook: batches are moved onto it in the training
# loop and it is passed to the accuracy helpers from utils.
ctx = mx.cpu()

In [7]:
# Dataset / model dimensions used by the cells below.
num_examples = 60000  # divisor used to average the cumulative training loss
# Presumably flattened 28x28 inputs and 10 target classes — TODO confirm.
num_inputs, num_outputs = 784, 10
batch_size = 128

# Train / test iterators come from the project-local utils helper.
train_data, test_data = utils.load_dataset(
    batch_size,
    data_type="fashion_mnist",
)

## 1.使用``gluon.Block``

In [8]:
class MLP(gluon.Block):
    """Multilayer perceptron defined by subclassing ``gluon.Block``.

    Two hidden ``Dense`` layers of 64 units, each followed by a ReLU
    activation, feed a 10-unit output layer. No activation is applied to the
    output layer, so ``forward`` returns the raw output of ``dense2``.
    """

    def __init__(self, **kwargs):
        """Create the three dense layers; ``kwargs`` go to ``gluon.Block``."""
        super(MLP, self).__init__(**kwargs)
        # Without a non-linearity between them, stacked Dense layers collapse
        # into a single affine map, so the hidden layers use ReLU (matching
        # the Sequential model defined later in this notebook).
        self.dense0 = gluon.nn.Dense(64, activation='relu')
        self.dense1 = gluon.nn.Dense(64, activation='relu')
        self.dense2 = gluon.nn.Dense(10)

    def forward(self, X):
        """Run ``X`` through the three layers, printing each output shape.

        The prints are kept on purpose: this class is used in the notebook to
        show how the shape changes from layer to layer.
        """
        X = self.dense0(X)
        print("Hidden1 X : ", X.shape)
        X = self.dense1(X)
        print("Hidden2 X : ", X.shape)
        X = self.dense2(X)
        print("Output X : ", X.shape)
        return X

In [9]:
# Instantiate the Block-based model and initialise its parameters with a
# Normal(sigma=0.01) initializer.
net = MLP()
initializer = mx.init.Normal(sigma=0.01)
net.collect_params().initialize(initializer)

In [10]:
# Shape check: push a dummy batch of 64 rows with 784 features through the
# network and display the shape of the result (last expression of the cell).
a = nd.ones((64, 784))
output = net(a)
output.shape

Hidden1 X :  (64, 64)
Hidden2 X :  (64, 64)
Output X :  (64, 10)


(64, 10)

## 2.使用``gluon.nn.Sequential``

In [11]:
# Build the network by stacking layers in a Sequential container.
net = gluon.nn.Sequential()
with net.name_scope():
    # Layers are instantiated inside the name scope and appended in forward
    # order: flatten, two ReLU hidden layers of 256 units, 10-unit output.
    for layer in (
        gluon.nn.Flatten(),
        gluon.nn.Dense(256, activation='relu'),
        gluon.nn.Dense(256, activation='relu'),
        gluon.nn.Dense(10),
    ):
        net.add(layer)

net.collect_params().initialize(mx.init.Normal(sigma=.01))

In [12]:
# Loss object; the training loop calls it as softmax_cross_entropy(output, label).
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [13]:
# SGD trainer over all parameters collected from the network.
learning_rate = 0.1
# NOTE(review): weight_decay is defined here but is not passed to the Trainer
# in this cell — confirm whether it should be forwarded as the optimizer's 'wd'.
weight_decay = 0.01
optimizer_params = {'learning_rate': learning_rate}
trainer = gluon.Trainer(net.collect_params(), 'sgd', optimizer_params)

In [14]:
# Baseline: accuracy of the freshly initialised network on the test set,
# displayed as the cell's result before any training step has run.
utils.evaluate_accuracy_gluon(test_data, net, ctx)

0.1027

In [None]:
# Training loop: one pass over train_data per epoch, then report the average
# training loss and the train / test accuracy.
epochs = 10

for epoch in range(epochs):
    # Sum of the per-sample losses seen during this epoch.
    cumulative_loss = .0
    for data, label in train_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        # Record the forward pass so that backward() can compute gradients.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Normalise the gradient by the number of samples actually in this
        # batch: num_examples is not a multiple of batch_size, so a final
        # partial batch would otherwise be scaled by the wrong factor.
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()

    # NOTE(review): the pre-training check uses utils.evaluate_accuracy_gluon
    # while these calls use evaluate_accuracy_scratch — confirm which helper
    # is intended for a gluon network.
    train_acc = utils.evaluate_accuracy_scratch(train_data, net, ctx)
    test_acc = utils.evaluate_accuracy_scratch(test_data, net, ctx)

    print("Epoch %s, Train loss %s, Train acc %s, Test acc %s."
          % (epoch, cumulative_loss / num_examples, train_acc, test_acc))

Epoch 0, Train loss 1.15925387847, Train acc 0.752133, Test acc 0.7455.
Epoch 1, Train loss 0.618777207756, Train acc 0.82075, Test acc 0.8198.
Epoch 2, Train loss 0.508218743642, Train acc 0.834683, Test acc 0.8325.
Epoch 3, Train loss 0.452119665305, Train acc 0.85165, Test acc 0.8527.
Epoch 4, Train loss 0.414959046936, Train acc 0.863767, Test acc 0.859.
Epoch 5, Train loss 0.390483329391, Train acc 0.866017, Test acc 0.8569.
Epoch 6, Train loss 0.369221753915, Train acc 0.876817, Test acc 0.8703.
Epoch 7, Train loss 0.355839974435, Train acc 0.8774, Test acc 0.8686.
Epoch 8, Train loss 0.341571791744, Train acc 0.884767, Test acc 0.8754.
