In [1]:
import sys
sys.path.append('..')
import gluonbook as gb
from gluonbook import utils
from mxnet import autograd, gluon, nd
from mxnet.gluon import loss as gloss

  from ._conv import register_converters as _register_converters


In [2]:
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)

In [3]:
num_inputs = 28*28
num_outputs = 10
# 隐藏层为256个节点，用随机0.01初始化
num_hiddens = 256
weight_scale = .01

W1 = nd.random.normal(shape=(num_inputs, num_hiddens), scale=weight_scale)
b1 = nd.zeros(num_hiddens)
W2 = nd.random.normal(shape=(num_hiddens, num_outputs), scale=weight_scale)
b2 = nd.zeros(num_outputs)
W3 = nd.random.normal(shape=(num_outputs, num_outputs), scale=weight_scale)
b3 = nd.zeros(num_outputs)
params = [W1, b1, W2, b2,W3,b3]

for param in params:
    param.attach_grad()

## 激活函数

如果我们一直用线性操作符构造神经网络，那么整体仍然是一个线性模型。因为
`y=X*W1*W2=X*W3`
这里`W3=W1*W2`
为了我们模型可以拟合非线性的模型，需要在层之间插入非线性的激活函数。这里使用ReLU
    `relu(x)=max(x,0)`

In [4]:
def relu(X):
    return nd.maximum(X, 0)

## 定义模型

我们通过reshape函数将每张原始图片改成长度为num_inputs的向量
把层（全连接）和激活函数串起来

In [5]:
def net(X):
    X = X.reshape((-1, num_inputs))
    H = relu(nd.dot(X, W1) + b1)
    H1 = nd.dot(H, W2)+ b2
    return nd.dot(H1, W3) + b3

## 定义损失函数

为了得到更好的数值稳定性，我们直接使用Gluon提供的包括Softmax运算和交叉熵损失计算的函数。

In [6]:
softmax_cross_entropy = gloss.SoftmaxCrossEntropyLoss()

## 训练模型


In [7]:
learning_rate = .5

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_iter:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output,label)
        loss.backward()
        utils.SGD(params,learning_rate/batch_size)
        
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output,label)
    test_acc = utils.evaluate_accuracy(test_iter,net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch,train_loss/len(train_iter),train_acc/len(train_iter),test_acc))

Epoch 0. Loss: 1.520517, Train acc 0.403295, Test acc 0.672877
Epoch 1. Loss: 0.671699, Train acc 0.740685, Test acc 0.772937
Epoch 2. Loss: 0.533535, Train acc 0.800948, Test acc 0.812500
Epoch 3. Loss: 0.479998, Train acc 0.822316, Test acc 0.847857
Epoch 4. Loss: 0.439475, Train acc 0.837724, Test acc 0.810897
