In [3]:
!pip install torchtext
import torch
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

In [5]:
# Load the data: the d2l helper returns two objects that are used below as the
# training and test mini-batch iterators (256 samples per batch).
# NOTE(review): `root` is a hard-coded absolute path (Kesci-style input directory);
# adjust it when running elsewhere.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size,root='/home/kesci/input/FashionMNIST2065')

In [30]:
# Model parameters
# 784 input pixels (28 * 28), 10 output classes, 256 hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Hidden layer (W1/b1): weights sampled with NumPy from a normal distribution
# (mean 0, std 0.01) and converted to float32; bias starts at zero.
W1 = torch.from_numpy(np.random.normal(0, 0.01, (num_inputs, num_hiddens))).float()
b1 = torch.zeros(num_hiddens, dtype=torch.float32)
# Output layer (W2/b2): same initialisation scheme, mapping hidden units to classes.
W2 = torch.from_numpy(np.random.normal(0, 0.01, (num_hiddens, num_outputs))).float()
b2 = torch.zeros(num_outputs, dtype=torch.float32)

# Gather every trainable tensor and turn on gradient tracking for each of them.
params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_()

In [8]:
# Activation function
def relu(x):
    """Element-wise ReLU: return max(x, 0) for every entry of ``x``."""
    zero = torch.tensor(0.0)  # 0-dim tensor, broadcast against x
    return torch.max(x, zero)

In [21]:
# Network definition
def net(x):
    """Forward pass of the one-hidden-layer perceptron.

    The input is flattened to rows of ``num_inputs`` features, passed through
    the hidden layer (W1, b1) with ReLU, then through the output layer
    (W2, b2). The raw output scores are returned without softmax.
    """
    flat = x.view(-1, num_inputs)
    hidden = relu(flat.mm(W1) + b1)
    scores = hidden.mm(W2) + b2
    return scores

In [12]:
# Loss function: softmax cross-entropy on raw scores. 'mean' is PyTorch's
# default reduction, written out here so it is visible that the returned
# value is already averaged over the batch.
loss = torch.nn.CrossEntropyLoss(reduction='mean')

In [34]:
# Training
def train_MLP(net, epoch_nums, train_iter, test_iter,
              batch_size, loss, params, lr=None, optimizer=None):
    """Train ``net`` with mini-batch gradient descent and print per-epoch metrics.

    Exactly one update rule is used per step: ``optimizer.step()`` when an
    optimizer is supplied, otherwise ``d2l.sgd(params, lr, batch_size)``.

    Args:
        net: Callable mapping an input batch to a 2-D score tensor
            (predictions are taken with ``argmax(dim=1)``).
        epoch_nums: Number of passes over ``train_iter``.
        train_iter: Iterable yielding ``(x, y)`` training batches.
        test_iter: Passed to ``d2l.evaluate_accuracy`` after every epoch.
        batch_size: Forwarded to ``d2l.sgd`` (unused when an optimizer is given).
        loss: Callable ``loss(y_hat, y)`` returning a tensor; it is summed
            to a scalar before back-propagation.
        params: List of tensors updated by ``d2l.sgd``. May be ``None`` when
            an optimizer is supplied.
        lr: Learning rate for ``d2l.sgd``; required when ``optimizer`` is None.
        optimizer: Optional object exposing ``zero_grad()`` and ``step()``
            (e.g. a ``torch.optim`` optimizer).

    Raises:
        ValueError: If no optimizer is given and ``params`` or ``lr`` is missing.
    """
    # Fail fast instead of erroring inside d2l.sgd after a full forward/backward pass.
    if optimizer is None and (params is None or lr is None):
        raise ValueError(
            "train_MLP needs either an optimizer or both params and lr")

    for epoch in range(epoch_nums):
        train_loss_sum, train_acc_sum, n = 0.0, 0, 0
        for x, y in train_iter:
            y_hat = net(x)  # predictions for one mini-batch
            _loss = loss(y_hat, y).sum()  # scalar loss for this batch

            # Clear gradients left over from the previous step. Each tensor is
            # checked on its own: a grad is None before the first backward
            # pass and for tensors that did not take part in the graph.
            if optimizer is not None:
                optimizer.zero_grad()
            if params is not None:
                for param in params:
                    if param.grad is not None:
                        param.grad.zero_()

            # Back-propagation.
            _loss.backward()

            # Parameter update.
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()

            # NOTE: one loss value is accumulated per batch, yet the total is
            # divided by the sample count n below. If `loss` already averages
            # over the batch, the printed figure is roughly the mean loss
            # divided by the batch size. Left unchanged to preserve the
            # existing reported metric.
            train_loss_sum += _loss.item()
            # Number of correct predictions in this batch.
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]  # samples seen so far in this epoch
        test_acc = d2l.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc))

In [35]:
# Train for 5 epochs with the hand-written loop above.
# NOTE(review): lr = 100.0 is unusually large. `loss` is built as
# torch.nn.CrossEntropyLoss() with default arguments (mean reduction per the
# PyTorch docs), and batch_size is forwarded to d2l.sgd, which presumably
# divides the gradient by batch_size again — confirm against d2l.sgd.
num_epochs, lr = 5, 100.0
train_MLP(net, num_epochs, train_iter, test_iter, batch_size, loss, params, lr)

epoch 1, loss 0.0019, train acc 0.822, test acc 0.791
epoch 2, loss 0.0017, train acc 0.845, test acc 0.824
epoch 3, loss 0.0015, train acc 0.855, test acc 0.841
epoch 4, loss 0.0014, train acc 0.864, test acc 0.844
epoch 5, loss 0.0014, train acc 0.868, test acc 0.840


In [37]:
# Run the d2l reference training loop with the same arguments for comparison.
# NOTE(review): `params` is not re-initialised in this cell, so this appears to
# continue training from the weights left by the train_MLP run above rather
# than starting fresh — verify before comparing the two sets of numbers.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0011, train acc 0.892, test acc 0.858
epoch 2, loss 0.0011, train acc 0.894, test acc 0.878
epoch 3, loss 0.0011, train acc 0.898, test acc 0.879
epoch 4, loss 0.0011, train acc 0.899, test acc 0.877
epoch 5, loss 0.0010, train acc 0.901, test acc 0.854
