In [1]:
import torch
import torch.nn as nn
import torch.nn.init as init

import numpy as np
import d2lzh_pytorch as d2l

from collections import OrderedDict

In [2]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

In [3]:
num_inputs = 28 * 28
num_outputs = 10

class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        # 这里的super，在Python2中常写成 super(类名, self)，Python3中可简化为 super()
        super().__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
        
        
    # 这里写linear层的时候x是按batch输入的，因此要把x的size转换为(batch_size, 784)    
    def forward(x):
        y = self.linear(x.view(x.shape[0], -1))
        return y
    
net = LinearNet(num_inputs, num_outputs)

把对x的形状进行变换的操作写成一个层

In [4]:
class FlattenLayer(nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self, x):
        return x.view(x.shape[0], -1)

简化的定义模型的方法

In [5]:
net = nn.Sequential(
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
)

初始化模型参数

In [6]:
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [7]:
loss = nn.CrossEntropyLoss()

In [8]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

In [9]:
num_epochs = 5

for i in range(1, num_epochs + 1): 
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
    for X, y in train_iter:
        output = net(X)
        l = loss(output, y).sum()
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        
        train_l_sum += l.item()
        train_acc_sum += (output.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    
    print("epoch %d, loss %.4f, train loss %.4f, train acc %.3f" % (i, l.item(), train_l_sum / n, train_acc_sum / n))

epoch 1, loss 0.4670, train loss 0.0031, train acc 0.749
epoch 2, loss 0.5305, train loss 0.0022, train acc 0.813
epoch 3, loss 0.5583, train loss 0.0021, train acc 0.826
epoch 4, loss 0.4229, train loss 0.0020, train acc 0.833
epoch 5, loss 0.4030, train loss 0.0019, train acc 0.836


训练需要更多时间，而预测需要的时间相对很短

In [10]:
d2l.evaluate_accuracy(test_iter, net)

0.816