In [6]:
import torch
import torch.nn as nn
import torch.nn.init as init

import numpy as np
import d2lzh_pytorch as d2l

from collections import OrderedDict

In [4]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

In [5]:
num_inputs = 28 * 28
num_outputs = 10

class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        # 这里的super，在Python2中常写成 super(类名, self)，Python3中可简化为 super()
        super().__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
        
        
    # 这里写linear层的时候x是按batch输入的，因此要把x的size转换为(batch_size, 784)    
    def forward(x):
        y = self.linear(x.view(x.shape[0], -1))
        return y
    
net = LinearNet(num_inputs, num_outputs)

把对x的形状进行变换的操作写成一个层

In [9]:
class FlattenLayer(nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self, x):
        return x.view(x.shape[0], -1)

简化的定义模型的方法

In [10]:
net = nn.Sequential(
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
)

初始化模型参数

In [11]:
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [12]:
loss = nn.CrossEntropyLoss()

In [13]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

In [17]:
num_epochs = 5

for i in range(1, num_epochs + 1):
    for X, y in train_iter:
        output = net(X)
        l = loss(output, y).sum()
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    print("epoch %d, loss: %f" % (i, l.item()))

epoch 1, loss: 0.584514
epoch 2, loss: 0.467656
epoch 3, loss: 0.706881
epoch 4, loss: 0.487947
epoch 5, loss: 0.479378
