# 多层感知机的从0开始实现

In [1]:
import torch
from torch import nn
from d2l import torch as d2l

# Mini-batch size passed to the data-loading helper below.
batch_size = 256
# d2l helper; judging by its name and the unpacking, it yields a training and a
# test iterator over Fashion-MNIST — TODO confirm against the installed d2l version.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

实现一个具有单隐藏层的多层感知机（只有1个隐藏层也算多层），它包含256个隐藏单元

In [2]:
# Layer sizes: 784 input features, 10 output scores, 256 hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Wrapping a tensor in nn.Parameter marks it as a trainable leaf tensor
# (requires_grad defaults to True), so no explicit requires_grad is needed.
#
# The weights are drawn from a normal distribution and scaled by 0.01.
# Unscaled standard-normal weights, summed over 784 inputs, produce very large
# pre-activations and logits, which makes the cross-entropy loss huge and the
# SGD updates unstable. The concise implementation later in this notebook
# initialises its weights with the same std=0.01.
W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hiddens))

W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs))

# Everything the optimizer should update, in layer order.
params = [W1, b1, W2, b2]

实现ReLU激活函数

In [3]:
def relu(X):
    """Element-wise ReLU: return a tensor holding max(x, 0) for every entry of ``X``.

    The result has the same shape and dtype as ``X``.
    """
    # Compare against an all-zero tensor of matching shape/dtype/device.
    return torch.maximum(X, torch.zeros_like(X))

实现模型

In [5]:
def net(X):
    """Forward pass of the single-hidden-layer MLP.

    ``X`` is reshaped to rows of ``num_inputs`` features, passed through the
    hidden affine layer (``W1``, ``b1``) followed by ``relu``, and then through
    the output affine layer (``W2``, ``b2``). The raw output scores are
    returned; no softmax is applied here.
    """
    flat = X.reshape(-1, num_inputs)
    # torch.matmul is the function form of the @ operator.
    hidden = relu(torch.matmul(flat, W1) + b1)
    # Hidden activations times the second-layer weights, plus bias.
    return torch.matmul(hidden, W2) + b2

# Cross-entropy loss with default arguments. nn.CrossEntropyLoss takes
# unnormalised scores (logits), which matches net() returning raw outputs.
loss = nn.CrossEntropyLoss()

训练

In [None]:
# Training hyperparameters: number of epochs and SGD learning rate.
num_epochs, lr = 10, 0.1
# Plain SGD over the hand-built parameter list.
updater = torch.optim.SGD(params, lr=lr)
# train_ch3 is not defined anywhere in this notebook; it comes from the d2l
# package, so it must be called through the module (a bare `train_ch3` raises
# NameError on a fresh kernel).
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
# Author's note: the model is larger, so it fits the data better and the
# loss goes down (presumably relative to the earlier softmax-regression model).

# 多层感知机简洁实现

In [7]:
import torch
from torch import nn
from d2l import torch as d2l

In [9]:
# Layer sizes for the concise MLP.
num_inputs = 784
num_outputs = 10
num_hiddens = 256

net = nn.Sequential(
    nn.Flatten(),                         # e.g. 28x28 images -> 784 features
    nn.Linear(num_inputs, num_hiddens),   # nn.Linear does the W x + b for us
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

def init_weights(m):
    """Re-initialise the weight of a plain ``nn.Linear`` module in place.

    Written to be passed to ``net.apply``. Modules whose exact type is not
    ``nn.Linear`` are left untouched, and the bias is never modified.
    """
    if type(m) is not nn.Linear:
        return
    # Normal distribution with a small standard deviation.
    nn.init.normal_(m.weight, std=0.01)
        
# Module.apply runs init_weights on every submodule of net (and on net itself)
# and returns net, which is why this cell displays the Sequential repr.
net.apply(init_weights)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

训练

In [None]:
# Hyperparameters for the concise implementation.
batch_size = 256
lr = 0.1
num_epochs = 10

# Loss and optimizer; the optimizer takes its parameters straight from the module.
loss = nn.CrossEntropyLoss()
updater = torch.optim.SGD(net.parameters(), lr=lr)

# Load the data and run training with the d2l helper.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)