# 3.9 多层感知机的从零开始实现

In [1]:
import torch
import numpy as np
import sys
# The path tweak below has to run before the d2lzh_pytorch import that follows it.
sys.path.append("..") # make the parent directory importable so d2lzh_pytorch can be found
import d2lzh_pytorch as d2l

# Show which PyTorch version this notebook is running against.
print(torch.__version__)

1.0.0


## 3.9.1 获取和读取数据

In [2]:
# Mini-batch size; handed to the data-loading helper here and to the training call later on.
batch_size = 256
# The helper's return value is unpacked into a training iterator and a test iterator.
# NOTE(review): presumably each yields (features, labels) mini-batches of Fashion-MNIST —
# confirm against d2lzh_pytorch.load_data_fashion_mnist.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

## 3.9.2 定义模型参数

In [3]:
# Layer widths: input features, hidden units, output classes.
num_inputs = 784
num_hiddens = 256
num_outputs = 10

# Weights: Gaussian noise (mean 0, std 0.01) sampled with NumPy, then cast to float32.
# Biases: all zeros. W1 is sampled before W2, so the NumPy RNG is consumed in that order.
W1 = torch.from_numpy(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens))).float()
b1 = torch.zeros(num_hiddens, dtype=torch.float32)
W2 = torch.from_numpy(np.random.normal(0, 0.01, size=(num_hiddens, num_outputs))).float()
b2 = torch.zeros(num_outputs, dtype=torch.float32)

# Every tensor in this list is a trainable leaf: switch on gradient tracking in place.
params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_()

## 3.9.3 定义激活函数

In [9]:
def relu(X):
    """Element-wise rectified linear unit: max(x, 0) for every entry of X.

    Implemented as a lower clamp on X itself rather than a maximum against a
    freshly built zero tensor, so no extra tensor is allocated per call and
    the result keeps X's dtype and device.

    Args:
        X: input tensor of any shape.

    Returns:
        A tensor with the same shape as X in which negative entries are
        replaced by 0 and non-negative entries are unchanged.
    """
    return torch.clamp(X, min=0)

In [10]:
# Sanity check for relu(): the three negative entries should come back as 0
# and the single positive entry (3) should pass through unchanged.
X = torch.tensor(
    [[-2, -3],
     [-4, 3]],
    dtype=torch.float,
)
H = relu(X)
print(H)

tensor([[0., 0.],
        [0., 3.]])


## 3.9.4 定义模型

In [14]:
def net(X):
    """Forward pass of the two-layer perceptron: flatten, affine, ReLU, affine.

    X is first viewed as a 2-D tensor with num_inputs columns. The hidden
    layer uses the module-level W1/b1 followed by relu; the output layer uses
    W2/b2. No softmax is applied here, so the return value is the raw
    output-layer result.
    """
    flat = X.view(-1, num_inputs)
    hidden = relu(flat @ W1 + b1)
    return hidden @ W2 + b2

In [16]:
# Quick look at torch.matmul (matrix multiplication) with a 1-D and a 2-D operand.
# test0 has shape (2,) and test1 has shape (2, 1); the product is the
# one-element tensor 3*2 + 4*1 = 10.
test0 = torch.tensor([3, 4])
test1 = torch.tensor([[2], [1]])
print(torch.matmul(test0, test1))

tensor([10])


## 3.9.5 定义损失函数
**说明**：这里直接使用 PyTorch 提供的交叉熵损失函数 `torch.nn.CrossEntropyLoss`。该函数内部已经包含 softmax 运算，因此上面的 `net` 只需输出未经归一化的结果。

In [12]:
# Cross-entropy criterion. The reduction is spelled out for the reader; "mean"
# is the library default, so the per-batch loss is averaged over the samples.
loss = torch.nn.CrossEntropyLoss(reduction="mean")

## 3.9.6 训练模型

In [13]:
# Training hyperparameters.
num_epochs = 5
# NOTE(review): 100.0 is an unusually large SGD step size. Presumably it compensates for
# the d2l training helper dividing gradients by batch_size on top of the already
# batch-averaged loss — confirm against d2lzh_pytorch.sgd / train_ch3 before changing it.
lr = 100.0

d2l.train_ch3(
    net, train_iter, test_iter, loss,
    num_epochs, batch_size, params, lr,
)

epoch 1, loss 0.0022, train acc 0.829, test acc 0.833
epoch 2, loss 0.0017, train acc 0.841, test acc 0.832
epoch 3, loss 0.0016, train acc 0.852, test acc 0.822
epoch 4, loss 0.0015, train acc 0.857, test acc 0.847
epoch 5, loss 0.0015, train acc 0.862, test acc 0.849
