### 多层感知机的从零开始实现

In [16]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import sys

#### 获取和读取数据

In [17]:
batch_size = 256
train_data = torchvision.datasets.FashionMNIST('./data/FashionMNIST',train=True,transform=torchvision.transforms.ToTensor(),download=True)
test_data = torchvision.datasets.FashionMNIST('./data/FashionMNIST',train=False,transform=torchvision.transforms.ToTensor(),download=True)

train_iter = torch.utils.data.DataLoader(train_data,batch_size,shuffle=True)
test_iter = torch.utils.data.DataLoader(test_data,batch_size,shuffle=False)

#### 定义模型参数

In [18]:
num_inputs = 28 * 28
num_outputs = 10
num_hiddens = 256 # 隐藏单元个数
w_1 = torch.tensor(np.random.normal(0,0.01,(num_inputs,num_hiddens)),dtype=torch.float)
b_1 = torch.zeros(num_hiddens,dtype=torch.float)
w_2 = torch.tensor(np.random.normal(0,0.01,(num_hiddens,num_outputs)),dtype=torch.float)
b_2 = torch.zeros(num_outputs,dtype=torch.float)

params = [w_1,b_1,w_2,b_2]
for param in params:
    param.requires_grad_(requires_grad=True)

#### 定义激活函数:
    使用基础的max函数来实现ReLU

In [19]:
def relu(x):
#     return torch.max(torch.tensor([x,0]))
    a = torch.zeros_like(x)
    return torch.max(x,a)

#### 定义模型
![mlp_2](./img/3.6/mlp_2.png)

In [20]:
def net(x):
    x = x.view(-1,num_inputs)
    H = relu(torch.mm(x,w_1) + b_1)
    O = torch.mm(H,w_2) + b_2
    return O

#### 定义损失函数

In [21]:
# 直接使用PyTorch提供的包括softmax运算和交叉熵损失计算的函数
loss_fn = torch.nn.CrossEntropyLoss()

#### 训练模型

In [22]:
# 设超参数迭代周期数为5，学习率为100.0
eporchs = 5
lr = 100.0
for epoch in range(eporchs):
    train_loss_sum,train_acc_sum,train_n = 0.0,0.0,0
    test_acc_sum,test_n = 0.0,0
    for x,y in train_iter:
        y_hat = net(x)
        loss = loss_fn(y_hat,y)
        if params is not None and params[0].grad is not None:
            for param in params:
                param.grad.data.zero_()
            
        loss.backward()
        for param in params:
            param.data -= param.grad * lr / batch_size
        
        train_acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
        train_loss_sum += loss.item()
        train_n += y.shape[0]
        
    for x,y in test_iter:
        y_hat = net(x)
        test_acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
        test_n += y.shape[0]
    print('epoch: %d tran_loss: %.4f train_acc: %.3f test_acc: %.3f'%(epoch+1
                                                                      ,train_loss_sum / train_n,train_acc_sum / train_n
                                                                      ,test_acc_sum / test_n))

epoch: 1 tran_loss: 0.0030 train_acc: 0.715 test_acc: 0.800
epoch: 2 tran_loss: 0.0019 train_acc: 0.821 test_acc: 0.800
epoch: 3 tran_loss: 0.0017 train_acc: 0.844 test_acc: 0.827
epoch: 4 tran_loss: 0.0015 train_acc: 0.855 test_acc: 0.819
epoch: 5 tran_loss: 0.0015 train_acc: 0.861 test_acc: 0.857


### 多层感知机的简洁实现

#### 定义模型

In [49]:
# class Net(torch.nn.Module):
#     def __init__(self,num_inputs,num_hiddens,num_outputs):
#         super(Net,self).__init__()
#         self.fl = torch.nn.Flatten()
#         self.mlp_1 = torch.nn.Linear(num_inputs,num_hiddens)
#         self.act = torch.nn.ReLU()
#         self.mlp_2 = torch.nn.Linear(num_hiddens,num_outputs)
        
#     def forward(self,x):
#         x = self.fl(x)
#         out = self.mlp_1(x)
#         out = self.act(out)
#         out = self.mlp_2(out)
#         return out
    
net = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(num_inputs,num_hiddens),
    torch.nn.ReLU(),
    torch.nn.Linear(num_hiddens,num_outputs)
)

In [50]:
# net = Net(num_inputs,num_hiddens,num_outputs)
net

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [54]:
# 对模型的 参数 进行重新初始化赋值
for param in net.parameters():
    torch.nn.init.normal_(param,mean=0,std=0.01)

In [55]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),lr=0.5)

In [48]:
for epoch in range(eporchs):
    train_loss_sum,train_acc_sum,train_n = 0.0,0.0,0
    test_acc_sum,test_n = 0.0,0
    for x,y in train_iter:
        y_hat = net(x)
        loss = loss_fn(y_hat,y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
        train_loss_sum += loss.item()
        train_n += y.shape[0]
        
    for x,y in test_iter:
        y_hat = net(x)
        test_acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
        test_n += y.shape[0]
    print('epoch: %d tran_loss: %.4f train_acc: %.3f test_acc: %.3f'%(epoch+1
                                                                      ,train_loss_sum / train_n,train_acc_sum / train_n
                                                                      ,test_acc_sum / test_n))

epoch: 1 tran_loss: 0.0031 train_acc: 0.703 test_acc: 0.795
epoch: 2 tran_loss: 0.0019 train_acc: 0.823 test_acc: 0.829
epoch: 3 tran_loss: 0.0016 train_acc: 0.846 test_acc: 0.846
epoch: 4 tran_loss: 0.0015 train_acc: 0.856 test_acc: 0.859
epoch: 5 tran_loss: 0.0014 train_acc: 0.865 test_acc: 0.830
