# 多层感知机的从零实现

In [1]:
import numpy as np
import pandas as pd
import torch
from torchvision import datasets
from d2l import torch as d2l

## 准备数据集

In [2]:
# Load Fashion-MNIST through the d2l helper, requesting mini-batches of 64 samples.
dataset = d2l.FashionMNIST(batch_size=64)

In [3]:
# Build one loader per split from the same dataset object: training first, then test.
train_iter, test_iter = (
    dataset.get_dataloader(train=True),
    dataset.get_dataloader(train=False),
)

In [4]:
# Peek at the first training batch only: print the tensor shapes and the
# distinct label values that occur in it.
for X, y in train_iter:
    print(X.shape, y.shape)
    print(np.unique(y))
    break  # one batch is enough for this sanity check

torch.Size([64, 1, 28, 28]) torch.Size([64])
[0 1 2 3 4 5 6 7 8 9]


## 定义网络结构

### 线性层

In [5]:
class Linear:
    """Fully connected layer computing ``H @ W + b``.

    Attributes are created in ``__init__``:
        W: weight of shape ``(input_size, output_size)``, sampled from a normal
           distribution with mean 0 and standard deviation 0.01.
        b: bias of shape ``(output_size,)``, initialised to zeros.
        params: ``[W, b]``, the tensors an optimiser should update.

    Both tensors are created with ``requires_grad=True``.
    """

    def __init__(self, input_size, output_size):
        weight_shape = (input_size, output_size)
        self.W = torch.normal(0, 0.01, size=weight_shape, requires_grad=True)
        self.b = torch.zeros(output_size, requires_grad=True)
        self.params = [self.W, self.b]

    def __call__(self, H):
        """Return the affine transform of ``H``; its last dimension must equal ``input_size``."""
        projected = torch.matmul(H, self.W)
        return projected + self.b

### 激活层

In [6]:
def sigmoid(H):
    """Apply the logistic sigmoid ``1 / (1 + exp(-H))`` element-wise.

    Args:
        H: Tensor of pre-activation values.

    Returns:
        A new tensor of the same shape; 0.5 at ``H == 0`` and increasing in ``H``.
    """
    # The exponent has to be negated: 1 / (1 + exp(+H)) is 1 - sigmoid(H),
    # which decreases as H grows.
    return 1 / (1 + torch.exp(-H))
def ReLu(H):
    """Apply the rectified linear unit ``max(H, 0)`` element-wise.

    Args:
        H: Tensor of pre-activation values. It is left unmodified.

    Returns:
        A new tensor of the same shape with every negative entry replaced by 0.
    """
    # Out-of-place on purpose: writing ``H[H < 0] = 0`` would overwrite the
    # caller's tensor and raises a RuntimeError when H is a leaf tensor that
    # requires grad.
    return torch.clamp(H, min=0)

### 定义SoftMax层

In [7]:
def SoftMax(H: torch.Tensor) -> torch.Tensor:
    """Row-wise softmax of a 2-D tensor of logits.

    Args:
        H: Tensor whose dimension 1 indexes the classes.

    Returns:
        A tensor of the same shape whose entries are non-negative and sum to 1
        along dimension 1.
    """
    # Softmax is invariant to adding a constant per row, so shift each row by
    # its maximum first. The largest exponent is then exp(0) = 1, which keeps
    # exp() from overflowing to inf (and the division from producing NaN).
    shifted = H - H.max(dim=1, keepdim=True).values
    expH = torch.exp(shifted)
    return expH / expH.sum(1, keepdim=True)

### 定义神经网络

In [None]:
class MLP:
    """Multilayer perceptron assembled from this notebook's hand-written layers.

    The network is an ordered list of ``(name, callable)`` pairs applied front
    to back: linear 784 -> 1000, ReLu, linear 1000 -> 100, ReLu,
    linear 100 -> 10, SoftMax. The first layer multiplies by a (784, 1000)
    weight, so the last dimension of the input must be 784.
    """

    def __init__(self):
        # The names are only used as labels by show().
        self.Sequential = [
            ('01-linear(784, 1000)', Linear(28*28, 1000)),
            ('02-ReLu', ReLu),
            ('03-linear(1000, 100)', Linear(1000, 100)),
            ('04-ReLu', ReLu),
            ('05-Linear(100, 10)', Linear(100, 10)),
            ('06-SoftMax', SoftMax),
        ]

    def show(self, X):
        """Run a forward pass, printing each layer's input and output shape.

        Args:
            X: Input batch; it and every intermediate result must expose ``.shape``.

        Returns:
            The output of the last layer.
        """
        for layer_name, layer in self.Sequential:
            print(f"{layer_name:30}{X.shape}", end=' -> ')
            X = layer(X)
            print(X.shape)
        return X

    def get_params(self):
        """Collect the parameters of every layer in the network.

        A layer advertises its parameters through a ``params`` attribute;
        layers without one (plain activation callables) contribute nothing.

        Returns:
            A new flat list of the parameter objects, in layer order.
        """
        params = []
        for _, layer in self.Sequential:
            # getattr rather than ``layer.__dict__``: callables without an
            # instance dict (e.g. built-in functions) would raise
            # AttributeError on the ``__dict__`` lookup.
            params.extend(getattr(layer, 'params', ()))
        return params

    def __call__(self, X):
        """Apply every layer in order and return the final output."""
        for _, layer in self.Sequential:
            X = layer(X)
        return X

### 测试

In [9]:
# Smoke-test the layer: feed a (10, 10) batch holding the values 0..99
# through a 10 -> 2 Linear layer and display the result.
linear = Linear(10, 2)
H = torch.arange(100, dtype=torch.float32).reshape(-1, 10)
linear(H)

tensor([[3.6259e-01, 3.0952e-03],
        [9.4124e-01, 6.5784e-02],
        [1.5199e+00, 1.2847e-01],
        [2.0985e+00, 1.9116e-01],
        [2.6772e+00, 2.5385e-01],
        [3.2558e+00, 3.1654e-01],
        [3.8345e+00, 3.7923e-01],
        [4.4131e+00, 4.4192e-01],
        [4.9918e+00, 5.0460e-01],
        [5.5704e+00, 5.6729e-01]], grad_fn=<AddBackward0>)

In [10]:
# Instantiate the network and keep a flat list of its parameter tensors.
net = MLP()
params = net.get_params()

In [None]:
# Trace a random batch of 5 flattened 28x28 inputs through the network,
# printing every layer's input and output shape.
X = torch.normal(0, 0.1, size=(5, 28*28))
# NOTE: here `y` is the network output, not a label tensor.
y = net.show(X)


01-linear(784, 1000)          torch.Size([5, 784]) -> torch.Size([5, 1000])
02-ReLu                       torch.Size([5, 1000]) -> torch.Size([5, 1000])
03-linear(1000, 100)          torch.Size([5, 1000]) -> torch.Size([5, 100])
04-ReLu                       torch.Size([5, 100]) -> torch.Size([5, 100])
05-Linear(100, 10)            torch.Size([5, 100]) -> torch.Size([5, 10])
06-SoftMax                    torch.Size([5, 10]) -> torch.Size([5, 10])


In [12]:
# List the shape of every parameter tensor, in the order get_params() returned them.
for param in params:
    print(param.shape)

torch.Size([784, 1000])
torch.Size([1000])
torch.Size([1000, 100])
torch.Size([100])
torch.Size([100, 10])
torch.Size([10])


In [13]:
# Reduce the network output to a scalar so that backward() can be called on it.
l = y.sum()

In [14]:
# Inspect the gradient of the last parameter (the final layer's bias)
# before any backward pass has run.
params[-1].grad

In [15]:
# Backpropagate from the scalar, then display the same gradient again now
# that backward() has filled it in.
l.backward()
params[-1].grad

tensor([7.4486e-09, 7.4497e-09, 7.4506e-09, 7.4537e-09, 7.4535e-09, 7.4533e-09,
        7.4509e-09, 7.4505e-09, 7.4501e-09, 7.4509e-09])

## 定义损失函数

In [16]:
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy: the negative log of the entry picked by the label.

    Args:
        y_hat: 2-D tensor with one row per sample; presumably class
            probabilities such as the SoftMax output (confirm at the call site).
        y: One class index per row of ``y_hat``.

    Returns:
        A 1-D tensor with one loss value per sample; no reduction is applied.
    """
    row_ids = range(len(y_hat))
    # Pair row i with column y[i] to select the entry at the labelled class.
    picked = y_hat[row_ids, y]
    return torch.neg(torch.log(picked))