# Build the Neural Network
Neural networks are composed of layers/modules that perform operations on data. The `torch.nn` namespace provides all the building blocks you need to build your own neural network. Every module in PyTorch subclasses `nn.Module`. A neural network is itself a module that consists of other modules (layers). This nested structure makes it easy to build and manage complex architectures.

神经网络由层和模块组成，每个模块都是nn.Module 的子类。PyTorch 允许通过模块的嵌套结构来构建和管理复杂的神经网络架构，从而提供了高可复用性和模块化的设计。

In [1]:
import os 
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Choose the compute backend in priority order: CUDA first, then MPS,
# and fall back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f'使用 {device}')

使用 cpu


## Define the class

In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, logits.

    The defaults reproduce the original fixed architecture
    (28*28 inputs -> 512 -> 512 -> 10 outputs); the sizes are exposed as
    parameters so the same class can be reused for other input or label
    sizes.

    Args:
        in_features: Number of values per sample once every dimension
            after the first has been flattened.
        hidden_features: Width of both hidden layers.
        num_classes: Number of output scores (logits) per sample.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ) -> None:
        super().__init__()
        # Attribute names are kept as-is: they determine the parameter
        # names (e.g. ``linear_relu_stack.0.weight``) and the printed repr.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw, unnormalized class scores for the batch ``x``.

        ``x`` is flattened from dimension 1 onward, so it must hold
        ``in_features`` values per sample.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# One random 28x28 "image" (batch of size 1) created on the model's device.
X = torch.rand((1, 28, 28), device=device)
# Calling the module runs forward() and returns the raw class scores.
logits = model(X)
# Turn the scores into probabilities across the class dimension.
pred_probab = nn.Softmax(dim=1)(logits)
# Index of the largest probability along dimension 1 is the predicted class.
y_pred = pred_probab.argmax(dim=1)
print(f'预测: {y_pred}')

预测: tensor([5])


In [6]:
# Show the raw scores and their softmax probabilities, one tensor per line.
print(logits, pred_probab, sep='\n')

tensor([[ 0.0482, -0.0345,  0.0120, -0.0160, -0.0301,  0.1236, -0.0007,  0.0507,
         -0.0140, -0.0276]], grad_fn=<AddmmBackward0>)
tensor([[0.1037, 0.0954, 0.1000, 0.0972, 0.0959, 0.1118, 0.0987, 0.1039, 0.0974,
         0.0961]], grad_fn=<SoftmaxBackward0>)


---
## Model Layers

In [7]:
# A mini-batch of three random 28x28 images used to trace each layer below.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


### nn.Flatten
常用于图像识别中，将每个二维图像展平为一维的张量（默认保留第 0 维，即批次维度）

In [8]:
# Spell out nn.Flatten's defaults: dimension 0 is kept and every later
# dimension is merged into one.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


### nn.Linear
线性层，也叫全连接层，主要用于维度的变换，包括数据的升维和降维

In [9]:
# Linear layer mapping each flattened 784-value image to 20 features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


### nn.ReLU
[link](https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html)
> torch.nn.ReLU(inplace=False)

$$
\text{ReLU}(x) = (x)^{+} = \max(0, x)
$$
![](https://pytorch.org/docs/stable/_images/ReLU.png)

In [10]:
# Print the activations before and after ReLU to show negatives clamped to 0.
print(f'ReLU 之前: {hidden1}\n\n')
relu = nn.ReLU()
hidden1 = relu(hidden1)
print(f'ReLU 之后: {hidden1}')

ReLU 之前: tensor([[-0.3114,  0.2477,  0.1777, -0.3673,  0.3306,  0.7543, -0.3078, -0.0452,
          0.3987, -0.0351, -0.3997, -0.4820, -0.5678,  0.1690,  0.1324, -0.0691,
         -0.3324, -0.2247,  0.6033, -0.3391],
        [-0.4664,  0.1928,  0.1604, -0.4897,  0.1591,  0.8991, -0.2854,  0.0924,
          0.3407, -0.1260, -0.1425, -0.0817, -0.5721,  0.0176,  0.2367, -0.0274,
         -0.5785,  0.0199,  0.4283, -0.3838],
        [-0.0454,  0.1482,  0.1695, -0.1130,  0.2978,  0.9596, -0.3010, -0.1943,
          0.4031,  0.1211, -0.1663, -0.5490, -0.4800,  0.1672,  0.1655, -0.0535,
         -0.3592,  0.0292,  0.3593, -0.1802]], grad_fn=<AddmmBackward0>)


ReLU 之后: tensor([[0.0000, 0.2477, 0.1777, 0.0000, 0.3306, 0.7543, 0.0000, 0.0000, 0.3987,
         0.0000, 0.0000, 0.0000, 0.0000, 0.1690, 0.1324, 0.0000, 0.0000, 0.0000,
         0.6033, 0.0000],
        [0.0000, 0.1928, 0.1604, 0.0000, 0.1591, 0.8991, 0.0000, 0.0924, 0.3407,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0176, 0.2367, 0.0

### nn.Sequential
可以看作是一个有序的容器，包含需要添加的模块

In [11]:
# Chain the flatten and linear layers defined earlier with a ReLU and a new
# 20 -> 10 output layer; nn.Sequential feeds the data through them in order.
output_layer = nn.Linear(20, 10)
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), output_layer)
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
print(logits)

tensor([[ 0.1635,  0.2807, -0.3064, -0.2311, -0.0735,  0.2493, -0.1902,  0.1324,
         -0.4660,  0.2927],
        [ 0.0198,  0.0293, -0.1512, -0.1142, -0.1153,  0.2867, -0.3429,  0.2757,
         -0.5296,  0.3000],
        [ 0.1398,  0.2112, -0.2424, -0.2588, -0.1130,  0.3798, -0.2369,  0.1930,
         -0.5710,  0.3254]], grad_fn=<AddmmBackward0>)


### nn.Softmax
> torch.nn.Softmax(dim=None)

将数据缩放到 \[0,1\] 的区间，并且使得沿指定维度 `dim` 的总和为 1

$$
\text{Softmax}(x_i) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
$$


In [12]:
# Normalize each row of logits (dim=1) into probabilities, then print the
# probabilities followed by the element-wise sum of the first row.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
print(pred_probab, sum(pred_probab[0]), sep='\n')

tensor([[0.1157, 0.1301, 0.0723, 0.0780, 0.0913, 0.1260, 0.0812, 0.1121, 0.0616,
         0.1316],
        [0.1021, 0.1031, 0.0860, 0.0893, 0.0892, 0.1333, 0.0710, 0.1319, 0.0589,
         0.1351],
        [0.1122, 0.1205, 0.0766, 0.0753, 0.0872, 0.1427, 0.0770, 0.1183, 0.0551,
         0.1351]], grad_fn=<SoftmaxBackward0>)
tensor(1.0000, grad_fn=<AddBackward0>)


## Model Parameters

In [13]:
# Print the module tree first, then every learnable tensor the model owns.
print(f'模型的结构: {model}\n\n')

# named_parameters() yields (qualified name, parameter) pairs, covering the
# parameters of nested submodules as well.
for name, param in model.named_parameters():
    # Only the first two entries along dimension 0 are shown to keep it short.
    print(f'层: {name} | 大小: {param.size()} | 值: {param[:2]} \n')

模型的结构: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


层: linear_relu_stack.0.weight | 大小: torch.Size([512, 784]) | 值: tensor([[ 0.0338, -0.0293, -0.0277,  ...,  0.0012,  0.0050,  0.0010],
        [-0.0065,  0.0264, -0.0259,  ..., -0.0356,  0.0262, -0.0277]],
       grad_fn=<SliceBackward0>) 

层: linear_relu_stack.0.bias | 大小: torch.Size([512]) | 值: tensor([ 0.0089, -0.0085], grad_fn=<SliceBackward0>) 

层: linear_relu_stack.2.weight | 大小: torch.Size([512, 512]) | 值: tensor([[ 0.0146, -0.0281, -0.0083,  ...,  0.0273,  0.0146,  0.0284],
        [ 0.0075,  0.0313,  0.0363,  ...,  0.0395,  0.0254,  0.0371]],
       grad_fn=<SliceBackward0>) 

层: linear_relu_stack.2.bias | 大小: torch.Size([512]) | 值: tensor([ 0.0418, -0.036