### 层和块
首先，我们回顾一下**多层感知机**

In [2]:
import torch
from torch import nn
from torch.nn import functional as F

# Multi-layer perceptron: 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# Batch of 2 samples with 20 features each, drawn uniformly from [0, 1).
X = torch.rand(2, 20)
net(X)

tensor([[ 0.0242,  0.1153,  0.2040,  0.0287, -0.0266,  0.0159,  0.0609,  0.0128,
          0.2768,  0.1767],
        [-0.0182,  0.0165,  0.1345,  0.0110, -0.0769, -0.0263,  0.0359,  0.0306,
          0.1647,  0.1684]], grad_fn=<AddmmBackward0>)

> `nn.Sequential`定义了一种特殊的`Module`

### 自定义块

In [4]:
class MLP(nn.Module):
    """Multi-layer perceptron with one hidden layer, written as a custom block.

    Maps 20 input features to 10 outputs through a 256-unit hidden layer
    followed by a ReLU activation.
    """

    def __init__(self):
        """Create the hidden and output linear layers."""
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Compute the forward pass.

        Args:
            X: Input whose last dimension has size 20.

        Returns:
            The output of the final linear layer (last dimension of size 10).
        """
        hidden_pre_activation = self.hidden(X)
        hidden_activation = F.relu(hidden_pre_activation)
        return self.out(hidden_activation)

**实例化多层感知机的层，然后在每次调用正向传播函数时调用这些层**

In [6]:
# Instantiate the custom MLP block and apply it to the input batch X.
net = MLP()
net(X)

tensor([[-0.0072, -0.2892,  0.1307, -0.0346,  0.0774,  0.3824,  0.0403,  0.0860,
          0.1285,  0.0466],
        [-0.0055, -0.2632,  0.1058, -0.0091,  0.0549,  0.1241, -0.0058, -0.1128,
          0.1322,  0.0235]], grad_fn=<AddmmBackward0>)

### 顺序块

In [10]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The blocks passed to the constructor are stored as child modules in the
    order given, and ``forward`` feeds the input through them one after
    another.
    """

    def __init__(self, *args):
        """Register every positional argument as a child block.

        Args:
            *args: Blocks to chain, in execution order.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Children are keyed by their position as a string. ``Module``
            # builds dotted names from these keys in ``state_dict()``,
            # ``named_modules()`` and ``repr()``, so a non-string key (such
            # as the block object itself) makes those calls raise TypeError.
            # Positional keys also keep a block that is passed twice as two
            # separate entries, so it is applied twice.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Apply the stored blocks to ``X`` in registration order.

        Args:
            X: Input accepted by the first block.

        Returns:
            The output of the last block (``X`` unchanged if there are no
            blocks).
        """
        for block in self._modules.values():
            X = block(X)
        return X

# Chain the same three layers as the earlier nn.Sequential example,
# this time through the hand-written MySequential container.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[ 0.1484, -0.0229,  0.0901, -0.0608,  0.0258,  0.0669, -0.1206, -0.0817,
         -0.1542,  0.1571],
        [ 0.0770,  0.0306,  0.0601, -0.0318,  0.1206,  0.0801, -0.1502, -0.0202,
         -0.0664,  0.1646]], grad_fn=<AddmmBackward0>)

**在正向传播函数中执行代码：有时`nn.Sequential`无法满足需求，需要我们自定义正向传播函数**

In [12]:
class FixedHiddenMLP(nn.Module):
    """Block whose forward pass mixes layers with arbitrary Python code.

    It combines one trainable linear layer (used twice, so its parameters are
    shared), a constant random weight matrix that is never trained, and a
    data-dependent ``while`` loop.
    """

    def __init__(self):
        """Create the constant random weights and the shared linear layer."""
        super().__init__()
        # Constant (non-trainable) random weights. Registering them as a
        # buffer lets ``.to(device)`` / ``.double()`` convert them together
        # with the rest of the module; as a plain attribute they would be
        # left behind and ``torch.mm`` in ``forward`` would fail on a
        # device/dtype mismatch. ``persistent=False`` keeps them out of
        # ``state_dict()``, so saved checkpoints contain the same keys as
        # before.
        self.register_buffer(
            "rand_weight",
            torch.rand((20, 20), requires_grad=False),
            persistent=False,
        )
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Compute the forward pass.

        Args:
            x: 2-D input (``torch.mm`` requires a matrix) whose second
                dimension has size 20.

        Returns:
            A scalar tensor: the sum of all elements of the final activation.
        """
        x = self.linear(x)
        # Multiply by the fixed weights, add a constant 1, then apply ReLU.
        x = F.relu(torch.mm(x, self.rand_weight) + 1)
        # Reuse the same linear layer: both applications share parameters.
        x = self.linear(x)
        # Control flow: halve the activations until their L1 norm is <= 1.
        while x.abs().sum() > 1:
            x /= 2
        return x.sum()  # Return a scalar.

# Instantiate the block and run it on X; its forward pass returns a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(0.2683, grad_fn=<SumBackward0>)

**混合搭配各种组合块的方法**

In [21]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` sub-network inside a custom module.

    The sub-network maps 20 -> 64 -> 32 features with a ReLU after each linear
    layer; a final linear layer then maps 32 -> 16.
    """

    def __init__(self):
        """Build the nested sequential sub-network and the output layer."""
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        )
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Compute the forward pass.

        Args:
            X: Input whose last dimension has size 20.

        Returns:
            The output of the final linear layer (last dimension of size 16).
        """
        features = self.net(X)
        return self.linear(features)

# Mix different kinds of blocks in one container: NestMLP (20 -> 16 features),
# a plain linear layer (16 -> 20), then FixedHiddenMLP, which reduces to a scalar.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.3256, grad_fn=<SumBackward0>)