In [1]:
import torch
from torch import nn
from torch.nn import functional as F

#### 多层感知机

1.用nn.Sequential简便定义

In [2]:
# Quick route: chain the layers inside an nn.Sequential container.
net = nn.Sequential(
    nn.Linear(in_features=20, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=10),
)

# Random batch of 2 samples with 20 features each; the cells below reuse X.
X = torch.rand(size=(2, 20))
net(X)

tensor([[ 0.0079,  0.2971, -0.1664, -0.2609, -0.1028,  0.0553, -0.0844,  0.3181,
          0.1440,  0.0183],
        [-0.0120,  0.2243, -0.2270, -0.2375,  0.0023,  0.1284, -0.1863,  0.3751,
          0.2210,  0.0318]], grad_fn=<AddmmBackward>)

nn.Sequential 定义了一种特殊的 Module：它按传入的顺序依次执行其中的各个层。

Module：任何一个层或整个神经网络都应该是 nn.Module 的子类。

2.自定义一个MLP Module，实现跟之前一样的操作：

In [3]:
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer and a ReLU activation.

    With the default sizes this computes the same function family as
    ``nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))``.

    Args:
        in_features: Size of each input sample (default 20).
        hidden_features: Width of the hidden layer (default 256).
        out_features: Size of each output sample (default 10).
    """

    def __init__(self, in_features=20, hidden_features=256, out_features=10):
        super().__init__()
        # Define the layers. Assigning them as attributes registers them as
        # submodules, so their parameters appear in parameters()/state_dict().
        self.hidden = nn.Linear(in_features, hidden_features)
        self.out = nn.Linear(hidden_features, out_features)

    def forward(self, X):
        """Compute ``out(relu(hidden(X)))``.

        Args:
            X: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``X`` and a last
            dimension of ``out_features``.
        """
        return self.out(F.relu(self.hidden(X)))

In [4]:
# Instantiate the custom MLP and run it on the batch X created in an earlier cell.
net = MLP()
net(X)

tensor([[ 0.1495, -0.2505,  0.2586, -0.0539, -0.0353, -0.2609, -0.0131, -0.0313,
         -0.0268, -0.0772],
        [ 0.0923, -0.1735,  0.3054, -0.1444, -0.2471, -0.2457, -0.0312, -0.0380,
         -0.0468, -0.0069]], grad_fn=<AddmmBackward>)

3.手动定义一个顺序块Sequential

In [5]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The blocks passed to the constructor are stored in order and applied one
    after another in ``forward``.

    Args:
        *args: The blocks to chain, in execution order.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Register each block under a string index ("0", "1", ...).
            # Keying by the module object itself breaks repr(), state_dict()
            # and named_modules(), which build dotted string names from the
            # keys, and silently drops a block that is passed more than once.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through every registered block in insertion order."""
        # _modules is an ordered mapping, so iteration follows registration order.
        for block in self._modules.values():
            X = block(X)
        return X

In [6]:
# Same three-layer stack as the nn.Sequential example, built with the hand-written container.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[-0.0699, -0.0697, -0.0617, -0.0775,  0.1169,  0.2569,  0.1473,  0.0108,
          0.0872,  0.0270],
        [-0.1398, -0.2304, -0.1077, -0.0490, -0.1197,  0.2514,  0.2516,  0.0729,
          0.0253,  0.0126]], grad_fn=<AddmmBackward>)

当方法1的 nn.Sequential 不能满足运算要求时，可以用方法2：继承 nn.Module，在 `__init__` 和 `forward` 方法中做任意自定义的计算，`forward` 也可以返回任意值。

In [7]:
class FixedHiddenMLP(nn.Module):
    """MLP showing arbitrary computation inside ``forward``.

    It combines a constant (untrained) random weight matrix, one linear layer
    that is applied twice, and a data-dependent ``while`` loop. The forward
    pass returns a single scalar.
    """

    def __init__(self):
        super().__init__()
        # Constant random weight that does not take part in training.
        # Registering it as a buffer (instead of a plain tensor attribute)
        # makes it follow .to()/.cuda()/.double() and be saved in state_dict(),
        # while keeping it out of parameters() so optimizers never update it.
        self.register_buffer("rand_weight", torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Map a 2-D batch of shape ``(batch, 20)`` to a scalar tensor.

        ``torch.mm`` requires ``X`` to be 2-D, and the linear layer fixes the
        feature size at 20.
        """
        X = self.linear(X)
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same layer: both applications share one set of parameters.
        X = self.linear(X)
        # Data-dependent control flow: halve until the sum of absolute values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [8]:
# FixedHiddenMLP.forward ends with X.sum(), so the whole batch is reduced to one scalar.
net = FixedHiddenMLP()
net(X)

tensor(-0.0863, grad_fn=<SumBackward0>)

只要是 nn.Module 的子类，就可以相互嵌套使用。

如：自定义的网络 NestMLP、FixedHiddenMLP，以及 nn.Linear 层和 nn.Sequential 容器，都可以灵活嵌套。

In [9]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` stack in front of a linear head.

    The nested stack maps 20 -> 64 -> 32 features with ReLU after each linear
    layer; the head then maps 32 -> 16.
    """

    def __init__(self):
        super().__init__()
        # Build the inner stack first so layers are created in the same order.
        hidden_stack = [
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        ]
        self.net = nn.Sequential(*hidden_stack)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then through the linear head."""
        features = self.net(X)
        return self.linear(features)

In [10]:
# Mix custom blocks and built-in layers: NestMLP maps 20 -> 16 features, the linear
# layer maps 16 -> 20, and FixedHiddenMLP reduces the result to a scalar.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.1329, grad_fn=<SumBackward0>)