# 1.继承 Module类来构建模型    
之所以没有将 Module 类命名为 Layer 或 Model，是因为 Module 类很灵活：它既可以是一个层，也可以是整个模型或模型的一部分

In [3]:
import torch
from torch import nn

class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (Linear -> ReLU -> Linear)."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two fully connected layers and the activation between them."""
        super().__init__()
        self.W1 = nn.Linear(input_size, hidden_size)
        self.activation = nn.ReLU()
        self.W2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the output layer applied to the activated hidden representation of x."""
        hidden = self.W1(x)
        hidden = self.activation(hidden)
        return self.W2(hidden)
    # backward is generated automatically, so it is not defined here

In [4]:
# Sizes of the random input batch and of the network layers
batch_size = 2
input_size = 784
hidden_size = 256
output_size = 10
# The input is drawn before the model is built, so the random draws happen in this order
X = torch.rand((batch_size, input_size))
net = MLP(input_size, hidden_size, output_size)
# Bare expression: the notebook displays the module structure
net

MLP(
  (W1): Linear(in_features=784, out_features=256, bias=True)
  (activation): ReLU()
  (W2): Linear(in_features=256, out_features=10, bias=True)
)

In [5]:
net(X)

tensor([[-0.1230,  0.0658, -0.2367,  0.0200, -0.1056,  0.0963,  0.3368,  0.1295,
         -0.2195,  0.0380],
        [-0.1679,  0.0590, -0.0267,  0.0114,  0.0302,  0.0973,  0.1587,  0.2553,
         -0.1287, -0.0179]], grad_fn=<AddmmBackward>)

# Module子类

## Sequential类

In [22]:
# When the forward computation is a plain chain of layers, Sequential is enough.
# NOTE: this rebinds the name MLP, which the earlier cell used for the class.
MLP = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

In [23]:
class MySequential(nn.Module):
    """Minimal re-implementation of nn.Sequential.

    Accepts either a single OrderedDict mapping names to modules, or any
    number of modules as positional arguments (named "0", "1", ... in order).
    forward() applies the registered modules one after another.
    """

    def __init__(self, *args):
        # Imported here because a class-body import only creates a class
        # attribute, which is not visible as a bare name inside methods.
        from collections import OrderedDict

        super(MySequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict):
            for key, module in args[0].items():
                # add_module stores the module in self._modules under `key`
                self.add_module(key, module)
        else:
            for index, module in enumerate(args):
                self.add_module(str(index), module)

    def forward(self, input):
        """Feed `input` through every registered module in registration order."""
        for module in self._modules.values():
            input = module(input)
        return input

In [24]:
# Same three layers as the nn.Sequential model, built with the hand-written container
MyMLP = MySequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Print both containers, one after the other, for comparison
print(MLP, MyMLP, sep='\n')

Sequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)
MySequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


## ModuleList类
接受子模块的列表，也可以做类似列表的append, extend操作

In [25]:
# Start from a one-element ModuleList and grow it like a Python list
net = nn.ModuleList([nn.Linear(784, 256)])
net.extend([nn.ReLU(), nn.Linear(256, 10)])

# Negative indexing works as on a list; then show the whole container
print(net[-1], net, sep='\n')

Linear(in_features=256, out_features=10, bias=True)
ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


### Sequential与ModuleList的区别
Sequential内的模块按顺序串联执行，需要保证相邻层的输入输出维度匹配，并且已自动实现forward功能    
ModuleList只是存储模块的列表，模块之间没有隐含的连接关系和执行顺序，需要自己实现forward（相比起来更灵活）

In [26]:
class MyModule(nn.Module):
    """Ten 10->10 linear layers kept in an nn.ModuleList, with a hand-written forward."""

    def __init__(self):
        super(MyModule, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)])

    def forward(self, x):
        """At step i, replace x by linears[i // 2](x) + linears[i](x)."""
        # A ModuleList can be iterated and also indexed with integers
        for i, linear in enumerate(self.linears):
            x = self.linears[i // 2](x) + linear(x)
        return x

但要注意的是，ModuleList并不完全等同于Python的list：ModuleList中模块的参数会被自动添加进网络，而普通list中模块的参数不会

In [29]:
class Module_ModuleList(nn.Module):
    """Module that keeps its single linear layer inside an nn.ModuleList."""

    def __init__(self):
        super().__init__()
        layer = nn.Linear(10, 10)
        self.linears = nn.ModuleList([layer])

class Module_List(nn.Module):
    """Module that keeps its single linear layer inside a plain Python list."""

    def __init__(self):
        super().__init__()
        layer = nn.Linear(10, 10)
        # Deliberately a plain list: this is the counterpart to the ModuleList variant
        self.linears = [layer]

net1 = Module_ModuleList()
net2 = Module_List()

# Show both module structures, one after the other
print(net1, net2, sep='\n')

# List the size of every parameter each module reports
for param in net1.parameters():
    print(param.shape)

for param in net2.parameters():
    print(param.shape)

Module_ModuleList(
  (linears): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
  )
)
Module_List()
torch.Size([10, 10])
torch.Size([10])


## ModuleDict类
接受子模块的字典作为输入，操作方式类似字典。同样需要自己定义forward

In [32]:
net = nn.ModuleDict({
    'Linear': nn.Linear(784, 256),
    'activation': nn.ReLU(),
})
# Add another entry with dict-style item assignment
net['output'] = nn.Linear(256, 10)
# An entry can be read by key or as an attribute; finally show the whole container
print(net['Linear'], net.output, net, sep='\n')

Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
  (Linear): Linear(in_features=784, out_features=256, bias=True)
  (activation): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


# 2.构造复杂模型

虽然Sequential类无需定义forward，但它只适用于简单串联的模型。多数时候，我们还是会直接继承Module类，因为这样更加灵活。下面来构建一个复杂一些的模型

In [48]:
class FancyMLP(nn.Module):
    """Model with a fixed random weight, a reused linear layer and data-dependent control flow.

    forward() returns a scalar: the sum of the rescaled activations.
    """

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        # Non-trainable constant. Registered as a buffer so that it is excluded
        # from parameters() but still follows .to()/.double()/.cuda() and is
        # saved in state_dict; a plain tensor attribute would do neither.
        self.register_buffer('rand_weight', torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Apply linear -> fixed random projection + 1 -> ReLU -> linear, rescale, and sum."""
        x = self.linear(x)
        # Buffers do not require grad, so no gradient flows into rand_weight
        x = nn.functional.relu(torch.mm(x, self.rand_weight) + 1)
        # Reuse self.linear: both applications share the same parameters
        x = self.linear(x)

        # Halve until the norm is at most 1, then scale up if it ended below 0.8
        while x.norm().item() > 1:
            x /= 2
        if x.norm().item() < 0.8:
            x *= 10
        return x.sum()

In [54]:
# Draw the input first, then build the model, so the random draws happen in this order
X = torch.rand((2, 20))
net = FancyMLP()
print(net)
# Bare expression: the notebook displays the scalar result of the forward pass
net(X)

FancyMLP(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)


tensor(0.6315, grad_fn=<SumBackward0>)

当然，Module的子类可以互相嵌套使用

In [58]:
class NestMLP(nn.Module):
    """Module that wraps a small nn.Sequential (Linear 40->30 followed by ReLU)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        layers = [nn.Linear(40, 30), nn.ReLU()]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Delegate the whole computation to the wrapped Sequential."""
        return self.net(x)

# Module subclasses nest freely: a custom module, a plain layer and another custom module
net = nn.Sequential(
    NestMLP(),
    nn.Linear(30, 20),
    FancyMLP(),
)

X = torch.rand((2, 40))
print(net)
# Bare expression: the notebook displays the scalar result of the forward pass
net(X)


Sequential(
  (0): NestMLP(
    (net): Sequential(
      (0): Linear(in_features=40, out_features=30, bias=True)
      (1): ReLU()
    )
  )
  (1): Linear(in_features=30, out_features=20, bias=True)
  (2): FancyMLP(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
)


tensor(-15.4066, grad_fn=<SumBackward0>)