### 模型构造:
    以构造含单隐藏层的多层感知机为例，其中第一层输出大小为 256，第二层输出大小为 10

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
import torchvision

### 继承 Module 类来构造模型:
    Module类是nn模块里提供的一个模型构造类，是所有神经网络模块的基类，可以通过继承它来定义想要的模型

In [6]:
class MLP(torch.nn.Module):
    """Multilayer perceptron with a single hidden layer and ReLU activation.

    Args:
        num_inputs: Number of features per sample once the input is flattened.
        num_hiddens: Output size of the hidden fully connected layer.
        num_outputs: Output size of the final fully connected layer.
    """

    # Declare the layers that own the model parameters: two fully connected
    # layers with a ReLU in between.
    def __init__(self, num_inputs, num_hiddens, num_outputs):
        super().__init__()
        # Kept so forward() can flatten every sample to this many features.
        self.num_input = num_inputs
        self.hidden = torch.nn.Linear(num_inputs, num_hiddens)  # hidden layer
        self.act = torch.nn.ReLU()
        self.output = torch.nn.Linear(num_hiddens, num_outputs)  # output layer

    # Forward computation: how the model output is derived from the input x.
    def forward(self, x):
        """Flatten ``x`` to ``(-1, num_inputs)`` and return the output layer's result."""
        # reshape() rather than view(): view() raises when the tensor's memory
        # layout is incompatible with the requested shape (e.g. a transposed
        # image batch), whereas reshape() only copies when it has to.
        flat = x.reshape(-1, self.num_input)
        return self.output(self.act(self.hidden(flat)))

In [8]:
# Layer sizes: 28 * 28 input features, a 256-unit hidden layer, 10 outputs.
num_inputs = 28 * 28
num_hiddens = 256
num_outputs = 10

# Random batch of two samples used to exercise the model.
x = torch.randn(2, num_inputs)
print(x.shape)

# Build the model, show its structure, then run one forward pass.
net = MLP(num_inputs, num_hiddens, num_outputs)
print(net)
net(x)

torch.Size([2, 784])
MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[ 0.0642,  0.2110, -0.1054,  0.1317, -0.0462,  0.3714, -0.1830, -0.2279,
          0.4248, -0.0802],
        [ 0.3755, -0.0987, -0.2511, -0.0330, -0.0279,  0.1122, -0.1949,  0.0027,
          0.4253,  0.3917]], grad_fn=<AddmmBackward0>)

### Module的子类:
    Module类是一个通用的部件
    PyTorch还实现了继承自Module的可以方便构建模型的类: 如 Sequential、ModuleList 和 ModuleDict等等

#### Sequential类:
    当模型的前向计算为简单串联各个层的计算时，Sequential类可以通过更加简单的方式定义模型
        Sequential类的目的：它可以接收一个子模块的有序字典（OrderedDict）或者一系列子模块作为参数来逐一添加Module的实例
        
    通过该方法定义，模型中的各层是有顺序的

In [11]:
# Sequential chains the listed sub-modules; its built-in forward() runs them
# in the order given here.
net = torch.nn.Sequential(
    torch.nn.Linear(num_inputs,num_hiddens),
    torch.nn.ReLU(),
    torch.nn.Linear(num_hiddens,num_outputs)
)
# Roughly equivalent to net = MLP(num_inputs, num_hiddens, num_outputs);
# unlike this Sequential, MLP.forward also flattens its input first.

In [12]:
net  # bare expression: the notebook displays the Sequential's layer structure

Sequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)

#### ModuleList类:
    ModuleList接收一个子模块的列表作为输入，然后可以类似List那样进行append和extend操作

In [107]:
# Start from a two-module list, then grow it the way a Python list grows.
net = torch.nn.ModuleList(
    [
        torch.nn.Linear(num_inputs, 256),
        torch.nn.ReLU(),
    ]
)
print('append前：\n', net)

# The 128->num_outputs layer is stored before the 256->128 layer, so the
# stored order is not a runnable pipeline; ModuleList does not check shapes.
net.append(torch.nn.Linear(128, num_outputs))
net.append(torch.nn.Linear(256, 128))
print('append后:\n', net)

# Indexing (including negative indices) works as it does on a list.
print('通过索引访问各层:\n', net[-1])

append前：
 ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
)
append后:
 ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=10, bias=True)
  (3): Linear(in_features=256, out_features=128, bias=True)
)
通过索引访问各层:
 Linear(in_features=256, out_features=128, bias=True)


In [114]:
# Drive the stored modules by hand in a valid execution order:
# Linear(784->256) -> ReLU -> Linear(256->128) -> ReLU -> Linear(128->10).
# Index 3 is applied before index 2 because the list order is not the
# execution order.
out = net[1](net[0](x))
out = net[1](net[3](out))
result = net[2](out)
result.shape

torch.Size([2, 10])

##### Sequential和ModuleList都可以进行列表化构造网络，区别:
    ModuleList仅仅是一个储存各种模块的列表：模块按添加顺序存放并可通过索引访问，但存放顺序并不决定前向计算的执行顺序,
        即不用保证相邻层的输入输出维度匹配，而且没有实现forward功能，需要自己实现
        
    Sequential内的模块需要按照顺序排列，要保证相邻层的输入输出大小相匹配，内部forward功能已经实现

In [115]:
# ModuleList only stores the layers; writing forward() by hand is what makes
# the forward pass flexible.
class MyModule(torch.nn.Module):
    """Two linear layers (784 -> 256 -> 10) held in a ModuleList.

    ``forward`` applies the layers in list order and passes the result of
    every layer, including the last one, through a sigmoid.
    """

    def __init__(self):
        super().__init__()
        self.linears = torch.nn.ModuleList([
            torch.nn.Linear(28 * 28, 256),
            torch.nn.Linear(256, 10),
        ])
        self.act = torch.nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` after each stored linear layer followed by the sigmoid."""
        # Iterate the ModuleList directly; no index bookkeeping is needed.
        for layer in self.linears:
            x = self.act(layer(x))
        return x

In [116]:
# Instantiate the ModuleList-based model; the bare name on the next line makes
# the notebook display its structure.
net = MyModule()
net

MyModule(
  (linears): ModuleList(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=10, bias=True)
  )
  (act): Sigmoid()
)

In [117]:
# Random batch of two samples with 28*28 features each; MyModule.forward ends
# with a sigmoid, so the displayed values lie between 0 and 1.
x = torch.randn(2,28*28)
net(x)

tensor([[0.5111, 0.4616, 0.5501, 0.4119, 0.6059, 0.5918, 0.5332, 0.5335, 0.4305,
         0.4957],
        [0.5402, 0.4827, 0.5329, 0.4475, 0.6109, 0.5786, 0.4966, 0.5325, 0.4122,
         0.5174]], grad_fn=<SigmoidBackward0>)

#### ModuleDict类:
    ModuleDict接收一个子模块的字典作为输入, 然后也可以类似字典那样进行添加访问操作
    
    和ModuleList一样，ModuleDict实例仅仅是存放了一些模块的字典，并没有定义forward函数，需要自己定义

In [118]:
# ModuleDict keeps sub-modules under string keys, here a linear layer and
# its activation.
net = torch.nn.ModuleDict(
    {
        'linear_1': torch.nn.Linear(28 * 28, 256),
        'act': torch.nn.ReLU(),
    }
)

In [119]:
# Show the dict, add a new entry with item assignment, then show it again and
# look one module up by key.
print('添加前:\n',net)
net['output'] = torch.nn.Linear(256,10)
print('添加后:\n',net)
print('访问:',net['act'])

添加前:
 ModuleDict(
  (linear_1): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
)
添加后:
 ModuleDict(
  (linear_1): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)
访问: ReLU()


### 构造复杂的模型:
    在下例网络中，创建一个在训练中不会被更新的参数，即常数参数

In [124]:
class FancyMLP(torch.nn.Module):
    """Demo network combining a trainable linear layer with a constant weight.

    The single ``linear`` layer is applied twice in ``forward``, so both
    applications share one set of parameters. ``rand_weight`` is a fixed
    random 20x20 matrix that is never trained.
    """

    def __init__(self):
        super().__init__()

        # Constant, non-trainable weight. Registered as a non-persistent
        # buffer: no gradient is tracked for it, it is excluded from
        # parameters(), it follows the module across .to()/.cuda() calls,
        # and the state_dict() keys stay unchanged.
        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)

        self.linear = torch.nn.Linear(20, 20)

    def forward(self, x):
        """Return ``linear(relu(linear(x) @ rand_weight))`` for a 2-D ``x`` with 20 columns."""
        x = self.linear(x)
        # Use the constant weight together with the functional relu and mm.
        x = torch.nn.functional.relu(torch.mm(x, self.rand_weight))
        # Reuse the same fully connected layer: equivalent to two linear
        # layers sharing their parameters.
        return self.linear(x)

In [127]:
# Run FancyMLP once on a random batch of two 20-feature samples and print the
# module structure.
x = torch.randn(2,20)
net = FancyMLP()
print(net)
net(x)

FancyMLP(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)


tensor([[ 0.0990, -0.1480, -0.0983,  0.0971,  0.0212,  0.2084, -0.2221,  0.1330,
          0.0380, -0.0148, -0.1950, -0.1751, -0.2196, -0.0632,  0.1342, -0.1370,
         -0.0956, -0.0848,  0.0536, -0.1580],
        [ 0.9119, -0.0256, -1.0380,  1.1922,  0.1304, -0.6293, -0.9572, -0.3028,
         -0.7028,  0.2416, -0.1597, -0.4243, -1.3067,  0.5696, -1.2758,  0.2742,
         -0.0214,  0.2981,  0.4599, -0.2188]], grad_fn=<AddmmBackward0>)

In [133]:
# FancyMLP and Sequential are both Module subclasses, so they can be nested
# inside one another. The inner Sequential (20 -> 10) is built first and then
# placed between a FancyMLP and a final Linear(10, 1).
net = torch.nn.Sequential(torch.nn.Linear(20,10))
net = torch.nn.Sequential(FancyMLP(),net,torch.nn.Linear(10,1))
net

Sequential(
  (0): FancyMLP(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
  (1): Sequential(
    (0): Linear(in_features=20, out_features=10, bias=True)
  )
  (2): Linear(in_features=10, out_features=1, bias=True)
)