# 第1节-层(Layer)和块(block)

实现一个MLP块, 它包含一个节点数量为num_hidden的隐含层, 接受特征数量为num_in的输入, 经过线性变换+激活函数+线性变换得到分量数目大小为num_out的输出

In [1]:
import torch
from torch import nn
from torch.nn import functional as F
class MyMLP(nn.Module):
    """Multilayer perceptron with a single hidden layer.

    The input goes through a linear layer, a ReLU activation and a second
    linear layer.

    Args:
        num_in: Number of input features.
        num_hidden: Number of units in the hidden layer.
        num_out: Number of output components.
    """

    def __init__(self, num_in, num_hidden, num_out):
        super().__init__()
        # Input-side layer first, output-side layer second.
        self.linearI = nn.Linear(num_in, num_hidden)
        self.linearO = nn.Linear(num_hidden, num_out)

    def forward(self, X):
        """Return ``linearO(relu(linearI(X)))``."""
        hidden = self.linearI(X)
        activated = F.relu(hidden)
        return self.linearO(activated)


In [2]:
# Build an MLP with 4 input features, 32 hidden units and 2 outputs.
mlp1 = MyMLP(4, 32, 2)
# A batch of two samples with four features each.
iX = torch.tensor([[2, 3, 4, 5], [5, 3, 2, 1.0]])
# Calling the module object runs its forward method.
oy = mlp1(iX)
# Last expression of the cell: displays the input, its shape, the output and its shape.
iX, iX.shape, oy, oy.shape

(tensor([[2., 3., 4., 5.],
         [5., 3., 2., 1.]]),
 torch.Size([2, 4]),
 tensor([[-1.1490,  2.7165],
         [-0.7219,  0.3292]], grad_fn=<AddmmBackward0>),
 torch.Size([2, 2]))

注意以上MyMLP的实现中有两个细节: 一是`__init__`中先调用了父类的默认初始化(`super().__init__()`), 其后的初始化内容是可以自定义的; 二是MyMLP仅重写了forward方法, 在实例化MyMLP得到对象mlp1后, 调用mlp1(iX)即完成了forward方法的调用

---
实现一个MySequential类, 它应当具有与Sequential完全相同的行为, 也即对于传入的模块, 应该按照顺序像链条一样地依次执行它们

In [51]:
class MySeqList(nn.Module):
    """Naive sequential container that keeps its layers in a plain list.

    The layers are applied in the order they were passed in. Because they
    are held in an ordinary Python list they are not registered as
    submodules, so printing the container shows no inner layers. The
    instance attribute ``modules`` also hides ``nn.Module.modules()`` on
    instances of this class.
    """

    def __init__(self, *modules):
        super().__init__()
        # A plain list holds the layers in call order.
        self.modules = list(modules)

    def forward(self, X):
        """Feed ``X`` through every stored layer in order and return the result."""
        out = X
        for layer in self.modules:
            out = layer(out)
        return out

In [50]:
# Chain Linear(4, 32) -> ReLU -> Linear(32, 2) through the list-based container.
mlp2 = MySeqList(nn.Linear(4, 32), nn.ReLU(), nn.Linear(32, 2))
oy2 = mlp2(iX)
mlp2, oy2 # nothing seems wrong at first glance?

(MySequential(),
 tensor([[ 0.2682, -0.7114],
         [ 0.6078, -0.3268]], grad_fn=<AddmmBackward0>))

`for i, val in enumerate(args)`的意思是依次枚举所有传入的参数, 同时得到每个参数的序号i和参数本身val

In [59]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The modules passed to the constructor are registered under the names
    ``"0"``, ``"1"``, ... and applied to the input one after another in
    that order.

    Args:
        *args: The modules to chain, in execution order.

    Raises:
        TypeError: If an argument is something other than an ``nn.Module``
            (or ``None``); raised by ``add_module`` at construction time.
    """

    def __init__(self, *args):
        super().__init__()
        for i, m in enumerate(args):
            # add_module validates the argument and stores it in the ordered
            # ``_modules`` dict, so insertion order is the execution order.
            self.add_module(str(i), m)

    def forward(self, X):
        """Pass ``X`` through every registered module in order."""
        # Iterate the registry directly instead of children(): children()
        # skips repeated instances, but a module passed twice must run twice.
        for m in self._modules.values():
            X = m(X)
        return X
# Same three layers as before, this time through MySequential.
mlp3 = MySequential(nn.Linear(4, 32), nn.ReLU(), nn.Linear(32, 2))
oy3 = mlp3(iX)
mlp3, oy3 
# This is where the difference shows: the inner layers can now be printed;
# the key point is the ordered data structure used to store them.

(MySequential(
   (0): Linear(in_features=4, out_features=32, bias=True)
   (1): ReLU()
   (2): Linear(in_features=32, out_features=2, bias=True)
 ),
 tensor([[0.2972, 0.8175],
         [0.3226, 0.7153]], grad_fn=<AddmmBackward0>))

### 把模块嵌套起来
接下来, 试着实现一个嵌套的模块吧, 它可以由您任意的定义

In [69]:
# A neural-network module named "Double Eleven" (the Nov 11 shopping festival);
# written on Nov 3rd, just after the first round of the sale had ended.
class DoubleEleven(nn.Module):
    """Nested block: three MyMLP sub-blocks chained with ReLU in between.

    Feature sizes along the chain are 3 -> 12 -> 32 -> 8.
    """

    def __init__(self):
        super().__init__()
        self.l1 = MyMLP(3, 32, 12)
        self.l2 = MyMLP(12, 16, 32)
        self.l3 = MyMLP(32, 16, 8)
        # l1-l3 are kept as attributes and are also placed inside ``net``, so
        # each one is listed twice when the module is printed or its
        # state_dict keys are listed; both entries are the same object.
        self.net = MySequential(self.l1, nn.ReLU(), self.l2, nn.ReLU(), self.l3)

    def forward(self, X):
        """Run ``X`` through the chained sub-blocks."""
        return self.net(X)

m1 = DoubleEleven()
# Rebind iX to a batch of four samples with three features each.
iX = torch.tensor([[1.0, 2.2, 3.2], [2, 1, 3], [3, 1, 2], [2, 3, 1]])
print(iX.shape)
# Printing the module lists its nested sub-blocks.
print(m1)
print(m1(iX)) 

torch.Size([4, 3])
DoubleEleven(
  (l1): MyMLP(
    (linearI): Linear(in_features=3, out_features=32, bias=True)
    (linearO): Linear(in_features=32, out_features=12, bias=True)
  )
  (l2): MyMLP(
    (linearI): Linear(in_features=12, out_features=16, bias=True)
    (linearO): Linear(in_features=16, out_features=32, bias=True)
  )
  (l3): MyMLP(
    (linearI): Linear(in_features=32, out_features=16, bias=True)
    (linearO): Linear(in_features=16, out_features=8, bias=True)
  )
  (net): MySequential(
    (0): MyMLP(
      (linearI): Linear(in_features=3, out_features=32, bias=True)
      (linearO): Linear(in_features=32, out_features=12, bias=True)
    )
    (1): ReLU()
    (2): MyMLP(
      (linearI): Linear(in_features=12, out_features=16, bias=True)
      (linearO): Linear(in_features=16, out_features=32, bias=True)
    )
    (3): ReLU()
    (4): MyMLP(
      (linearI): Linear(in_features=32, out_features=16, bias=True)
      (linearO): Linear(in_features=16, out_features=8, bias=T

接下来, 想实现这样一个模块: 它接受两个模块net1, net2作为参数, 把同一个输入分别送入二者, 再把它们的输出拼接起来作为最终输出, 这称为并行

In [70]:
class Parallel(nn.Module):
    """Run two sub-networks on the same input and join their outputs.

    Args:
        net1: Module producing the first part of the output.
        net2: Module producing the second part of the output.
    """

    def __init__(self, net1, net2):
        super().__init__()
        self.net1, self.net2 = net1, net2

    def forward(self, X):
        """Return both branch outputs concatenated along dimension 1."""
        branch_outputs = tuple(branch(X) for branch in (self.net1, self.net2))
        return torch.cat(branch_outputs, dim=1)

m1 = DoubleEleven() # maps 3 features to 8
m2 = MyMLP(3, 15, 7) # maps 3 features to 7
# Both branches receive the same input; their 8 + 7 outputs are joined into 15 columns.
pm = Parallel(m1, m2)
print(iX.shape)
opm = pm(iX)
print(opm)

torch.Size([4, 3])
tensor([[-0.1334, -0.2063, -0.1213, -0.1381, -0.0088,  0.2753, -0.1507, -0.3019,
         -0.7577,  0.1774, -0.4720, -0.0535, -0.4564, -0.1063,  0.7908],
        [-0.1334, -0.2066, -0.1204, -0.1366, -0.0088,  0.2740, -0.1529, -0.3000,
         -0.5569,  0.0771, -0.4934, -0.1579, -0.2295,  0.1907,  0.4243],
        [-0.1299, -0.2072, -0.1208, -0.1367,  0.0011,  0.2685, -0.1549, -0.3033,
         -0.3735, -0.0166, -0.4615, -0.1571, -0.1250,  0.2659,  0.3180],
        [-0.1307, -0.2048, -0.1196, -0.1382,  0.0010,  0.2663, -0.1555, -0.2996,
         -0.4522,  0.0484, -0.6037,  0.3139, -0.3515,  0.0557,  0.6013]],
       grad_fn=<CatBackward0>)


In [71]:
# List the parameter names of the nested module. The sub-MLPs of net1 show up
# twice (net1.l1.* and net1.net.0.*, and so on) because DoubleEleven holds each
# of them both as an attribute and inside its MySequential.
print(pm.state_dict().keys()) # state_dict() is an OrderedDict

odict_keys(['net1.l1.linearI.weight', 'net1.l1.linearI.bias', 'net1.l1.linearO.weight', 'net1.l1.linearO.bias', 'net1.l2.linearI.weight', 'net1.l2.linearI.bias', 'net1.l2.linearO.weight', 'net1.l2.linearO.bias', 'net1.l3.linearI.weight', 'net1.l3.linearI.bias', 'net1.l3.linearO.weight', 'net1.l3.linearO.bias', 'net1.net.0.linearI.weight', 'net1.net.0.linearI.bias', 'net1.net.0.linearO.weight', 'net1.net.0.linearO.bias', 'net1.net.2.linearI.weight', 'net1.net.2.linearI.bias', 'net1.net.2.linearO.weight', 'net1.net.2.linearO.bias', 'net1.net.4.linearI.weight', 'net1.net.4.linearI.bias', 'net1.net.4.linearO.weight', 'net1.net.4.linearO.bias', 'net2.linearI.weight', 'net2.linearI.bias', 'net2.linearO.weight', 'net2.linearO.bias'])


就目前而言，我们所做的事就是把已经定义好的模块进行组合嵌套，在forward上也是如此，使用默认的输出方式，那么forward是否可以包含一般python计算程序中的控制流呢？

In [74]:
class Verbose(nn.Module):
    """Block whose forward pass mixes a layer, a fixed random matrix and Python control flow.

    ``forward`` applies the linear layer, multiplies by a constant random
    matrix, adds 1 and applies ReLU, then halves the result until the sum of
    its absolute values is at most 1, and finally returns the scalar sum.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 5)
        # Randomly initialised constant weight that takes no part in
        # backpropagation. It is registered as a non-persistent buffer so it
        # follows the module through .to()/.double()/.cuda() while staying
        # out of state_dict, exactly as the plain attribute did.
        self.register_buffer(
            "rand_constant_weight",
            torch.randn(size=(5, 4), requires_grad=False),
            persistent=False,
        )

    def forward(self, X):
        """Return a scalar tensor computed from ``X`` as described on the class."""
        X = self.linear(X)
        X = F.relu(torch.matmul(X, self.rand_constant_weight) + 1)
        # Ordinary Python control flow is allowed inside forward.
        while torch.abs(X).sum() > 1:
            X = X / 2
        return X.sum()

vm = Verbose()
print(iX)
# Only the Linear layer is listed when the module is printed.
print(vm)
# The forward pass returns a single scalar tensor.
print(vm(iX))

tensor([[1.0000, 2.2000, 3.2000],
        [2.0000, 1.0000, 3.0000],
        [3.0000, 1.0000, 2.0000],
        [2.0000, 3.0000, 1.0000]])
Verbose(
  (linear): Linear(in_features=3, out_features=5, bias=True)
)
tensor(0.6649, grad_fn=<SumBackward0>)


In [76]:
# state_dict() holds only the Linear layer's weight and bias ...
print(vm.state_dict())
# ... while the constant (5, 4) weight has to be inspected separately.
print(vm.rand_constant_weight)

OrderedDict([('linear.weight', tensor([[-0.4789, -0.4629, -0.3664],
        [ 0.2398, -0.4736, -0.5287],
        [-0.2510,  0.0498,  0.4063],
        [ 0.2895,  0.2333,  0.5280],
        [ 0.5192, -0.2114,  0.3307]])), ('linear.bias', tensor([ 0.4119,  0.3542, -0.5392, -0.3425, -0.4594]))])
tensor([[ 0.2239, -0.2493,  1.2607, -2.0692],
        [-0.3456,  0.6365,  1.1249, -0.7855],
        [ 0.4827,  0.0264, -1.0951,  0.2563],
        [ 0.8257, -0.1934,  1.4364,  0.5306],
        [-0.5577,  0.7912, -0.9933, -0.0128]])


[python函数之传递多个参数](https://blog.csdn.net/u011607898/article/details/107585700)
1、在python自定义函数中，如果需要传入的实际参数有多个，我们在定义形式参数的时候，可以有两种形式，一是`*parameter`，二是`**parameter`。这两种分别提供了传入的参数是多个的形式。
`*parameter`表示接收任意多个位置实参并将其放到一个元组中; 调用时也可以用`*`把一个列表或元组拆开, 从而一次性传入多个数据。
```
def printcoff(*para):
    """Print every positional argument on its own line."""
    for entry in para:
        print(entry)
# Pass three positional arguments directly.
printcoff("karl","inter","killer")
plist = [1,2,3,4,5,6,7,8,9,0]
# Unpack the list into separate positional arguments with *.
printcoff(*plist)
```
`**parameter`表示接收任意多个以关键字参数形式显式赋值的实际参数, 并将其放到一个字典中。
```
def printcoff(**para):
    """Print each keyword argument as ``name value`` on its own line."""
    for name, val in para.items():
        print(name, val)
pdict = {"1":"karl","2":"inter","3":"killer","4":"python"}
# Unpack the dict into keyword arguments with **.
printcoff(**pdict)
```