# 层和块
#### 首先，回顾一下多层感知机

In [6]:
import torch
from torch import nn
from torch.nn import functional as F

# nn.Sequential defines a special kind of Module that holds the given layers.
# Here: Linear(20 -> 256), ReLU, Linear(256 -> 10).
net = nn.Sequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10)) 

# Random input with 2 rows of 20 features each.
x = torch.rand(2,20)
print(x)
net(x)

tensor([[0.6982, 0.2854, 0.0290, 0.7186, 0.0909, 0.6777, 0.8265, 0.5853, 0.7808,
         0.5695, 0.8137, 0.1170, 0.5396, 0.8231, 0.2252, 0.2300, 0.7124, 0.4999,
         0.1046, 0.4006],
        [0.3517, 0.9393, 0.3400, 0.3913, 0.5782, 0.0842, 0.9048, 0.2108, 0.1988,
         0.1807, 0.5425, 0.7421, 0.3162, 0.5529, 0.9269, 0.2348, 0.2071, 0.1541,
         0.9291, 0.3744]])


tensor([[ 0.2138, -0.2288, -0.0046, -0.0808,  0.3115, -0.2133, -0.0592,  0.0755,
          0.2101, -0.0441],
        [ 0.3398, -0.2338, -0.0798, -0.1090,  0.4404,  0.0276, -0.0327,  0.0859,
          0.2104,  0.0121]], grad_fn=<AddmmBackward0>)

# 自定义块

In [9]:
# A hand-written MLP
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer: 20 -> 256 -> 10.

    The hidden layer output goes through ReLU before the output layer.
    """

    def __init__(self):
        super().__init__()
        # Hidden layer: 20 input features -> 256 hidden units.
        self.hidden = nn.Linear(20, 256)
        # Output layer: 256 hidden units -> 10 outputs.
        self.out = nn.Linear(256, 10)

    def forward(self, x):
        """Return the output-layer result for input ``x``."""
        activated = F.relu(self.hidden(x))
        return self.out(activated)
    
net = MLP()
net(x)

tensor([[-0.0729, -0.0254,  0.0320,  0.2166, -0.1733,  0.0527, -0.2350, -0.1192,
         -0.0709, -0.2738],
        [-0.1414, -0.0980,  0.1425,  0.1301, -0.1586, -0.0246, -0.4771, -0.0048,
         -0.1115, -0.3160]], grad_fn=<AddmmBackward0>)

# 顺序块

In [15]:
# 自定义Sequential
"""好处：可以在init和forward中做大量的自定义计算"""
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: modules to chain, applied in the order given.

    The modules are registered in ``self._modules`` under the string keys
    ``'0'``, ``'1'``, ... so that parameter names, ``state_dict()`` and
    ``print(net)`` work as they do for any other ``nn.Module``.
    """
    def __init__(self,*args):
        super().__init__()
        for idx, block in enumerate(args):
            # Keys of ``_modules`` must be strings: nn.Module builds dotted
            # parameter names from them (e.g. '0.weight'). Using the module
            # object itself as key makes named_parameters()/state_dict()/repr
            # raise TypeError.
            self._modules[str(idx)] = block

    def forward(self,x):
        """Feed ``x`` through every registered module in insertion order."""
        for block in self._modules.values():
            x = block(x)
        return x
net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10)) 
net(x)

tensor([[ 0.0018,  0.1212,  0.3529, -0.1704,  0.0961,  0.2580,  0.1898, -0.0875,
          0.1653, -0.2605],
        [ 0.0881,  0.0433,  0.2426, -0.2055,  0.0274,  0.1916,  0.0390,  0.1200,
          0.0262, -0.2085]], grad_fn=<AddmmBackward0>)

# 参数管理
#### 首先关注具有单隐藏层的MLP

In [22]:
import torch
from torch import nn


net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1)) 

x = torch.rand(size=(2,4)) 
print(x)
net(x)

tensor([[0.5272, 0.1047, 0.4811, 0.8123],
        [0.7596, 0.0793, 0.8465, 0.8960]])


tensor([[0.2356],
        [0.2899]], grad_fn=<AddmmBackward0>)

# 参数访问

In [26]:
print(net[2].state_dict())  # weight and bias of nn.Linear(8,1), the last layer of the Sequential
print(net[1].state_dict())  # nn.ReLU() has no parameters, so its state dict is empty
print(net[0].state_dict())  # weight and bias of nn.Linear(4,8), the first layer of the Sequential

OrderedDict([('weight', tensor([[ 0.3103, -0.2626, -0.0823,  0.0540, -0.1214, -0.2585,  0.1806,  0.2616]])), ('bias', tensor([0.1019]))])
OrderedDict()
OrderedDict([('weight', tensor([[ 0.3585, -0.4571,  0.1050,  0.4415],
        [-0.0475, -0.1293, -0.2974, -0.1900],
        [-0.2959, -0.4203, -0.2763,  0.4588],
        [ 0.3156, -0.4127, -0.3548, -0.3061],
        [ 0.4890, -0.3549, -0.1819, -0.4735],
        [ 0.2545,  0.3148, -0.1795,  0.1740],
        [-0.0910, -0.0700,  0.1461,  0.2510],
        [ 0.1270, -0.1825, -0.3972,  0.4853]])), ('bias', tensor([-0.1729, -0.4285,  0.3623, -0.4897, -0.3994,  0.3708,  0.4954,  0.0322]))])


# 目标参数

In [28]:
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data) # .data gives the underlying tensor value of the parameter

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.1019], requires_grad=True)
tensor([0.1019])


In [31]:
net[2].weight.grad is None # .grad holds the gradient; no backward pass has been run yet, so it is still None

True

# 一次性访问所有参数

In [33]:
print(*[(name,param.shape) for name,param in net[0].named_parameters()])
print(*[(name,param.shape) for name,param in net.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


# 从嵌套块收集参数

In [38]:
def block1():
    """Build a small block: Linear(4 -> 8), ReLU, Linear(8 -> 4), ReLU."""
    layers = [nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU()]
    return nn.Sequential(*layers)

def block2():
    """Stack four ``block1()`` instances, registered as 'block0' .. 'block3'."""
    stacked = nn.Sequential()
    # Build the (name, module) pairs first, then register them in order.
    named_blocks = [(f'block{i}', block1()) for i in range(4)]
    for name, sub_block in named_blocks:
        stacked.add_module(name, sub_block)
    return stacked
rgnet = nn.Sequential(block2(),nn.Linear(4,1))
rgnet(x)

tensor([[0.3437],
        [0.3437]], grad_fn=<AddmmBackward0>)

In [39]:
print(rgnet)

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


# 内置初始化

In [44]:
# 将参数修改成一个正态分布的数据
def init_normal(m):
    """Initialise an ``nn.Linear``: weights from N(0, 0.01**2), bias set to 0.

    Meant to be passed to ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: the module to (possibly) initialise.
    """
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight,mean=0,std=0.01)
        # A layer created with ``bias=False`` has ``m.bias is None``;
        # calling an init function on None would raise.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
        
net.apply(init_normal)
net[0].weight.data[0],net[0].bias.data[0]

(tensor([0.0103, 0.0016, 0.0021, 0.0064]), tensor(0.))

In [53]:
def init_constant(m):
    """Initialise an ``nn.Linear``: every weight set to 1, bias set to 0.

    Meant to be passed to ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: the module to (possibly) initialise.
    """
    if type(m) is nn.Linear:
        nn.init.constant_(m.weight,1)
        # A layer created with ``bias=False`` has ``m.bias is None``;
        # calling an init function on None would raise.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
        
net.apply(init_constant)
net[0].weight.data[0],net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))

# 对某些块采用不同的初始化方法

In [54]:
def xavier(m):
    """Apply Xavier-uniform initialisation to the weight of an ``nn.Linear``.

    Modules whose type is not exactly ``nn.Linear`` are ignored; the bias is
    not modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
        
def init_constant(m):
    """Initialise an ``nn.Linear``: every weight set to 1, bias set to 0.

    Meant to be passed to ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: the module to (possibly) initialise.
    """
    if type(m) is nn.Linear:
        nn.init.constant_(m.weight,1)
        # A layer created with ``bias=False`` has ``m.bias is None``;
        # calling an init function on None would raise.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
        
net[0].apply(xavier) 
net[2].apply(init_constant)
print(net[0].weight.data)
print(net[2].weight.data)

tensor([[-0.3874,  0.0306, -0.3589, -0.3128],
        [-0.4779,  0.1630, -0.7012,  0.2944],
        [ 0.5970,  0.1403, -0.3848, -0.4583],
        [-0.2186,  0.5004, -0.7043, -0.3721],
        [-0.6522,  0.5792, -0.6901, -0.2923],
        [-0.4088,  0.4512, -0.4216, -0.1689],
        [-0.0648,  0.2056, -0.6500, -0.4567],
        [ 0.2559, -0.0684, -0.3333,  0.3479]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1.]])


In [63]:
# Parameter values can also be read and modified directly through .data.
# NOTE: the update is in place, so every re-run of this cell adds 1 again.
net[0].weight.data[:]+=1
net[0].weight.data[0]

tensor([2.6126, 3.0306, 2.6411, 2.6872])

# 参数绑定

In [64]:
# Parameter tying: the same Linear instance is used at two positions of the network.
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,nn.ReLU(),nn.Linear(8,1))

net(x)
# net[2] and net[4] are the same `shared` object, so their weights compare equal ...
print(net[2].weight.data[0]==net[4].weight.data[0])
net[2].weight.data[0,0] = 100
# ... and a change made through net[2] is visible through net[4] as well.
print(net[2].weight.data[0]==net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])


# 自定义层
#### 构造一个没有任何参数的自定义层

In [66]:
import torch
from torch import nn
from torch.nn import functional as F

class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean of all elements from its input."""

    def __init__(self,*args):
        # Positional arguments are accepted but not used.
        super().__init__()

    def forward(self,x):
        """Return ``x`` shifted by its overall mean."""
        centre = x.mean()
        return x - centre
    
layer = CenteredLayer()
layer(torch.FloatTensor([1,2,3,4,5]))

tensor([-2., -1.,  0.,  1.,  2.])

# 带参数的层

In [67]:
class MyLinear(nn.Module):
    """Custom fully connected layer with its own parameters, followed by ReLU.

    Args:
        in_units: number of input features.
        units: number of output features.

    Attributes:
        weight: ``nn.Parameter`` of shape ``(in_units, units)``, standard-normal init.
        bias: ``nn.Parameter`` of shape ``(units,)``, standard-normal init.
    """
    def __init__(self,in_units,units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units,units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self,x):
        """Return ``relu(x @ weight + bias)``."""
        # Use the Parameters themselves rather than ``.data``: ``.data``
        # bypasses autograd, so no gradient would ever reach weight/bias
        # and the layer could not be trained.
        linear = torch.matmul(x,self.weight)+self.bias
        return F.relu(linear)
    
dense = MyLinear(5,3)
dense.weight

Parameter containing:
tensor([[ 0.6420,  0.3398,  0.7631],
        [ 0.1726, -1.3645,  0.4012],
        [-2.1676, -1.2356, -0.1038],
        [-0.5522,  0.2644, -1.2740],
        [-1.7354,  0.8075,  1.0103]], requires_grad=True)

# 读写文件
#### 加载和保存张量

In [68]:
import torch
from torch import nn
from torch.nn import functional as F

# Write a tensor to the file 'x-file' in the current working directory ...
x = torch.arange(4)
torch.save(x,'x-file')

# ... and read it back from that file.
x2 = torch.load("x-file")
x2

tensor([0, 1, 2, 3])

# 加载和保存模型参数

In [69]:
# A hand-written MLP
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer: 20 -> 256 -> 10.

    The hidden layer output goes through ReLU before the output layer.
    """

    def __init__(self):
        super().__init__()
        # Hidden layer: 20 input features -> 256 hidden units.
        self.hidden = nn.Linear(20, 256)
        # Output layer: 256 hidden units -> 10 outputs.
        self.out = nn.Linear(256, 10)

    def forward(self, x):
        """Return the output-layer result for input ``x``."""
        activated = F.relu(self.hidden(x))
        return self.out(activated)
    
net = MLP()
x = torch.randn(size=(2,20))
y = net(x)

# 将模型的参数存储为一个叫做“mlp.params”的文件

In [71]:
# Save the model's parameters (its state_dict) to the file 'mlp.params'.
torch.save(net.state_dict(),'mlp.params')

In [73]:
# Instantiate a fresh copy of the MLP and load the parameters stored in
# the file 'mlp.params' into it.
clone = MLP()
clone.load_state_dict(torch.load("mlp.params"))
# Switch the copy to evaluation mode; as the cell's last expression this
# also displays the module structure.
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)