## 1. 访问参数

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

# Small MLP: 4 input features -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)

# Mini-batch of 2 samples with 4 features each, drawn uniformly from [0, 1).
X = torch.rand((2, 4))
net(X)  # forward pass; the cell displays the resulting (2, 1) tensor


tensor([[-0.2822],
        [-0.2836]], grad_fn=<AddmmBackward0>)

参数访问

In [2]:
# state_dict() of the last layer (index 2): maps parameter names to their tensors.
net[2].state_dict()

OrderedDict([('weight',
              tensor([[ 0.0858, -0.3450,  0.0214, -0.1154, -0.1853,  0.0702, -0.0239, -0.0585]])),
             ('bias', tensor([-0.0056]))])

In [7]:
# Look at the last layer's bias from four angles, one per output line.
print(
    type(net[2].bias),   # nn.Parameter: a tensor registered as an optimisable parameter
    net[2].bias,         # the Parameter itself (values plus requires_grad flag)
    net[2].bias.data,    # the underlying value tensor
    net[2].bias.grad,    # None at this point: no backward pass has been run yet
    sep="\n",
)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.0056], requires_grad=True)
tensor([-0.0056])
None


一次性访问所有参数

In [14]:
# named_parameters() yields (name, parameter) pairs. The leading * unpacks the
# sequence so that every (name, shape) tuple is passed to print() as its own argument.
print(*((n, p.shape) for n, p in net[0].named_parameters()))
# Same for the whole network; the ReLU layer has no parameters, so only layers 0 and 2 appear.
print(*((n, p.shape) for n, p in net.named_parameters()))


('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [15]:
# state_dict() on the whole network: keys are prefixed with the layer index, e.g. '0.weight'.
net.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.0952, -0.3883,  0.2296, -0.0054],
                      [-0.0477,  0.4109,  0.0550,  0.2887],
                      [ 0.3021,  0.3643, -0.1415, -0.4893],
                      [ 0.3586,  0.1796,  0.2362,  0.4590],
                      [ 0.3365, -0.0113, -0.2847,  0.1146],
                      [ 0.1044,  0.1765, -0.3702, -0.3456],
                      [-0.3012, -0.1206,  0.4690, -0.2217],
                      [ 0.3657, -0.4289,  0.2269,  0.2933]])),
             ('0.bias',
              tensor([-0.0988,  0.2384, -0.1475, -0.0636, -0.3188,  0.3589, -0.1716,  0.0871])),
             ('2.weight',
              tensor([[ 0.0858, -0.3450,  0.0214, -0.1154, -0.1853,  0.0702, -0.0239, -0.0585]])),
             ('2.bias', tensor([-0.0056]))])

In [18]:
net.state_dict()['2.bias'] # state_dict() is a dictionary, so a single entry can be looked up by its key

从嵌套块中收集参数

In [25]:
def block1():
    """Build one 4 -> 8 -> 4 block: Linear, ReLU, Linear, ReLU.

    Returns:
        A new nn.Sequential holding the four freshly constructed layers.
    """
    layers = (
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    )
    return nn.Sequential(*layers)

def block2():
    """Stack four block1() instances inside one nn.Sequential.

    The children are registered under the names 'block 0' ... 'block 3'.

    Returns:
        The nn.Sequential containing the four named sub-blocks.
    """
    stack = nn.Sequential()
    for i in range(4):
        child = block1()
        # add_module(name, module) registers the child under an explicit name.
        stack.add_module(f'block {i}', child)
    return stack

# Nested network: four (4 -> 8 -> 4) blocks from block2(), then a final 4 -> 1 linear layer.
rgnet = nn.Sequential(
    block2(),
    nn.Linear(4, 1),
)
rgnet(X)


tensor([[0.1737],
        [0.1738]], grad_fn=<AddmmBackward0>)

In [29]:
# print() on a module shows its nested layer structure.
print(rgnet) 
# NOTE(review): the bare string below is not a comment; as the cell's last expression
# it is echoed as the cell output. It says: "print(net) shows what the network looks like".
"""即可以通过print(net)来了解网络长什么样子"""

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


'即可以通过print(net)来了解网络长什么样子'

## 2. 初始化参数

对net所有块做统一初始化

In [31]:
# Rebuild the 4 -> 8 -> 1 MLP so the initialisation examples start from newly constructed layers.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)

In [32]:
def init_normal(m):
    """Initialise a fully connected layer: normal weights (mean 0, std 0.01), zero bias.

    Intended to be passed to ``Module.apply``, which calls it once per module.

    Args:
        m: Any ``nn.Module``. Only modules whose type is exactly ``nn.Linear``
            are modified; every other module is left untouched.

    Returns:
        None. The parameters are modified in place.
    """
    if type(m) is not nn.Linear:  # exact type match: non-Linear modules are skipped
        return
    # nn.init functions ending in "_" overwrite the given tensor in place.
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with bias=False has m.bias set to None; zeros_ would raise on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
    # TODO: the nn.init module contains many more initialisation functions.
    
net.apply(init_normal)  # walk the whole net and run init_normal on each module

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [33]:
# Inspect the parameters after net.apply(init_normal): small weights, zero biases.
net.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.0028,  0.0167,  0.0078,  0.0057],
                      [ 0.0073,  0.0060,  0.0053, -0.0028],
                      [-0.0226, -0.0012, -0.0012,  0.0010],
                      [ 0.0134, -0.0065, -0.0015, -0.0014],
                      [-0.0100, -0.0151,  0.0125,  0.0037],
                      [-0.0003,  0.0188, -0.0049,  0.0106],
                      [-0.0007, -0.0086,  0.0019,  0.0015],
                      [-0.0014,  0.0056,  0.0127, -0.0124]])),
             ('0.bias', tensor([0., 0., 0., 0., 0., 0., 0., 0.])),
             ('2.weight',
              tensor([[-0.0097,  0.0178, -0.0034, -0.0040,  0.0004, -0.0032, -0.0101, -0.0095]])),
             ('2.bias', tensor([0.]))])

In [39]:
def init_constant(m):
    """Set a fully connected layer's weights to the constant 1 and its bias to 0.

    For demonstration only; do not initialise a real network this way.

    Args:
        m: Any ``nn.Module``. Only modules whose type is exactly ``nn.Linear``
            are modified; every other module is left untouched.

    Returns:
        None. The parameters are modified in place.
    """
    if type(m) is not nn.Linear:  # exact type match: non-Linear modules are skipped
        return
    nn.init.constant_(m.weight, 1)
    # A layer built with bias=False has m.bias set to None; zeros_ would raise on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

net.apply(init_constant)  # run init_constant on every module in net
net.state_dict()  # display the re-initialised parameters

OrderedDict([('0.weight',
              tensor([[1., 1., 1., 1.],
                      [1., 1., 1., 1.],
                      [1., 1., 1., 1.],
                      [1., 1., 1., 1.],
                      [1., 1., 1., 1.],
                      [1., 1., 1., 1.],
                      [1., 1., 1., 1.],
                      [1., 1., 1., 1.]])),
             ('0.bias', tensor([0., 0., 0., 0., 0., 0., 0., 0.])),
             ('2.weight', tensor([[1., 1., 1., 1., 1., 1., 1., 1.]])),
             ('2.bias', tensor([0.]))])

对net某些块应用不同的初始化方法

In [40]:
# Newly constructed 4 -> 8 -> 1 MLP for the per-layer initialisation example.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
def xavier(m):
    """Apply Xavier (Glorot) uniform initialisation to an nn.Linear layer's weight.

    Modules whose type is not exactly nn.Linear are ignored, and the bias is
    never touched.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)

def init_42(m):
    """Fill an nn.Linear layer's weight with the constant 42; the bias is left as is.

    Modules whose type is not exactly nn.Linear are ignored.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 42)  # 42, "the answer to the universe"

net[0].apply(xavier) # Xavier-uniform weights for the first fully connected layer only
net[2].apply(init_42)  # constant-42 weights for the last fully connected layer only
net.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.0998, -0.5786,  0.6705,  0.2441],
                      [ 0.3831, -0.1285,  0.6515,  0.6388],
                      [-0.0608,  0.0901,  0.2831,  0.1976],
                      [ 0.7063, -0.3314, -0.3340, -0.6324],
                      [-0.6237, -0.1408,  0.5775, -0.2012],
                      [ 0.5034,  0.3039,  0.5971,  0.5655],
                      [-0.2118,  0.6935,  0.6494,  0.3139],
                      [ 0.0387,  0.1685,  0.3475,  0.0262]])),
             ('0.bias',
              tensor([-0.2149, -0.4341, -0.1474,  0.2054, -0.3340,  0.2776,  0.0420,  0.0963])),
             ('2.weight', tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])),
             ('2.bias', tensor([0.0940]))])