# 5.1 层和块


In [1]:
import torch
from torch import nn
from torch.nn import functional as F

# Two-layer perceptron assembled from stock layers: 20 -> 256 -> ReLU -> 10.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# Random batch of 2 samples with 20 features each; the last expression
# shows the network output for that batch.
X = torch.rand(2, 20)
net(X)

tensor([[ 0.1666,  0.1273,  0.0996, -0.0224,  0.2541, -0.0795,  0.2937, -0.1695,
         -0.1536, -0.0460],
        [ 0.1825,  0.0232,  0.1019, -0.0277,  0.3072, -0.0159,  0.2991, -0.2668,
         -0.0080, -0.0412]], grad_fn=<AddmmBackward>)

In [3]:
import torch
from torch import nn
from torch.nn import functional as F

# 自定义块
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer, written as a custom block.

    The hidden layer maps 20 features to 256 units and the output layer maps
    those 256 units to 10 outputs.
    """

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return ``out(relu(hidden(X)))`` for the input batch ``X``."""
        hidden_activation = F.relu(self.hidden(X))
        return self.out(hidden_activation)

# Draw the input batch first, then build the custom block and run it;
# the last expression shows the (2, 10) output.
X = torch.rand(2, 20)
net = MLP()
net(X)

tensor([[-0.0144,  0.1496,  0.0862,  0.0867,  0.3435, -0.0656,  0.0015,  0.1540,
         -0.0204, -0.2431],
        [ 0.0334,  0.1602,  0.0483,  0.1977,  0.0845, -0.0769, -0.0517,  0.1162,
          0.0750, -0.1687]], grad_fn=<AddmmBackward>)

In [5]:
# 顺序块
class MySequential(nn.Module):
    """Minimal re-implementation of a sequential container.

    The modules passed to the constructor are stored in order and applied
    one after another in ``forward``.
    """

    def __init__(self, *args):
        super().__init__()
        # Each argument is an instance of an nn.Module subclass. It is stored
        # in `_modules`, the name -> submodule mapping that nn.Module keeps,
        # under its position rendered as a string.
        self._modules.update(
            (str(position), layer) for position, layer in enumerate(args))

    def forward(self, X):
        """Feed ``X`` through every stored module in insertion order."""
        result = X
        for layer in self._modules.values():
            result = layer(result)
        return result

# Build the custom container from Linear(20 -> 256), ReLU, Linear(256 -> 10)
# and run it on a random batch of two samples.
net = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
X = torch.rand(2, 20)
net(X)

tensor([[ 0.2815,  0.0244,  0.2887,  0.0138, -0.3167,  0.0708,  0.0596,  0.0548,
         -0.3138, -0.2818],
        [ 0.4263, -0.1813,  0.2421,  0.1410, -0.2793,  0.1008,  0.0694,  0.0796,
         -0.4309, -0.3870]], grad_fn=<AddmmBackward>)

In [3]:
import torch
from torch import nn
from torch.nn import functional as F

# 前向传播中执行代码
class FixedHiddenMLP(nn.Module):
    """Block whose forward pass mixes a trainable layer with arbitrary code.

    A single ``nn.Linear(20, 20)`` layer is applied twice (so both
    applications share parameters), with a constant random projection and a
    ReLU in between. The result is then halved until its L1 norm is at most
    1 and reduced to a scalar.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights that are never trained. They are registered
        # as a non-persistent buffer so that they follow the module through
        # `.to()`, `.cuda()` and `.double()`, while still being absent from
        # `state_dict()` and `parameters()` as a plain attribute would be.
        self.register_buffer(
            'rand_weight',
            torch.rand(size=(20, 20), requires_grad=False),
            persistent=False)
        self.linear = nn.Linear(in_features=20, out_features=20)

    def forward(self, X: torch.Tensor):
        """Return a scalar tensor computed from the batch ``X``.

        ``X`` must have 20 columns, because it is fed to ``self.linear`` and
        multiplied by the (20, 20) constant matrix.
        """
        X = self.linear(X)
        # Fixed projection plus a constant offset, followed by ReLU.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same fully connected layer, i.e. share its parameters.
        X = self.linear(X)
        # Ordinary Python control flow inside the forward pass: halve the
        # activations until the sum of their absolute values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

# Draw the input batch first, then build the block and run it; the last
# expression shows the scalar it returns.
X = torch.rand(2, 20)
net = FixedHiddenMLP()
net(X)

tensor(-0.2067, grad_fn=<SumBackward0>)

In [5]:
# quiz

class MySequentialQuiz(nn.Module):
    """Sequential container that returns every intermediate output.

    Modules are stored in the order given and applied one after another;
    ``forward`` collects the output of each stage instead of returning only
    the last one.
    """

    def __init__(self, *args):
        super().__init__()
        # Each argument is an instance of an nn.Module subclass. It is stored
        # in `_modules`, the name -> submodule mapping that nn.Module keeps,
        # under its position rendered as a string.
        self._modules.update(
            (str(position), layer) for position, layer in enumerate(args))

    def forward(self, X):
        """Return a list with the output of each stored module, in order."""
        stage_outputs = []
        current = X
        for layer in self._modules.values():
            current = layer(current)
            stage_outputs.append(current)
        return stage_outputs

# Build the quiz container from Linear(20 -> 256), ReLU, Linear(256 -> 10)
# and run it on a random batch; the result is one tensor per stage.
net = MySequentialQuiz(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
X = torch.rand(2, 20)
net(X)

[tensor([[ 0.1243, -0.0813, -0.1323, -0.1637, -0.2136, -0.1932, -0.4604, -0.1141,
          -0.3389, -0.2385,  0.1201, -0.1476, -0.3179, -0.3248, -0.1174,  0.3162,
           0.0389, -0.5190, -0.1210,  0.1834,  0.1344,  0.1521, -0.1481,  0.2967,
           0.0743, -0.3023,  0.3846, -0.3553, -0.1025,  0.2429, -0.0315,  0.5640,
           0.0583,  0.0910, -0.2686, -0.0109,  0.2129, -0.1305,  0.0647,  0.8903,
          -0.5316,  0.1413,  0.1434,  0.0759, -0.1232, -0.1770,  0.2084, -0.1076,
          -0.6032, -0.2984,  0.0267,  0.2979,  0.7035, -0.5143,  0.9181, -0.0983,
           0.0969, -0.2187, -0.0411,  0.7130, -0.5817,  0.3987, -0.0164,  0.3483,
           0.3313, -0.2712,  0.1693, -0.9545,  0.0856,  0.3903,  0.1812,  0.5665,
           0.3584, -0.1828, -0.4873,  0.5350,  0.6609,  0.4909, -0.1759,  0.0279,
           0.0620,  0.9492, -0.2923,  0.1423,  0.3196,  0.5844, -0.1143,  0.3250,
          -0.1516,  0.4451, -0.8112, -0.4231, -0.1418,  0.8036,  0.1056, -0.3403,
           0.177

In [6]:
# 5.2 参数管理

import torch
from torch import nn

# Small network used in the parameter-management cells: 4 -> 8 -> ReLU -> 1.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
# Random batch of two samples with four features each.
X = torch.rand(2, 4)
net(X)

tensor([[0.2543],
        [0.2539]], grad_fn=<AddmmBackward>)

In [7]:

# Show the parameters of the module at index 2 of `net` (the last Linear
# layer) as a name -> tensor mapping.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.0842, -0.1992,  0.1334, -0.0959, -0.0766,  0.2101,  0.2044, -0.0852]])), ('bias', tensor([0.2948]))])


In [19]:
# Target parameters: inspect the weight and bias of `net[2]` in several forms.
# The Parameter objects themselves:
print(net[2].weight)
print(net[2].bias)
# The tensor stored inside the bias Parameter:
print(net[2].bias.data)
# The same values converted to a NumPy array, followed by that object's type:
print(net[2].bias.data.cpu().numpy())
print(type(net[2].bias.data.cpu().numpy()))
# The same values converted to a plain Python list, followed by its type:
print(net[2].bias.data.cpu().numpy().tolist())
print(type(net[2].bias.data.cpu().numpy().tolist()))
# Gradient of the weight; the recorded output is None (no backward pass
# appears in the visible cells before this point).
print(net[2].weight.grad)

Parameter containing:
tensor([[ 0.0842, -0.1992,  0.1334, -0.0959, -0.0766,  0.2101,  0.2044, -0.0852]],
       requires_grad=True)
Parameter containing:
tensor([0.2948], requires_grad=True)
tensor([0.2948])
[0.29475138]
<class 'numpy.ndarray'>
[0.29475137591362]
<class 'list'>
None


In [36]:
# Access all parameters at once.
# print([module for module in net.named_modules() ])
# modules = [module for module in net.named_modules() ]
# print(isinstance(modules[0], nn.Module))
# (name, parameter) pairs of the first layer only; the list is unpacked so
# each pair is printed as a separate argument.
print(*[(name, param) for name, param in net[0].named_parameters()])
# (name, parameter) pairs of the whole network; here the names carry the
# index of the owning layer as a prefix (e.g. '0.weight').
print(*[(name, param) for name, param in net.named_parameters()])

('weight', Parameter containing:
tensor([[ 0.0090, -0.1568, -0.3177, -0.2443],
        [-0.0004,  0.0025,  0.3071,  0.0847],
        [ 0.2652, -0.3415,  0.2819, -0.2263],
        [-0.2020, -0.2395, -0.0179, -0.0793],
        [ 0.2286,  0.0326,  0.3720,  0.2633],
        [ 0.1730, -0.0229,  0.3710, -0.0860],
        [-0.2640,  0.3199, -0.3763,  0.2332],
        [ 0.1525, -0.0613,  0.0304,  0.1764]], requires_grad=True)) ('bias', Parameter containing:
tensor([ 0.0722, -0.4419,  0.4681,  0.4439, -0.3091, -0.4832, -0.4247,  0.4015],
       requires_grad=True))
('0.weight', Parameter containing:
tensor([[ 0.0090, -0.1568, -0.3177, -0.2443],
        [-0.0004,  0.0025,  0.3071,  0.0847],
        [ 0.2652, -0.3415,  0.2819, -0.2263],
        [-0.2020, -0.2395, -0.0179, -0.0793],
        [ 0.2286,  0.0326,  0.3720,  0.2633],
        [ 0.1730, -0.0229,  0.3710, -0.0860],
        [-0.2640,  0.3199, -0.3763,  0.2332],
        [ 0.1525, -0.0613,  0.0304,  0.1764]], requires_grad=True)) ('0.bias', P

In [37]:
# 从嵌套层中收集参数

def block1():
    """Return a fresh Linear(4 -> 8), ReLU, Linear(8 -> 4), ReLU stack."""
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2():
    """Return a Sequential holding four ``block1()`` instances.

    The children are registered under the names ``block-0`` to ``block-3``.
    """
    stacked = nn.Sequential()
    for index in range(4):
        stacked.add_module(f'block-{index}', block1())
    return stacked

# Nested network: the four stacked blocks followed by a 4 -> 1 linear head,
# evaluated on the batch `X` defined in an earlier cell.
rgnet = nn.Sequential(
    block2(),
    nn.Linear(4, 1),
)
rgnet(X)

tensor([[0.1579],
        [0.1579]], grad_fn=<AddmmBackward>)

In [38]:
# Print the nested module tree of `rgnet`.
print(rgnet)

Sequential(
  (0): Sequential(
    (block-0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block-1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block-2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block-3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [39]:
# Index through the nesting: first child of `rgnet`, then its second
# sub-block (index 1), then that sub-block's first layer; print its bias values.
print(rgnet[0][1][0].bias.data)



tensor([ 0.4061, -0.4364,  0.2558,  0.2830,  0.1445, -0.2186, -0.0677,  0.0837])


In [40]:
# 内置初始化
def init_normal(m: nn.Module):
    """Initialize an ``nn.Linear`` in place; leave every other module untouched.

    The weight is drawn from a normal distribution with mean 0 and standard
    deviation 0.01. The bias is set to zero when the layer has one.

    Args:
        m: The module to initialize, typically supplied by ``Module.apply``.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with `bias=False` has `bias is None`; zeroing it would raise.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# `apply` runs init_normal on `net` itself and on each of its submodules;
# then show the first layer's weights and the first entry of its bias.
net.apply(init_normal)
print(net[0].weight.data)
print(net[0].bias.data[0])

tensor([[-0.0030,  0.0002,  0.0107, -0.0070],
        [-0.0074,  0.0075,  0.0142,  0.0101],
        [ 0.0020, -0.0098,  0.0043, -0.0068],
        [-0.0064,  0.0032,  0.0025,  0.0009],
        [-0.0056, -0.0004, -0.0136,  0.0025],
        [-0.0182, -0.0191,  0.0078,  0.0153],
        [-0.0024, -0.0180,  0.0023,  0.0149],
        [ 0.0035, -0.0025, -0.0093,  0.0038]])
tensor(0.)


In [42]:
# 内置初始化-常量初始化
def xavier(m):
    """Apply Xavier-uniform initialization to the weight of an ``nn.Linear``.

    Modules of any other type are left unchanged, and the bias is not touched.
    """
    if type(m) != nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)

def init_42(m):
    """Fill the weight of an ``nn.Linear`` with the constant 42.

    Modules of any other type are left unchanged, and the bias is not touched.
    """
    if type(m) != nn.Linear:
        return
    nn.init.constant_(m.weight, 42)

# Different initializers for different layers: Xavier-uniform for the first
# layer, the constant 42 for the last one; then show both weight tensors.
net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data)
print(net[2].weight.data)

tensor([[-0.1096, -0.0094, -0.2263,  0.5263],
        [-0.5645, -0.0517,  0.6106,  0.1674],
        [-0.4088,  0.5038,  0.3854,  0.0969],
        [-0.5079,  0.1104, -0.4637,  0.5731],
        [-0.3645, -0.6534,  0.6860,  0.6882],
        [-0.2698, -0.6082,  0.0899, -0.6315],
        [-0.4236,  0.1624,  0.4415, -0.1981],
        [-0.5675,  0.2436,  0.3304, -0.1649]])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [None]:
# 自定义初始化
def init_my(m: nn.Module):
    """Print ``Init`` followed by the name and shape of the module's first parameter.

    Modules that own no parameters (for example ``nn.ReLU``) print nothing,
    so the function can be passed to ``Module.apply`` on a mixed network.

    Args:
        m: The module to report on.
    """
    # `named_parameters` is the nn.Module method that yields (name, parameter) pairs.
    named_shapes = [(name, param.shape) for name, param in m.named_parameters()]
    if named_shapes:
        print('Init', *named_shapes[0])
