In [12]:
import torch
from torch import nn

# Two-layer MLP: 4 input features -> 8 hidden units (ReLU) -> 1 output.
layers = [nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1)]
net = nn.Sequential(*layers)
# Mini-batch of 2 samples with 4 features each, sampled uniformly from [0, 1).
X = torch.rand(2, 4)
net(X)

tensor([[-0.2207],
        [-0.3601]], grad_fn=<AddmmBackward0>)

In [2]:
net

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [4]:
net[2]

Linear(in_features=8, out_features=1, bias=True)

In [6]:
# Show the bias of the output layer three ways: its Python type, the
# Parameter wrapper itself, and the underlying tensor held in `.data`.
out_bias = net[2].bias
for view in (type(out_bias), out_bias, out_bias.data):
    print(view)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.1225], requires_grad=True)
tensor([0.1225])


In [15]:
net[2].weight.grad is None

True

In [22]:
# Collect (qualified name, shape) for every parameter, then print them space-separated.
param_shapes = [(name, param.shape) for name, param in net.named_parameters()]
print(*param_shapes)

('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [26]:
# `*` unpacks any iterable into positional arguments; no intermediate list is needed.
print(*range(5))

0 1 2 3 4


In [30]:
# Star-unpacking a range: equivalent to print(0, 1, 2, 3, 4).
print(*range(5))

0 1 2 3 4


In [33]:
type(net.state_dict())

collections.OrderedDict

In [34]:
net.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.3870,  0.2496, -0.1356,  0.2688],
                      [-0.0750, -0.2310,  0.0007, -0.2305],
                      [-0.0435,  0.3705, -0.1366,  0.3289],
                      [ 0.0806,  0.2885, -0.0302, -0.4230],
                      [ 0.0583,  0.0402,  0.3624, -0.3451],
                      [ 0.1671,  0.4197, -0.1433,  0.0224],
                      [ 0.1113, -0.2999,  0.2463, -0.4614],
                      [-0.2587, -0.4353,  0.4803,  0.3627]])),
             ('0.bias',
              tensor([ 0.1867, -0.4011, -0.0613, -0.4134,  0.4105, -0.1960, -0.4737, -0.2747])),
             ('2.weight',
              tensor([[ 0.1902,  0.1168,  0.3519, -0.0704, -0.1983, -0.1778, -0.2202,  0.2183]])),
             ('2.bias', tensor([-0.2307]))])

In [35]:
def block1():
    """Build one MLP block: Linear(4 -> 8), ReLU, Linear(8 -> 4), ReLU.

    The block maps 4 input features back to 4 output features, so several
    instances can be chained one after another.

    Returns:
        nn.Sequential: a freshly constructed block with its own parameters.
    """
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)


def block2():
    """Stack four independent ``block1()`` instances inside one ``nn.Sequential``.

    The sub-blocks are registered under the names ``'block 0'`` through
    ``'block 3'`` instead of the default integer indices.

    Returns:
        nn.Sequential: container holding the four named sub-blocks.
    """
    stacked = nn.Sequential()
    for i, sub_block in enumerate(block1() for _ in range(4)):
        # Nest the sub-block here, under an explicit name.
        stacked.add_module(f'block {i}', sub_block)
    return stacked


# Nested network: the four stacked blocks from block2(), then a linear head
# that reduces the 4 features to a single output.
backbone = block2()
head = nn.Linear(4, 1)
rgnet = nn.Sequential(backbone, head)
rgnet(X)

tensor([[0.0432],
        [0.0432]], grad_fn=<AddmmBackward0>)

In [36]:
rgnet

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)

In [39]:
def init_normal(m):
    """Initialize a linear layer in place: weights ~ N(0, 0.01^2), bias = 0.

    Intended to be passed to ``nn.Module.apply``. Modules that are not
    ``nn.Linear`` (or a subclass of it) are left untouched.

    Args:
        m: the module to initialize.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with bias=False has `bias is None`; zeros_ would fail on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)


# apply() calls init_normal on every submodule of `net` (and on `net` itself).
net.apply(init_normal)
# Inspect the first weight row and first bias entry of the first layer.
first_layer = net[0]
first_layer.weight.data[0], first_layer.bias.data[0]

(tensor([ 0.0059,  0.0071, -0.0110,  0.0121]), tensor(0.))

In [40]:
def init_constant(m):
    """Initialize a linear layer in place: every weight = 1, bias = 0.

    Intended to be passed to ``nn.Module.apply``. Uses ``isinstance`` so that
    subclasses of ``nn.Linear`` are initialized too; any other module is left
    untouched.

    Args:
        m: the module to initialize.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.constant_(m.weight, 1)
    # A layer built with bias=False has `bias is None`; zeros_ would fail on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)


# apply() calls init_constant on every submodule of `net` (and on `net` itself).
net.apply(init_constant)
# Inspect the first weight row and first bias entry of the first layer.
first_layer = net[0]
first_layer.weight.data[0], first_layer.bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))