In [2]:
import torch
from torch import nn

## Parameter Access

In [28]:
# Small MLP used throughout: 4 inputs -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
# A random batch of 2 examples with 4 features each.
X = torch.rand(2, 4)
net(X)

tensor([[-0.2407],
        [-0.2337]], grad_fn=<AddmmBackward>)

In [29]:
# Ordered mapping from names such as '0.weight' (child index + attribute name)
# to the corresponding tensors of `net`.
net.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.3783,  0.0534,  0.2602, -0.3800],
                      [-0.2251,  0.1473, -0.0224,  0.0828],
                      [ 0.0029, -0.2816,  0.3262, -0.1190],
                      [-0.2502,  0.4433, -0.0049, -0.2059],
                      [-0.0929,  0.1371,  0.4339,  0.4236],
                      [ 0.4728,  0.4933, -0.4876,  0.4186],
                      [ 0.1065, -0.0477,  0.3502, -0.0773],
                      [-0.0096,  0.2285,  0.0847, -0.3798]])),
             ('0.bias',
              tensor([-0.2840,  0.4161, -0.1825, -0.1258, -0.4711, -0.2630, -0.1046,  0.2474])),
             ('2.weight',
              tensor([[ 0.2772,  0.1899, -0.0207, -0.0636, -0.3061, -0.3089,  0.1627, -0.1445]])),
             ('2.bias', tensor([-0.1092]))])

In [19]:
# Look up a single entry by key: the bias of the layer stored at index 2.
net.state_dict()['2.bias']

tensor([0.1841])

In [8]:
# Attribute access returns the Parameter object (values plus requires_grad flag).
net[0].bias

Parameter containing:
tensor([-2.5392e-04, -2.9262e-01,  2.5721e-01,  2.1194e-01,  3.2232e-01,
        -2.4069e-01, -2.4105e-01, -4.0320e-01], requires_grad=True)

In [9]:
# `.data` shows only the stored values as a plain tensor.
net[0].bias.data

tensor([-2.5392e-04, -2.9262e-01,  2.5721e-01,  2.1194e-01,  3.2232e-01,
        -2.4069e-01, -2.4105e-01, -4.0320e-01])

In [17]:
# named_parameters() yields (name, Parameter) pairs; note the second item is
# the Parameter itself, not its shape.
list(net[0].named_parameters())

[('weight',
  Parameter containing:
  tensor([[-0.3433,  0.0202, -0.3418, -0.2295],
          [ 0.1200, -0.3384,  0.0058,  0.1859],
          [-0.1170,  0.0241, -0.3259,  0.1644],
          [-0.2734,  0.0632,  0.1411,  0.3852],
          [ 0.3043,  0.2815, -0.3508, -0.0904],
          [-0.1149,  0.1635, -0.3232,  0.0445],
          [-0.3872, -0.0919,  0.2099, -0.1558],
          [ 0.4808, -0.4080,  0.2552,  0.4576]], requires_grad=True)),
 ('bias',
  Parameter containing:
  tensor([-2.5392e-04, -2.9262e-01,  2.5721e-01,  2.1194e-01,  3.2232e-01,
          -2.4069e-01, -2.4105e-01, -4.0320e-01], requires_grad=True))]

In [21]:
def block1():
    """Build a 4 -> 8 -> 4 MLP block with a ReLU after each linear layer."""
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2():
    """Stack four separately constructed `block1` modules, named block0..block3."""
    stack = nn.Sequential()
    for idx in range(4):
        # add_module registers the child under an explicit name instead of an index.
        stack.add_module(f'block{idx}', block1())
    return stack

# Nested network: the stacked blocks followed by a final 4 -> 1 linear layer.
rgnet = nn.Sequential(
    block2(),
    nn.Linear(4, 1),
)
rgnet(X)

tensor([[-0.0120],
        [-0.0120]], grad_fn=<AddmmBackward>)

In [22]:
# Display the nested module tree of `rgnet` (the cell previously evaluated the
# undefined name `c`, which fails on a fresh kernel).
rgnet

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)

In [24]:
# Nested Sequentials can be indexed like nested lists:
# rgnet[0] is the stack of blocks, [1] its second block, [0] that block's first Linear.
rgnet[0][1][0].weight

Parameter containing:
tensor([[-0.0143, -0.4007,  0.2067,  0.4617],
        [-0.4873,  0.2232,  0.0149, -0.3594],
        [-0.2251,  0.1923,  0.1594, -0.2119],
        [ 0.0498,  0.4915,  0.2653,  0.4278],
        [-0.4776, -0.3709,  0.1616, -0.4788],
        [-0.3821,  0.0762,  0.0218,  0.2594],
        [ 0.2349, -0.0803,  0.0652,  0.0164],
        [ 0.4105,  0.2508, -0.3046,  0.2152]], requires_grad=True)

## Parameter Initialization

In [30]:
def init_normal(m):
    """Initialise an ``nn.Linear`` with N(0, 0.01^2) weights and a zero bias.

    Written to be passed to ``Module.apply``; any module whose exact type is
    not ``nn.Linear`` is left untouched.

    Args:
        m: The module to (possibly) initialise in place.
    """
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # A layer built with bias=False stores bias as None; zeros_ would fail on it.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Module.apply calls init_normal on net and on each of its submodules.
net.apply(init_normal)
# Inspect the first layer to confirm the new values.
net[0].weight.data, net[0].bias.data

(tensor([[-0.0181,  0.0049,  0.0037,  0.0082],
         [-0.0065, -0.0117, -0.0130,  0.0044],
         [-0.0001,  0.0115, -0.0136, -0.0084],
         [-0.0073,  0.0003,  0.0185,  0.0156],
         [ 0.0070, -0.0058, -0.0128,  0.0003],
         [ 0.0119, -0.0112, -0.0067,  0.0014],
         [-0.0056, -0.0131,  0.0115, -0.0069],
         [-0.0022, -0.0157,  0.0049, -0.0038]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [35]:
def init_constant(m):
    """Initialise an ``nn.Linear`` with every weight set to 2 and a zero bias.

    Written to be passed to ``Module.apply``; any module whose exact type is
    not ``nn.Linear`` is left untouched.

    Args:
        m: The module to (possibly) initialise in place.
    """
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 2)
        # A layer built with bias=False stores bias as None; zeros_ would fail on it.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Re-initialise net with init_constant (applied to net and each submodule).
net.apply(init_constant)
# Inspect the first layer to confirm the new values.
net[0].weight.data, net[0].bias.data

(tensor([[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [37]:
def xavier(m):
    """Re-draw the weights of an ``nn.Linear`` with Xavier-uniform initialisation.

    Only the weight is touched; the bias keeps whatever value it had. Modules
    whose exact type is not ``nn.Linear`` are ignored.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
# Initialisers can be applied per layer: only the layer at index 2 is changed here.
net[2].apply(xavier)
# xavier() does not touch the bias, so it keeps its previous value.
net[2].weight.data, net[2].bias.data

(tensor([[-0.3953,  0.5425,  0.4089,  0.0133,  0.5771,  0.1212,  0.6860,  0.3200]]),
 tensor([0.]))

In [61]:
def my_init(m):
    """Custom initialiser: draw weights from U(-10, 10), then zero the small ones.

    Entries whose magnitude is below 5 are set to zero; the rest keep their
    sampled value. Modules whose exact type is not ``nn.Linear`` are ignored.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.uniform_(m.weight, -10, 10)
    # Boolean mask of the entries to keep; multiplying by it zeroes the others.
    keep = m.weight.data.abs() >= 5
    m.weight.data *= keep
# Apply the custom initialiser to net and each of its submodules.
net.apply(my_init)
# Show the first layer's weight Parameter after initialisation.
net[0].weight


Parameter containing:
tensor([[-0.0000,  5.8434, -8.6593,  0.0000],
        [ 5.7975,  6.0066,  6.9793,  9.2941],
        [ 5.7919, -5.8185, -9.5599,  0.0000],
        [-8.5591, -0.0000, -7.4196, -8.0596],
        [ 5.5867,  0.0000,  6.8820, -0.0000],
        [-5.7885, -9.2129, -0.0000, -6.3376],
        [-0.0000,  0.0000, -6.8207,  5.7054],
        [ 0.0000, -0.0000,  0.0000,  0.0000]], requires_grad=True)

In [62]:
# Parameters can also be set directly: slice assignment on .data overwrites
# every entry of the first layer's weight in place.
net[0].weight.data[:] = 1
net[0].weight.data

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [64]:
# Overwrite a single entry (row 0, column 0) in place.
net[0].weight.data[0, 0] = 42
net[0].weight.data

tensor([[42.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.]])

## Tied Parameters

In [74]:
# Create the layer once and place the same object at two positions in the
# network, so both positions use a single set of parameters.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Linear(8, 1),
)
net(X)

tensor([[0.1246],
        [0.1178]], grad_fn=<AddmmBackward>)

In [75]:
# Positions 2 and 4 both hold the `shared` layer, so their weights compare equal elementwise.
net[2].weight.data == net[4].weight.data

tensor([[True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True]])