In [79]:
import os
# Switch the working directory to the notebook's project folder so that relative
# paths used later resolve against it.
# NOTE(review): the absolute path is hard-coded and looks like a Colab Google
# Drive mount point — confirm Drive is mounted before running this cell.
os.chdir('/content/drive/MyDrive/Colab Notebooks/pytorch_book')
# Echo the current directory as the cell result to confirm the change took effect.
os.getcwd()

'/content/drive/MyDrive/Colab Notebooks/pytorch_book'

In [None]:
import torch
from torch import nn
from torch.nn import functional as F

In [None]:
# Dummy input shared by the block demos below: 2 rows x 20 columns of uniform
# random values in [0, 1). No seed is set in the visible cells, so the values
# (and every printed output that depends on them) differ from run to run.
x = torch.rand(2, 20)

In [None]:
# custom block
class MLP(nn.Module):
    """Multilayer perceptron with a single ReLU-activated hidden layer.

    Args:
        in_features: Number of features in each input row (default 20).
        hidden_features: Width of the hidden layer (default 256).
        out_features: Number of features in each output row (default 10).

    The defaults reproduce the original fixed 20 -> 256 -> 10 network, so
    ``MLP()`` builds exactly the same architecture as before.
    """

    def __init__(self, in_features=20, hidden_features=256, out_features=10):
        super().__init__()
        # Assigning nn.Module instances as attributes registers them (and their
        # parameters) with this block automatically.
        self.hidden = nn.Linear(in_features, hidden_features)
        self.out = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Compute ``out(relu(hidden(x)))``.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions and ``out_features`` as the
            last dimension.
        """
        return self.out(F.relu(self.hidden(x)))

In [None]:
# Instantiate the custom block and run one forward pass on the shared input;
# the (2, 20) batch comes back as a (2, 10) tensor, shown as the cell result.
net = MLP()
net(x)

tensor([[-0.2075, -0.1071, -0.0172, -0.1940, -0.0681,  0.0906, -0.2800,  0.0826,
          0.0927,  0.1752],
        [-0.1808, -0.0493,  0.0734, -0.0933, -0.0688,  0.0772, -0.2480,  0.1301,
         -0.0401,  0.1569]], grad_fn=<AddmmBackward0>)

In [None]:
# sequential block
class MySequential(nn.Module):
    """Minimal re-implementation of a sequential container.

    Args:
        *args: The ``nn.Module`` instances to chain, in application order.
            They are registered under the names ``'0'``, ``'1'``, ...

    Raises:
        TypeError: If any argument is neither an ``nn.Module`` nor ``None``
            (raised by ``add_module``).
    """

    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # Use the public registration API instead of writing into the
            # private ``_modules`` dict: add_module() validates the entry, so a
            # non-Module argument fails here rather than later inside
            # forward(), parameters() or state_dict().
            self.add_module(str(idx), module)

    def forward(self, x):
        """Feed ``x`` through every registered module in insertion order."""
        # Walk the registry itself rather than children(): children() skips
        # repeated instances, which would silently drop a layer that was
        # deliberately passed more than once.
        for block in self._modules.values():
            x = block(x)
        return x

In [None]:
# Same 20 -> 256 -> 10 layout as the MLP class, this time assembled from
# individual layers through the hand-written container. This rebinds `net`.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(x)

tensor([[ 0.0944, -0.0115, -0.1819,  0.0483, -0.1098, -0.1879,  0.0701, -0.1118,
         -0.1154,  0.0882],
        [ 0.1912, -0.0077, -0.1513,  0.0765, -0.1079, -0.0765,  0.0244, -0.0470,
         -0.0554,  0.1280]], grad_fn=<AddmmBackward0>)

In [None]:
# fixed parameters
class FixedHiddenMLP(nn.Module):
    """MLP combining a learnable layer with a fixed random projection.

    One ``nn.Linear(20, 20)`` layer is applied twice (both applications share
    the same parameters) around a constant random 20x20 matrix that is never
    trained. ``forward`` reduces its input to a scalar tensor.
    """

    def __init__(self):
        super().__init__()
        # Register the constant as a buffer instead of a plain tensor attribute
        # so that .to(), .cuda(), .double() etc. convert it together with the
        # parameters; otherwise torch.mm in forward() fails with a device/dtype
        # mismatch after the module is moved or cast. Buffers are not returned
        # by parameters(), so it still receives no updates.
        # persistent=False keeps it out of state_dict(), leaving the checkpoint
        # keys identical to the earlier plain-attribute version.
        self.register_buffer('rand_weight', torch.rand((20, 20)),
                             persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Return a scalar computed from ``x``.

        Args:
            x: 2-D tensor with 20 columns (``torch.mm`` only accepts matrices).

        Returns:
            0-dim tensor: the sum of the rescaled activations.
        """
        x = self.linear(x)
        # Constant projection plus an offset of 1, then ReLU.
        x = F.relu(torch.mm(x, self.rand_weight) + 1)
        # Second pass through the *same* layer, i.e. shared parameters.
        x = self.linear(x)
        # Data-dependent control flow: halve the activations until their
        # absolute sum is at most 1.
        while x.abs().sum() > 1:
            x /= 2
        return x.sum()

In [None]:
# Unlike the previous blocks, this forward pass collapses the whole batch into
# a single scalar tensor (see the cell result).
net = FixedHiddenMLP()
net(x)

tensor(0.0386, grad_fn=<SumBackward0>)

In [None]:
# random combination
class NextMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` stack and adds a linear head.

    The nested stack maps 20 -> 64 -> 32 features with a ReLU after each
    linear layer; the head then maps 32 -> 16.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in application order and handed to the container.
        body = [
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        ]
        self.net = nn.Sequential(*body)
        self.linear = nn.Linear(32, 16)

    def forward(self, x):
        """Run the nested stack, then the linear head."""
        features = self.net(x)
        return self.linear(features)

In [None]:
# Blocks compose freely: NextMLP (20 -> 16), a plain linear layer (16 -> 20)
# and FixedHiddenMLP (takes 20 features, returns a scalar) are chained in order.
chimera = nn.Sequential(NextMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(x)

tensor(0.1032, grad_fn=<SumBackward0>)

### Parameter Access

In [None]:
# Small 4 -> 8 -> 1 network used for the parameter-access examples.
# Note: this rebinds both `net` and `x` for the cells that follow.
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
x = torch.rand(size=(2, 4))
net(x)
# Index 2 is the final Linear layer (index 1 is the parameter-free ReLU).
print(net[2].state_dict())   # name -> tensor mapping for that layer
print(net[2].bias)           # the Parameter object itself
print(net[2].bias.data)      # the underlying tensor of values
# No backward pass has run, so the gradient is still unset. Test with `is None`:
# identity is the idiomatic check for the None singleton and cannot be
# redirected through a tensor's overloaded `==` once a gradient exists.
print(net[2].weight.grad is None)

OrderedDict([('weight', tensor([[ 0.0864, -0.1499, -0.1190,  0.2750, -0.3237, -0.1336, -0.2402, -0.1991]])), ('bias', tensor([-0.1139]))])
Parameter containing:
tensor([-0.1139], requires_grad=True)
tensor([-0.1139])
True


In [None]:
# all parameters at once
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()])
# from state_dict()
print(net.state_dict()['2.bias'].data)

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [None]:
# collecting parameters from nested blocks
def block1():
    """Build a fresh 4 -> 8 -> 4 MLP with a ReLU after each linear layer."""
    layers = (
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    )
    return nn.Sequential(*layers)

def block2():
    """Stack four ``block1()`` instances under the names 'block 0'..'block 3'."""
    stack = nn.Sequential()
    labels = [f'block {i}' for i in range(4)]
    for label in labels:
        # Every block1() call constructs new layers, so the four sub-blocks
        # do not share parameters.
        stack.add_module(label, block1())
    return stack

# Nest the four-block stack inside another Sequential and finish with a
# 4 -> 1 linear layer.
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
rgnet(x)
# Index level by level: outer child 0 (the stack) -> its child 1 ('block 1')
# -> that block's layer 0 (the first Linear) -> its bias values.
print(rgnet[0][1][0].bias.data) # nested structure

tensor([[-0.2217],
        [-0.2215]], grad_fn=<AddmmBackward0>)

tensor([ 0.1958,  0.3506,  0.3878, -0.0791,  0.3250, -0.3055,  0.1278, -0.2377])

In [None]:
# Printing a module shows its nested layout, including the custom 'block i'
# names assigned via add_module().
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)
