In [2]:
import torch 
from torch import nn
from torch.nn import functional as F

# A two-layer perceptron assembled from stock layers: 20 -> 256 -> ReLU -> 10.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Toy minibatch: 2 examples with 20 features each, sampled uniformly from [0, 1).
X = torch.rand(size=(2, 20))
net(X)

tensor([[-0.3756, -0.0206, -0.0137, -0.1471, -0.0622,  0.0551,  0.0101,  0.0948,
         -0.0306, -0.0120],
        [-0.1742,  0.0040,  0.3375, -0.0211, -0.0876,  0.0562, -0.0419,  0.0349,
         -0.0102,  0.0833]], grad_fn=<AddmmBackward0>)

## Block

In [2]:
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer, written as a custom block.

    Maps 20 input features to 256 hidden units (ReLU) and then to 10 outputs.
    """

    def __init__(self):
        # nn.Module must be initialised first so that the layers assigned
        # below are registered as sub-modules.
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the output-layer values for the minibatch ``X``."""
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)
    
# Instantiate the hand-written block and feed it the minibatch X created in the
# first cell; as the cell's last expression, the forward-pass result is displayed.
net = MLP()
net(X)

tensor([[-0.0875,  0.0980,  0.1309, -0.1470, -0.2363, -0.0975,  0.1579, -0.0072,
          0.0162,  0.0631],
        [-0.0767,  0.2095,  0.0935, -0.0209, -0.3114,  0.0321,  0.2444, -0.2348,
         -0.0856,  0.0435]], grad_fn=<AddmmBackward0>)

In [6]:
class FixedHiddenMLP(nn.Module):
    """Block that mixes a trainable layer, constant weights and Python control flow.

    The forward pass applies one shared ``nn.Linear(20, 20)`` twice, with a
    multiplication by a fixed random 20x20 matrix in between, and returns a
    scalar.
    """

    def __init__(self):
        super().__init__()
        # Constant (never trained) random weights. They are registered as a
        # buffer instead of being stored as a plain tensor attribute so that
        # they follow the module through .to()/.cuda()/.double() and are
        # included in state_dict(); a plain attribute is skipped by all of
        # these, which makes torch.mm fail after moving the model and loses
        # the weights when the model is checkpointed.
        self.register_buffer('rand_weight', torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from the minibatch ``X`` (N x 20)."""
        X = self.linear(X)
        # Buffers never require gradients, so rand_weight acts as a constant here.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same fully connected layer: both calls share one set of parameters.
        X = self.linear(X)
        # Ordinary Python control flow inside forward: halve the activations
        # until the sum of their absolute values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

# Build the block and run the minibatch X through it; the displayed result is the
# single scalar tensor that forward() returns via X.sum().
net = FixedHiddenMLP()
net(X)

tensor(-0.0812, grad_fn=<SumBackward0>)

In [7]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` inside a custom module.

    The inner stack maps 20 -> 64 -> 32 features with ReLU after each layer;
    a final linear layer maps 32 -> 16.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        )
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then through the final linear layer."""
        features = self.net(X)
        return self.linear(features)


# Custom blocks and built-in layers can be mixed freely inside a Sequential.
chimera = nn.Sequential(
    NestMLP(),          # 20 -> 16 features
    nn.Linear(16, 20),  # 16 -> 20 features
    FixedHiddenMLP(),   # 20 features -> scalar
)
chimera(X)

tensor(0.1460, grad_fn=<SumBackward0>)

## Parameter Management

### Parameter Access

In [15]:
# Small MLP used for the parameter-access examples: 4 -> 8 -> ReLU -> 1.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
X = torch.rand(2, 4)
net(X)

# Four views of the output layer (index 2), each followed by a blank line:
# its state dict, the type of its bias, the bias Parameter itself, and the
# raw tensor held by the Parameter.
print(
    net[2].state_dict(),
    type(net[2].bias),
    net[2].bias,
    net[2].bias.data,
    sep='\n\n',
    end='\n\n',
)
# The same bias is reachable through the whole network's state dict,
# where the key is prefixed with the layer's position.
print(net.state_dict()['2.bias'].data)

OrderedDict([('weight', tensor([[ 0.1273, -0.3222, -0.1130,  0.0957,  0.2190, -0.0729,  0.0774, -0.3266]])), ('bias', tensor([0.0729]))])

<class 'torch.nn.parameter.Parameter'>

Parameter containing:
tensor([0.0729], requires_grad=True)

tensor([0.0729])

tensor([0.0729])


In [17]:
# (name, shape) pairs for the first layer only, followed by a blank line ...
print(*((name, p.shape) for name, p in net[0].named_parameters()), end='\n\n')
# ... and for the whole network, where names carry the layer index as a prefix.
print(*((name, p.shape) for name, p in net.named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))

('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


### Parameter initialization

In [18]:
def init_normal(m):
    """Gaussian-initialise a linear layer in place; meant for ``Module.apply``.

    Modules whose type is exactly ``nn.Linear`` get weights drawn from a
    normal distribution with mean 0 and standard deviation 0.01 and a zero
    bias. Every other module (including subclasses of ``nn.Linear``) is left
    untouched.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    nn.init.zeros_(m.bias)

# Module.apply calls init_normal on every sub-module of net (and on net itself);
# the first layer's weights are then displayed to inspect the result.
net.apply(init_normal)
net[0].weight.data

tensor([[-0.0042,  0.0154, -0.0099,  0.0145],
        [-0.0085, -0.0030,  0.0049,  0.0021],
        [ 0.0073,  0.0095, -0.0124,  0.0029],
        [ 0.0111,  0.0234,  0.0123, -0.0037],
        [-0.0050,  0.0257, -0.0017, -0.0067],
        [-0.0055,  0.0064, -0.0198, -0.0038],
        [-0.0092, -0.0043,  0.0111,  0.0170],
        [-0.0029, -0.0026,  0.0025, -0.0024]])

In [19]:
def init_constant(m):
    """Constant-initialise a linear layer in place; meant for ``Module.apply``.

    Modules whose type is exactly ``nn.Linear`` get every weight set to 1 and
    every bias set to 0; all other modules are left untouched.

    Named ``init_constant`` so that it no longer redefines (and silently
    shadows) the Gaussian initialiser ``init_normal`` defined in the previous
    cell.
    """
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)

# Apply the constant initialiser to every sub-module, then inspect layer 0.
net.apply(init_constant)
net[0].weight.data

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

### Tied Parameters

In [20]:
# A single layer object placed at two positions of the network, so positions
# 2 and 4 hold the very same module and therefore the very same parameters.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Linear(8, 1),
)
net(X)

# First row of both weight matrices, compared element-wise before the write ...
print(torch.eq(net[2].weight.data[0], net[4].weight.data[0]))
# ... and after overwriting one entry through position 2 only: the comparison
# is repeated to show whether position 4 sees the change as well.
net[2].weight.data[0, 0] = 100
print(torch.eq(net[2].weight.data[0], net[4].weight.data[0]))

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])


In [4]:
class CenteredLayer(nn.Module):
    """Parameter-free custom layer that subtracts the input's mean from the input."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` shifted by the mean taken over all of its elements."""
        overall_mean = X.mean()
        return X - overall_mean
    
# Sanity check on a small float32 vector whose mean is 3.
layer = CenteredLayer()
layer(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32))

tensor([-2., -1.,  0.,  1.,  2.])