In [5]:
import torch
from torch import nn

class MLP(nn.Module):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Linear(784, 256)
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)

    def forward(self, X):
        a = self.act(self.hidden(X))
        return self.output(a)

In [9]:
X = torch.rand(2, 784)
net = MLP()
print(net)
net(X)

MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[-0.1444,  0.1361, -0.1251, -0.2153,  0.0764, -0.0425, -0.0691, -0.1184,
         -0.0567,  0.1244],
        [-0.1878,  0.1356, -0.2063, -0.2564,  0.0615, -0.0654, -0.1452, -0.1047,
         -0.2144,  0.2154]], grad_fn=<AddmmBackward>)

In [20]:
from collections import OrderedDict

class MySequential(nn.Module):
    def __init__(self, *args):
        super(MySequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict):
            for key, module in args[0].items():
                self.add_module(key, module)
        else:
            for idx, module in enumerate(args):
                print(idx, module)
                self.add_module(str(idx), module)
    def forward(self, input):
        for module in self._modules.values():
            input = module(input)
        return input

In [22]:
net = MySequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
)
print(net)
net(X)

0 Linear(in_features=784, out_features=256, bias=True)
1 ReLU()
2 Linear(in_features=256, out_features=10, bias=True)
MySequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[ 0.1285, -0.2139, -0.0032, -0.0442,  0.1481,  0.1225, -0.0504,  0.0626,
         -0.2232, -0.0192],
        [-0.0088, -0.0486, -0.1121, -0.0623,  0.1350,  0.1917, -0.0957,  0.0221,
         -0.1211, -0.1072]], grad_fn=<AddmmBackward>)

In [24]:
net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10))
print(net[-1])
print(net)

Linear(in_features=256, out_features=10, bias=True)
ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


In [26]:
net = nn.ModuleDict({
    'linear': nn.Linear(784, 256),
    'act': nn.ReLU(),
})
net['output'] = nn.Linear(256, 10)
print(net['linear'])
print(net.output)
print(net)

Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


In [29]:
class FancyMLP(nn.Module):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)

        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        x = self.linear(x)
        x = nn.functional.relu(torch.mm(x, self.rand_weight.data) + 1)

        x = self.linear(x)

        while x.norm().item() > 1:
            x /= 2
        if x.norm().item() < 0.8:
            x *= 10
        return x.sum()

In [31]:
X = torch.rand(2, 20)
net = FancyMLP()
print(net)
print(net(X))

FancyMLP(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)
tensor(-1.3568, grad_fn=<SumBackward0>)


In [33]:
class NestMLP(nn.Module):
    def __init__(self, **kwargs):
        super(NestMLP, self).__init__(**kwargs)
        self.net = nn.Sequential(nn.Linear(40, 30), nn.ReLU())

    def forward(self, x):
        return self.net(x)

In [37]:
X = torch.rand(2, 40)
net = nn.Sequential(NestMLP(), nn.Linear(30, 20), FancyMLP())
print(net)
print(net(X))

Sequential(
  (0): NestMLP(
    (net): Sequential(
      (0): Linear(in_features=40, out_features=30, bias=True)
      (1): ReLU()
    )
  )
  (1): Linear(in_features=30, out_features=20, bias=True)
  (2): FancyMLP(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
)
tensor(-0.0049, grad_fn=<SumBackward0>)


In [59]:
import torch
from torch import nn
from torch.nn import init

net = nn.Sequential(nn.Linear(4, 3), nn.ReLU(), nn.Linear(3, 1))
# net = nn.Sequential(nn.Linear(4, 4), net)

print(net)
X = torch.rand(2, 4)
print(net(X).sum())

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)
tensor(0.1042, grad_fn=<SumBackward0>)


In [45]:
print(type(net.named_parameters()))
for name, param in net[0].named_parameters():
    print(name, param.size(), type(param))

<class 'generator'>
weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>
bias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>


In [49]:
class MyModel(nn.Module):
    def __init__(self, **kwargs):
        super(MyModel, self).__init__(**kwargs)
        self.weight1 = nn.Parameter(torch.rand(20, 20))
        self.weight2 = torch.rand(20, 20)

    def forward(self, X):
        pass

In [58]:
n = MyModel()
for name, param in net.named_parameters():
    print(name)

weight1


In [61]:
weight_0 = list(net[0].parameters())[0]
print(weight_0.data)
print(weight_0.grad)
net.backward()
print(weight_0.grad)

tensor([[ 0.3670, -0.3175,  0.2270, -0.3275],
        [-0.1356,  0.3234, -0.2678,  0.0667],
        [ 0.3938,  0.1340,  0.0116, -0.2163]])
None


ModuleAttributeError: 'Sequential' object has no attribute 'backward'

In [64]:
for name, param in net.named_parameters():
    if 'weight' in name:
        print(name, param.data)
        init.normal_(param, mean=0, std=0.01)
        print(name, param.data)
    if 'bias' in name:
        print(name, param)
        init.constant_(param, val=0)
        print(name, param)

0.weight tensor([[-0.0004, -0.0086, -0.0011,  0.0135],
        [-0.0120, -0.0007,  0.0024, -0.0013],
        [-0.0096, -0.0027, -0.0134, -0.0198]])
0.weight tensor([[ 1.1211e-02,  1.4294e-02,  1.6753e-02, -3.1562e-02],
        [ 1.1572e-02,  4.7239e-03, -9.8881e-05,  1.7044e-03],
        [ 1.1838e-02, -3.4664e-04, -1.8245e-02,  1.8689e-03]])
0.bias Parameter containing:
tensor([0., 0., 0.], requires_grad=True)
0.bias Parameter containing:
tensor([0., 0., 0.], requires_grad=True)
2.weight tensor([[-0.0056,  0.0106,  0.0069]])
2.weight tensor([[-0.0059,  0.0011, -0.0115]])
2.bias Parameter containing:
tensor([0.], requires_grad=True)
2.bias Parameter containing:
tensor([0.], requires_grad=True)


In [68]:
linear = nn.Linear(1, 1, bias=False)
net = nn.Sequential(linear, linear)
print(net)
for name, param in net.named_parameters():
    init.constant_(param, val=3)
    print(name, param.data)

Sequential(
  (0): Linear(in_features=1, out_features=1, bias=False)
  (1): Linear(in_features=1, out_features=1, bias=False)
)
0.weight tensor([[3.]])


In [70]:
x = torch.ones(1, 1)
y = net(x).sum()
print(y)
y.backward()
print(net[0].weight.grad)

tensor(9., grad_fn=<SumBackward0>)
tensor([[12.]])


In [74]:
x = 2
torch.save(x, './x.pth')

In [76]:
y = torch.load('./x.pth')

In [78]:
print(y)

2
