In [31]:
import torch
from torch import nn
from torch.nn import functional as F

In [None]:
# Small two-layer perceptron: 4 inputs -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)

# Random batch of 2 rows with 4 features each.
X = torch.rand(size=(2, 4))
net(X)

OrderedDict([('weight', tensor([[-0.0755,  0.0491,  0.2022, -0.1779,  0.2136,  0.2753,  0.1082,  0.2108]])), ('bias', tensor([-0.1397]))])


In [45]:
# Show each of the three layers followed by its parameters
# (the ReLU layer holds none, so its state dict prints empty).
for layer in net:
    print(layer)
    print(layer.state_dict())

Linear(in_features=4, out_features=8, bias=True)
OrderedDict([('weight', tensor([[-0.1338,  0.3891,  0.1924,  0.1820],
        [ 0.4045,  0.4315, -0.2856, -0.3197],
        [-0.0210, -0.1778, -0.1424, -0.2285],
        [-0.3273,  0.2864, -0.3558,  0.3315],
        [ 0.3726,  0.4517,  0.1378, -0.1913],
        [ 0.4670, -0.2012, -0.0907, -0.1972],
        [-0.0560, -0.4615, -0.3235, -0.3019],
        [-0.4448,  0.1539, -0.2301, -0.0402]])), ('bias', tensor([ 0.2390,  0.0777, -0.2195, -0.0813, -0.2351, -0.1556, -0.4854,  0.2245]))])
ReLU()
OrderedDict()
Linear(in_features=8, out_features=1, bias=True)
OrderedDict([('weight', tensor([[-0.0755,  0.0491,  0.2022, -0.1779,  0.2136,  0.2753,  0.1082,  0.2108]])), ('bias', tensor([-0.1397]))])


In [46]:
# Parameter names and shapes: first for layer 0 on its own, then for the
# whole network, where each name is prefixed with the layer index.
print(*((label, tensor.shape) for label, tensor in net[0].named_parameters()))
print(*((label, tensor.shape) for label, tensor in net.named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [None]:
class MLP(nn.Module):
    """Multilayer perceptron with one ReLU hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        super().__init__()
        # The attribute names double as the key prefixes in state_dict().
        self.hidden = nn.Linear(in_features=20, out_features=256)
        self.out = nn.Linear(in_features=256, out_features=10)

    def forward(self, X):
        """Apply hidden layer, ReLU, then the output layer to the batch ``X``."""
        activations = F.relu(self.hidden(X))
        return self.out(activations)

In [54]:
net = MLP()
# MLP's first layer takes 20 input features, so build a matching batch here;
# reusing the earlier (2, 4) tensor fails with a shape-mismatch error when the
# notebook is run top to bottom.
X = torch.rand(2, 20)
Y = net(X)
Y

tensor([[ 0.0299, -0.0937, -0.1517, -0.1780, -0.0717,  0.0128,  0.3310, -0.3360,
          0.1727, -0.3353],
        [ 0.2103,  0.2387, -0.5391, -0.3810,  0.1828,  0.0605, -0.3064,  0.2075,
          0.3112, -0.1060]], grad_fn=<AddmmBackward0>)

In [55]:
# Persist only the parameters (the state dict), not the module object itself.
torch.save(obj=net.state_dict(), f='mlp.params')

In [56]:
# Build a freshly initialised model with the same architecture, then
# overwrite its parameters with the ones stored on disk.
clone = MLP()
clone.load_state_dict(torch.load(f='mlp.params'))
# Switch to evaluation mode; eval() returns the module, so the cell displays it.
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)

In [57]:
# Run the reloaded model on the same input and compare element-wise with Y.
Y_clone = clone(X)
torch.eq(Y_clone, Y)

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])

In [28]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Each positional argument is registered as a child module under the string
    form of its position ("0", "1", ...); ``forward`` applies the children in
    that order.

    Args:
        *args: modules to chain, in execution order.

    Raises:
        TypeError: if an argument is not an ``nn.Module`` (raised by
            ``add_module`` during registration).
    """

    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # Register through the public API instead of writing to the
            # private ``_modules`` dict, so invalid children are rejected here
            # rather than surfacing later.
            self.add_module(str(idx), module)
            print(self._modules)  # show the registry growing with each child

    def forward(self, X):
        """Feed ``X`` through every registered child in insertion order."""
        # Iterate the registry directly: unlike children(), this keeps a
        # module that was passed more than once.
        for block in self._modules.values():
            print(block)  # trace which layer is being applied
            X = block(X)
        return X

In [29]:
# Same 20 -> 256 -> 10 architecture as MLP, assembled from individual layers.
net = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
net(X)

OrderedDict([('0', Linear(in_features=20, out_features=256, bias=True))])
OrderedDict([('0', Linear(in_features=20, out_features=256, bias=True)), ('1', ReLU())])
OrderedDict([('0', Linear(in_features=20, out_features=256, bias=True)), ('1', ReLU()), ('2', Linear(in_features=256, out_features=10, bias=True))])
Linear(in_features=20, out_features=256, bias=True)
ReLU()
Linear(in_features=256, out_features=10, bias=True)


tensor([[ 0.0173,  0.0371,  0.0194, -0.3660,  0.2158, -0.0334,  0.0176,  0.2714,
         -0.1984,  0.1344],
        [-0.0393,  0.0544, -0.1065, -0.2519,  0.2723, -0.0244,  0.0834,  0.2641,
         -0.2941,  0.2932]], grad_fn=<AddmmBackward0>)

---