In [50]:
import torch
from torch import nn
from torch.nn import functional as F

In [51]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear(20 -> 256), ReLU, Linear(256 -> 10)."""

    def __init__(self):
        super().__init__()
        self.hide = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)   # output layer

    def forward(self, X):
        """Apply the hidden layer, ReLU, then the output layer to ``X``.

        ``X`` must have 20 features in its last dimension; the result has 10.
        """
        # The hidden layer is registered as ``self.hide``. Referring to it as
        # ``self.hidden`` raises AttributeError, since no such attribute exists.
        return self.out(F.relu(self.hide(X)))

In [52]:
# Wrap an MLP in a Sequential container, then register an extra Linear(10 -> 1)
# child under the name 'hide' so it can be reached as `model.hide`.
model = nn.Sequential(MLP())
model.add_module('hide', nn.Linear(10, 1))

# Inspect the container, its state-dict keys, the named child, and that child's
# bias as a Parameter, as a raw tensor, and its gradient -- one item per line.
print(
    model,
    model.state_dict().keys(),
    model.hide,
    model.hide.bias,
    model.hide.bias.data,
    model.hide.bias.grad,
    sep='\n',
)
# Name and shape of every parameter registered in the container.
print([(name, param.shape) for name, param in model.named_parameters()])

Sequential(
  (0): MLP(
    (hide): Linear(in_features=20, out_features=256, bias=True)
    (out): Linear(in_features=256, out_features=10, bias=True)
  )
  (hide): Linear(in_features=10, out_features=1, bias=True)
)
odict_keys(['0.hide.weight', '0.hide.bias', '0.out.weight', '0.out.bias', 'hide.weight', 'hide.bias'])
Linear(in_features=10, out_features=1, bias=True)
Parameter containing:
tensor([-0.1073], requires_grad=True)
tensor([-0.1073])
None
[('0.hide.weight', torch.Size([256, 20])), ('0.hide.bias', torch.Size([256])), ('0.out.weight', torch.Size([10, 256])), ('0.out.bias', torch.Size([10])), ('hide.weight', torch.Size([1, 10])), ('hide.bias', torch.Size([1]))]


In [53]:
class Linear(nn.Module):
    """Custom dense layer followed by ReLU: ``relu(X @ weight + bias)``.

    Args:
        n_in: number of input features (rows of ``weight``).
        n_out: number of output features (columns of ``weight``, size of ``bias``).
    """

    def __init__(self, n_in, n_out):
        super().__init__()
        # Both tensors are drawn from a standard normal and registered as
        # parameters of the module.
        self.weight = nn.Parameter(torch.randn(n_in, n_out))
        self.bias = nn.Parameter(torch.randn(n_out))

    def forward(self, X):
        """Return ``relu(X @ weight + bias)`` for ``X`` whose last dim is ``n_in``."""
        # Use the Parameters themselves rather than their ``.data`` so the
        # result stays attached to the autograd graph; with ``.data`` a
        # backward pass never populates ``weight.grad`` / ``bias.grad``.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)

# Chain two of the custom layers (64 -> 8 -> 2) and evaluate them on a random
# batch of 3 inputs; the trailing expression is the cell's displayed output.
model = nn.Sequential(*(Linear(n_in, n_out) for n_in, n_out in ((64, 8), (8, 2))))
model(torch.rand((3, 64)))

tensor([[0.0000, 1.9187],
        [0.0000, 6.7991],
        [0.4102, 6.8209]])

In [53]:
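# Saving and loading:
# torch.save(obj, path)  -- serialize a tensor (or a list/dict of tensors) to a file
# torch.load(path)       -- read back an object written by torch.save
# state_dict(), load_state_dict()  -- export a module's parameters as a dict / copy such a dict back into a module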