In [1]:
import torch
from torch import nn
from torch.nn import functional as F

net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

X = torch.rand(2, 20)
net(X)

tensor([[ 0.0578, -0.0836, -0.3641,  0.2494, -0.0491, -0.0089,  0.1808, -0.2134,
         -0.0831, -0.0790],
        [ 0.0251, -0.0319, -0.2416,  0.4722,  0.0154, -0.0632,  0.0998, -0.2911,
         -0.2112, -0.1409]], grad_fn=<AddmmBackward>)

In [2]:
class MLP(nn.Module):
    # Declare a layer with model parameters. Here, we declare two fully
    # connected layers
    def __init__(self):
        # Call the constructor of the `MLP` parent class `Module` to perform
        # the necessary initialization. In this way, other function arguments
        # can also be specified during class instantiation, such as the model
        # parameters, `params` (to be described later)
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # Hidden layer
        self.out = nn.Linear(256, 10)  # Output layer

    # Define the forward propagation of the model, that is, how to return the
    # required model output based on the input `X`
    def forward(self, X):
        # Note here we use the funtional version of ReLU defined in the
        # nn.functional module.
        return self.out(F.relu(self.hidden(X)))

In [3]:
net = MLP()
net(X)

tensor([[-0.2422,  0.2483, -0.0187,  0.0448,  0.0763, -0.2808,  0.1323, -0.1429,
          0.0937, -0.2280],
        [-0.0561,  0.2975, -0.0573,  0.0611,  0.1569, -0.2069, -0.0191, -0.2504,
          0.0845, -0.2010]], grad_fn=<AddmmBackward>)

In [4]:
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # Here, `module` is an instance of a `Module` subclass. We save it
            # in the member variable `_modules` of the `Module` class, and its
            # type is OrderedDict
            self._modules[str(idx)] = module

    def forward(self, X):
        # OrderedDict guarantees that members will be traversed in the order
        # they were added
        for block in self._modules.values():
            X = block(X)
        return X

In [24]:
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
result = net(X)
print('Result:', result)
net.state_dict()

Result: tensor([[ 0.1185,  0.0720, -0.2303,  0.1036,  0.0335,  0.0879, -0.0021, -0.2081,
         -0.0555,  0.0891],
        [ 0.1720,  0.1440, -0.1750,  0.0897,  0.0380,  0.1066,  0.0493, -0.2080,
         -0.0420,  0.1101]], grad_fn=<AddmmBackward>)


OrderedDict([('0.weight',
              tensor([[ 0.0457, -0.1934, -0.1552,  ..., -0.0041,  0.2154, -0.0730],
                      [ 0.0066,  0.0865, -0.1483,  ...,  0.1223,  0.0777, -0.0900],
                      [-0.0904, -0.1296, -0.1531,  ..., -0.1750, -0.1695,  0.0273],
                      ...,
                      [ 0.1786,  0.1122, -0.1993,  ...,  0.1870, -0.1312, -0.2168],
                      [ 0.2013,  0.2107,  0.2215,  ..., -0.0210, -0.0707, -0.0265],
                      [ 0.0755,  0.0598, -0.1234,  ...,  0.1085,  0.1218,  0.0070]])),
             ('0.bias',
              tensor([-9.0977e-02,  3.2079e-02, -4.0121e-02,  1.4035e-01,  2.2254e-01,
                      -1.2892e-01, -6.8983e-02,  1.9484e-01, -2.2253e-01, -1.2277e-01,
                      -7.6194e-02, -1.9712e-01, -1.4121e-01,  3.6746e-02,  1.1090e-01,
                       1.1370e-02, -1.5556e-01, -2.0861e-01, -2.1543e-01, -4.5025e-02,
                      -2.3293e-02, -1.4584e-01,  1.6882e-01,  2.1979

In [25]:
g = net.parameters()
for p in g:
    print(p)

Parameter containing:
tensor([[ 0.0457, -0.1934, -0.1552,  ..., -0.0041,  0.2154, -0.0730],
        [ 0.0066,  0.0865, -0.1483,  ...,  0.1223,  0.0777, -0.0900],
        [-0.0904, -0.1296, -0.1531,  ..., -0.1750, -0.1695,  0.0273],
        ...,
        [ 0.1786,  0.1122, -0.1993,  ...,  0.1870, -0.1312, -0.2168],
        [ 0.2013,  0.2107,  0.2215,  ..., -0.0210, -0.0707, -0.0265],
        [ 0.0755,  0.0598, -0.1234,  ...,  0.1085,  0.1218,  0.0070]],
       requires_grad=True)
Parameter containing:
tensor([-9.0977e-02,  3.2079e-02, -4.0121e-02,  1.4035e-01,  2.2254e-01,
        -1.2892e-01, -6.8983e-02,  1.9484e-01, -2.2253e-01, -1.2277e-01,
        -7.6194e-02, -1.9712e-01, -1.4121e-01,  3.6746e-02,  1.1090e-01,
         1.1370e-02, -1.5556e-01, -2.0861e-01, -2.1543e-01, -4.5025e-02,
        -2.3293e-02, -1.4584e-01,  1.6882e-01,  2.1979e-01,  1.1667e-01,
        -8.3702e-02,  3.2990e-02,  5.0170e-02, -2.1771e-01,  1.4476e-01,
         1.3525e-01,  1.7044e-01, -2.1055e-01, -6.3055e-0

In [6]:
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # Random weight parameters that will not compute gradients and
        # therefore keep constant during training
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        X = self.linear(X)
        # Use the created constant parameters, as well as the `relu` and `mm`
        # functions
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the fully-connected layer. This is equivalent to sharing
        # parameters with two fully-connected layers
        X = self.linear(X)
        # Control flow
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [7]:
net = FixedHiddenMLP()
net(X)

tensor(0.0366, grad_fn=<SumBackward0>)

In [8]:
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),
                                 nn.Linear(64, 32), nn.ReLU())
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        return self.linear(self.net(X))

chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.2177, grad_fn=<SumBackward0>)

In [28]:
class MySequentialExer(nn.Module):
    def __init__(self, *args):
        super().__init__()
        
        self.modules = []
        for idx, module in enumerate(args):
                self.modules.append(module)
                
    def forward(self, X):
        for block in self.modules:
            X = block(X)
        return X

In [29]:
net = MySequentialExer(nn.Linear(20, 10), nn.ReLU(), nn.Linear(10, 1))
result = net(X)
print('Inference Result:', result)
net.state_dict()

Inference Result: tensor([[-0.0223],
        [ 0.0319]], grad_fn=<AddmmBackward>)


OrderedDict()

In [30]:
g = net.parameters()
for p in g:
    print(p)

<generator object Module.named_parameters at 0x7f5aeea2cdd0>

In [27]:
net.named_parameters

<bound method Module.named_parameters of MySequential(
  (0): Linear(in_features=20, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)>

In [52]:
class Block(nn.Module):
    def __init__(self, ins, outs, non_linear=False):
        super().__init__()
        self.layer = nn.Linear(ins, outs)
        self.non_linear = non_linear
        if self.non_linear:
            self.non_linear_layer = nn.ReLU()
        
    def forward(self, x):
        x = self.layer(x)
        if self.non_linear:
            x = self.non_linear_layer(x)
        return x

In [53]:
def build_blocks(ins, outs, n, non_linear=False):
    modules = nn.Sequential()
    for i in range(n-1):
        modules.add_module(str(i), Block(ins, ins, non_linear=non_linear))
    modules.add_module(str(n), Block(ins, outs))
    return modules

In [54]:
net = build_blocks(20, 1, 10, non_linear=True)

In [55]:
net

Sequential(
  (0): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (non_linear_layer): ReLU()
  )
  (1): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (non_linear_layer): ReLU()
  )
  (2): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (non_linear_layer): ReLU()
  )
  (3): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (non_linear_layer): ReLU()
  )
  (4): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (non_linear_layer): ReLU()
  )
  (5): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (non_linear_layer): ReLU()
  )
  (6): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (non_linear_layer): ReLU()
  )
  (7): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (non_linear_layer): ReLU()
  )
  (8): Block(
    (layer): Linear(in_features=20, out_features=20, bias=True)
    (n