In [2]:
import torch
from torch import nn
from torch.nn import functional as F


# Sample minibatch shared by the cells below: 2 examples, 20 features each.
x = torch.randn(2, 20)

# Baseline two-layer perceptron built from the stock container:
# 20 inputs -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
net(x)

tensor([[-0.0487, -0.0099,  0.0349,  0.0285, -0.1002,  0.4189, -0.0097,  0.1261,
          0.0550,  0.4225],
        [-0.1577,  0.1508, -0.2089,  0.1133,  0.1146, -0.0035, -0.1064,  0.0473,
         -0.1553,  0.0834]], grad_fn=<AddmmBackward>)

In [3]:
class MLP(nn.Module):
    """Two-layer perceptron written as a custom ``nn.Module``.

    Maps 20 input features to 10 outputs through a single 256-unit hidden
    layer followed by a ReLU.
    """

    def __init__(self):
        # nn.Module.__init__ must run before any sub-module is assigned so
        # that the layers below are registered on this instance.
        super().__init__()
        # Hidden layer: 20 -> 256.
        self.hidden = nn.Linear(20, 256)
        # Output layer: 256 -> 10.
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Return the model output for the input batch ``x``."""
        # ReLU holds no parameters, so the functional form from
        # nn.functional is used rather than an nn.ReLU sub-module.
        activated = F.relu(self.hidden(x))
        return self.output(activated)

In [4]:
# Instantiate the custom MLP and run one forward pass on the sample batch x;
# the result has one row per example and 10 output columns.
net = MLP()
net(x)

tensor([[-0.1617, -0.1417, -0.3657, -0.2014, -0.0107,  0.1979, -0.0304,  0.3016,
         -0.5691, -0.0439],
        [-0.0820,  0.1101, -0.2837, -0.2204,  0.2489,  0.1242, -0.0813,  0.0732,
         -0.4191,  0.5830]], grad_fn=<AddmmBackward>)

In [5]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: ``nn.Module`` instances to chain. Each one is applied to the
            output of the previous one, in the order given.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Register each block under its position as a *string* name.
            # nn.Module builds qualified names ("0.weight", ...) by string
            # concatenation with these keys, so a non-string key (such as the
            # module object itself) makes parameters(), state_dict() and
            # repr() raise TypeError. Indexing by position also keeps a
            # module that is passed twice as two separate steps instead of
            # collapsing it into a single dict entry.
            self.add_module(str(idx), block)

    def forward(self, x):
        """Feed ``x`` through every registered block in insertion order."""
        # _modules is an ordered mapping, so iteration follows the order in
        # which the blocks were added in __init__.
        for block in self._modules.values():
            x = block(x)
        return x

In [6]:
# Exercise the MySequential class defined in the previous cell; it is used
# exactly like nn.Sequential and produces an output of the same shape.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(x)

tensor([[-0.2042,  0.0899,  0.1858, -0.6629,  0.4889,  0.0235,  0.2252, -0.0134,
          0.3237, -0.1445],
        [ 0.1660,  0.1363, -0.0143, -0.2073, -0.0727, -0.0815,  0.1141, -0.3287,
          0.0956, -0.6264]], grad_fn=<AddmmBackward>)

In [7]:
class FixedHiddenMLP(nn.Module):
    """MLP mixing a trainable layer, constant weights and Python control flow.

    The forward pass applies one shared ``nn.Linear(20, 20)`` twice, with a
    multiplication by a constant random 20x20 matrix and a ReLU in between,
    and returns the sum of the (rescaled) result as a scalar tensor.

    ``rand_weight`` is registered as a buffer: it receives no gradient and is
    not returned by ``parameters()``, but it follows the module through
    ``.to()`` / ``.double()`` / ``.cuda()`` and is stored in ``state_dict()``.
    """

    def __init__(self):
        super().__init__()
        # Random weights that do not compute gradients and therefore stay
        # constant during training. A plain tensor attribute would be ignored
        # by device/dtype conversions and by state_dict(), so it is
        # registered as a buffer instead.
        self.register_buffer("rand_weight", torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Return a scalar tensor computed from the input batch ``x``."""
        x = self.linear(x)
        # Use the constant weights created in __init__, together with the
        # relu and mm (matrix multiplication) functions.
        x = F.relu(torch.mm(x, self.rand_weight) + 1)
        # Reuse the fully connected layer. This is equivalent to two fully
        # connected layers that share their parameters.
        x = self.linear(x)
        # Plain Python control flow: .item() turns the norm into a Python
        # number for the comparison, and the activations are halved until
        # their norm is no greater than 1.
        while x.norm().item() > 1:
            x /= 2
        return x.sum()

In [8]:
# Instantiate FixedHiddenMLP and run it on the sample batch x; its forward
# pass ends in .sum(), so the displayed result is a single scalar tensor.
net = FixedHiddenMLP()
net(x)

tensor(-0.1980, grad_fn=<SumBackward0>)

In [9]:
class NestMLP(nn.Module):
    """Module that nests an ``nn.Sequential`` inside a custom block.

    The inner stack maps 20 -> 64 -> 32 features with a ReLU after each
    linear layer; a final linear layer projects the result to 16 features.
    """

    def __init__(self):
        super().__init__()
        # Nested container: two Linear + ReLU stages.
        self.net = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        )
        # Projection applied to the output of the nested stack: 32 -> 16.
        self.linear = nn.Linear(32, 16)

    def forward(self, x):
        """Run ``x`` through the nested stack, then the final projection."""
        features = self.net(x)
        return self.linear(features)

# Custom blocks compose freely with built-in layers: NestMLP emits 16
# features, the Linear layer widens them to the 20 that FixedHiddenMLP
# expects, and FixedHiddenMLP reduces everything to a scalar.
chimera = nn.Sequential(
    NestMLP(),
    nn.Linear(16, 20),
    FixedHiddenMLP(),
)
chimera(x)

tensor(-1.2686, grad_fn=<SumBackward0>)