In [2]:
import torch
from torch import nn
from torch.nn import functional as F

In [36]:
# Baseline MLP assembled from stock layers:
# 20 inputs -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(in_features=20, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=10),
)
# One unbatched sample with 20 features, drawn uniformly from [0, 1).
X = torch.rand(20)
net(X)

tensor([ 0.0296,  0.0016,  0.0473,  0.0491, -0.1060,  0.0293, -0.1065, -0.0795,
         0.1105, -0.1149], grad_fn=<AddBackward0>)

In [16]:
class MLP(nn.Module):
    """Two-layer perceptron written as a custom ``nn.Module``.

    A 20 -> 256 linear layer, a ReLU, then a 256 -> 10 linear layer.
    """

    def __init__(self):
        super().__init__()
        # Assigning layers as attributes registers them (and their
        # parameters) with the module.
        self.hidden = nn.Linear(in_features=20, out_features=256)
        self.out = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return the 10-dimensional output for input ``x`` (last dim 20)."""
        activated = F.relu(self.hidden(x))
        return self.out(activated)
# Instantiate the custom block and run it on X, the sample created in an
# earlier cell.
net = MLP()
net(X)

tensor([-0.1587, -0.1536,  0.4256, -0.2128,  0.0701, -0.1612, -0.0923,  0.0354,
         0.2073, -0.0137], grad_fn=<AddBackward0>)

In [20]:
class MySequential(nn.Module):
    """Minimal re-creation of ``nn.Sequential``.

    Each positional argument is stored in ``self._modules`` under its
    position as a string key ("0", "1", ...), and ``forward`` applies the
    stored blocks in insertion order.
    """

    def __init__(self, *args):
        super().__init__()
        # `_modules` is the dict nn.Module uses to track child modules;
        # writing to it directly is what makes the children discoverable.
        self._modules.update(
            {str(position): layer for position, layer in enumerate(args)}
        )

    def forward(self, x):
        """Feed ``x`` through every stored block in order and return the result."""
        result = x
        for layer in self._modules.values():
            result = layer(result)
        return result
        
# Wrap a ready-made 20 -> 256 -> 10 stack in the hand-written container and
# run the sample X through it.
net = MySequential(
    nn.Sequential(
        nn.Linear(20, 256),
        nn.ReLU(),
        nn.Linear(256, 10),
    )
)
net(X)

tensor([-0.0162, -0.0063,  0.0472, -0.0638, -0.0899,  0.2424, -0.0102,  0.0774,
         0.1852, -0.0770], grad_fn=<AddBackward0>)

In [45]:
class MySequentialList(nn.Module):
    """Sequential container that keeps its sub-blocks in ``self.models``.

    The blocks are applied to the input in the order they were passed.

    Args:
        *args: ``nn.Module`` instances to chain. ``nn.ModuleList`` raises
            ``TypeError`` for anything that is not an ``nn.Module``.
    """

    def __init__(self, *args):
        super().__init__()
        # A plain Python list is invisible to nn.Module: blocks stored in one
        # are never registered, so `parameters()` and `state_dict()` come back
        # empty and `.to(device)` skips them. nn.ModuleList still iterates,
        # indexes and appends like a list, but registers every child.
        self.models = nn.ModuleList(args)

    def forward(self, x):
        """Feed ``x`` through every block in order and return the result."""
        for block in self.models:
            x = block(x)
        return x
        
# Instantiate the list-backed container defined in this cell (not
# `MySequential`), so the cell actually runs the class it introduces.
net = MySequentialList(nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10)))
net(X)

tensor([-0.0285, -0.0871,  0.0590, -0.1573,  0.0392,  0.0974,  0.0253, -0.0467,
        -0.0426,  0.2539], grad_fn=<AddBackward0>)

In [33]:
class FixedHiddenMLP(nn.Module):
    """MLP with a fixed random weight matrix, a shared layer and control flow.

    ``forward`` applies the same ``nn.Linear(20, 20)`` layer twice, with a
    constant (untrained) 20x20 random matrix in between, then halves the
    result until its L1 norm is at most 1 and returns the scalar sum.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights that receive no gradient and therefore stay
        # fixed during training. Registered as a buffer so that
        # `.to()` / `.double()` / `.cuda()` move and cast them together with
        # the rest of the module; a plain tensor attribute is left behind and
        # makes `torch.mm` fail on a dtype/device mismatch.
        # `persistent=False` keeps the tensor out of `state_dict()`, so
        # checkpoint keys are the same as with a plain attribute.
        self.register_buffer("rand_weight", torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from ``X``.

        ``X`` must be 2-D with 20 columns: ``torch.mm`` only accepts matrices
        and both the linear layer and ``rand_weight`` expect 20 features.
        """
        X = self.linear(X)
        # Use the constant weights together with the `relu` and `mm` functions.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the fully-connected layer. This is equivalent to two
        # fully-connected layers sharing one set of parameters.
        X = self.linear(X)
        # Control flow: halve the activations until their L1 norm is <= 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# Mini-batch of 10 samples with 20 features each, uniform in [0, 1).
x = torch.rand(10, 20)
net = FixedHiddenMLP()
net(x)

tensor(-0.1398, grad_fn=<SumBackward0>)

In [40]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` inside a custom module.

    ``self.net`` maps 20 -> 64 -> 32 features with a ReLU after each linear
    layer; ``self.linear`` then projects the 32 features down to 16.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        )
        self.linear = nn.Linear(32, 16)

    def forward(self, x):
        """Run ``x`` through the nested stack, then the final projection."""
        features = self.net(x)
        return self.linear(features)
       
# Mix custom and built-in blocks freely: the 16-dim NestMLP output is widened
# to the 20 features FixedHiddenMLP expects, which then reduces to a scalar.
net = nn.Sequential(
    NestMLP(),
    nn.Linear(16, 20),
    FixedHiddenMLP(),
)
net(x)

tensor(-0.5488, grad_fn=<SumBackward0>)