# 5.1. Layers and Blocks

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

# Two fully connected layers with a ReLU activation in between
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# A random minibatch of shape (2, 20) that is fed through the network
X = torch.rand(2, 20)
net(X)

tensor([[ 0.0308,  0.2777, -0.3469,  0.0461,  0.0038, -0.0363, -0.0876,  0.1536,
          0.0184,  0.1103],
        [ 0.0115,  0.1792, -0.1553, -0.0465, -0.0214, -0.1112, -0.0146,  0.0237,
          0.0601,  0.0244]], grad_fn=<AddmmBackward0>)

## 5.1.1. A Custom Block

In [2]:
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer.

    Maps 20 input features through a 256-unit fully connected hidden layer
    with a ReLU activation to 10 outputs.
    """

    def __init__(self):
        # Run the `nn.Module` constructor before assigning any layers so that
        # the layers declared below are registered as submodules
        super().__init__()
        # Hidden layer: 20 -> 256
        self.hidden = nn.Linear(20, 256)
        # Output layer: 256 -> 10
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Compute the model output for the input `X`."""
        # ReLU is applied through the functional API from `nn.functional`
        # instead of a separate `nn.ReLU` module
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)

In [3]:
# Instantiate the custom `MLP` block and apply it to the minibatch `X`
net = MLP()
net(X)

tensor([[ 0.1738,  0.1398, -0.0119, -0.0297, -0.0875, -0.0662, -0.0092,  0.0929,
         -0.2273, -0.0150],
        [ 0.1450, -0.0398, -0.0318, -0.0138, -0.0962, -0.0406, -0.0115,  0.1156,
         -0.0477, -0.0141]], grad_fn=<AddmmBackward0>)

## 5.1.2. The Sequential Block

In [4]:
class MySequential(nn.Module):
    """Minimal re-implementation of `nn.Sequential`.

    The blocks passed to the constructor are registered under the names
    "0", "1", ... and are applied in that order during forward propagation.

    Raises:
        TypeError: If an argument is neither an `nn.Module` nor `None`.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # Register through the public `add_module` API instead of writing
            # to the private `_modules` dict. `add_module` stores the block in
            # the same ordered dict but first checks that it really is a
            # `Module`, so a wrong argument fails here rather than later in
            # `forward` or `parameters()`
            self.add_module(str(idx), module)

    def forward(self, X):
        # `_modules` preserves insertion order, so the blocks run in the order
        # they were passed in. It is iterated directly (rather than through
        # `children()`, which skips duplicates) so that a block passed more
        # than once is applied every time it appears
        for block in self._modules.values():
            X = block(X)
        return X

In [5]:
# Build the same 20 -> 256 -> 10 MLP with `MySequential` and apply it to `X`
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[ 0.1538,  0.0656,  0.0823, -0.1595,  0.2318, -0.2962, -0.1597, -0.1017,
          0.0809, -0.1212],
        [ 0.0398,  0.0494,  0.0740, -0.2106,  0.1352, -0.3015, -0.0705, -0.0108,
          0.0888, -0.0439]], grad_fn=<AddmmBackward0>)

## 5.1.3. Executing Code in the Forward Propagation Function

In [6]:
class FixedHiddenMLP(nn.Module):
    """Block that combines a trainable layer with constant random weights.

    The forward pass applies one shared linear layer twice, with a fixed
    (non-trainable) random projection and a ReLU in between, then halves the
    result until its L1 norm is at most 1 and returns the sum as a scalar.
    """

    def __init__(self):
        super().__init__()
        # Random weights that do not require gradients and therefore stay
        # constant during training. They are registered as a buffer (not kept
        # as a plain tensor attribute) so that `.to(device)`, `.cuda()` and
        # `.double()` move/cast them together with the rest of the module.
        # `persistent=False` keeps them out of `state_dict`, exactly as when
        # they were a plain attribute
        self.register_buffer(
            'rand_weight', torch.rand((20, 20), requires_grad=False),
            persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        X = self.linear(X)
        # Use the constant random weights, as well as the `relu` and `mm`
        # functions
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the fully connected layer. This is equivalent to two fully
        # connected layers sharing their parameters
        X = self.linear(X)
        # Control flow: halve `X` in place until its L1 norm is at most 1
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [7]:
# Instantiate `FixedHiddenMLP` and apply it to `X`; its forward pass returns a
# scalar (the sum of the rescaled activations)
net = FixedHiddenMLP()
net(X)

tensor(-0.2846, grad_fn=<SumBackward0>)

In [8]:
class NestMLP(nn.Module):
    """Block that nests a `Sequential` sub-network under a linear layer.

    The nested network maps 20 -> 64 -> 32 features with a ReLU after each
    linear layer; the final linear layer maps 32 -> 16.
    """

    def __init__(self):
        super().__init__()
        # Layers of the nested sub-network, in the order they are applied
        hidden_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*hidden_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run `X` through the nested network, then the linear layer."""
        features = self.net(X)
        return self.linear(features)

# Mix and match blocks: a `NestMLP` (20 -> 16), a linear layer (16 -> 20) and
# a `FixedHiddenMLP`, whose forward pass reduces the result to a scalar
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(-0.2296, grad_fn=<SumBackward0>)

## 5.1.4. Efficiency

The avid reader might start to worry about the efficiency of some of these operations. After all, we have lots of dictionary lookups, code execution, and lots of other Pythonic things taking place in what is supposed to be a high-performance deep learning library. The problems of Python’s global interpreter lock are well known. In the context of deep learning, we may worry that our extremely fast GPU(s) might have to wait until a puny CPU runs Python code before it gets another job to run.

## 5.1.5. Summary

Layers are blocks.

A block can comprise many layers.

A block can comprise many blocks.

A block can contain code.

Blocks take care of lots of housekeeping, including parameter initialization and backpropagation.

Sequential concatenations of layers and blocks are handled by the Sequential block.