In [None]:
# In this notebook, you will learn:
# 
# 1) How to use the PyTorch 'nn.Module' class to build custom models.
# 2) How to use the Sequential module.
# 3) What the submodules of a module are.
# 4) What the state dictionary of a module is.
# 5) What the buffers of a module are.
# 6) How to use ModuleList.

In [1]:
import torch
from torch import nn, Tensor

In [1]:
# The official PyTorch notes on modules (https://pytorch.org/docs/stable/notes/modules.html) explain how to use modules.
# The examples below are taken from that page.

## Building Custom Modules

In [None]:
# Refer to 'modules/understanding_nn_linear.ipynb' to understand how to use the built-in Linear
# layer from PyTorch.
# We will now build a linear layer ourselves on top of PyTorch's 'nn.Module' class.

In [7]:
# All custom modules should inherit from PyTorch's 'nn.Module'. It provides various useful features
# that help train the model.
class CustomLinearLayer(nn.Module):
    """Hand-written linear layer that computes ``input @ weight + bias``.

    Args:
        num_in_features: Size of the last dimension of the input tensor.
        num_out_features: Size of the last dimension of the output tensor.
    """

    def __init__(self, num_in_features: int, num_out_features: int):
        super().__init__()
        # Wrapping a tensor in nn.Parameter marks it as a learnable variable of the model.
        # The module keeps track of every parameter assigned as an attribute (they are
        # exposed through 'parameters()') and gradients are calculated for them during
        # back propagation.
        initial_weight = torch.randn(size=(num_in_features, num_out_features), dtype=torch.float)
        self.weight = nn.Parameter(data=initial_weight, requires_grad=True)
        initial_bias = torch.randn(size=(num_out_features,), dtype=torch.float)
        self.bias = nn.Parameter(data=initial_bias, requires_grad=True)

    def forward(self, input: Tensor):
        """Apply the linear transformation to `input` and return the result.

        The forward function of a module can perform any arbitrary operation; this one
        multiplies the input by the weight matrix and adds the bias.
        """
        # torch.matmul is the function behind the '@' operator. Refer to
        # 'understanding_tensor_manipulations_part_4.ipynb' to understand more about it.
        projected = torch.matmul(input, self.weight)
        return projected + self.bias

In [8]:
# Build a layer that maps 5 input features to 2 output features and display it.
linear_layer = CustomLinearLayer(5, 2)
print(linear_layer)

CustomLinearLayer()


In [11]:
# A 1-D float tensor holding five values, i.e. one value per input feature.
sample_input_1 = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float)
print(sample_input_1, sample_input_1.shape, sep="\n")

tensor([1., 2., 3., 4., 5.])
torch.Size([5])


In [12]:
# A module instance is callable: calling it invokes the module's forward function.
output_1 = linear_layer(sample_input_1)
print(output_1, output_1.shape, sep="\n")

tensor([ 6.6635, -9.2504], grad_fn=<AddBackward0>)
torch.Size([2])


## Sequential Module

In [13]:
# A module can hold other modules, which lets us combine several functionalities.
# nn.Sequential is the simplest way to do that: it chains the modules it is given.

# Each layer's output becomes the next layer's input:
#   first CustomLinearLayer -> ReLU -> last CustomLinearLayer
# so the 2 features produced by the first layer pass through ReLU and are then reduced
# to a single feature by the last layer.
sequential_network = nn.Sequential(
    CustomLinearLayer(5, 2),
    nn.ReLU(),
    CustomLinearLayer(2, 1),
)
print(sequential_network)

Sequential(
  (0): CustomLinearLayer()
  (1): ReLU()
  (2): CustomLinearLayer()
)


In [14]:
# Five float values: the first layer of the sequential network expects 5 input features.
sample_input_2 = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float)
print(sample_input_2, sample_input_2.shape, sep="\n")

tensor([1., 2., 3., 4., 5.])
torch.Size([5])


In [15]:
# Run the input through every layer of the sequential network in order.
output_2 = sequential_network(sample_input_2)
print(output_2, output_2.shape, sep="\n")

tensor([-0.8641], grad_fn=<AddBackward0>)
torch.Size([1])


## SubModules of a Module

In [2]:
class CustomNetwork(nn.Module):
    """Small feed-forward network: Linear(3 -> 4), ReLU, Linear(4 -> 1)."""

    def __init__(self):
        super().__init__()
        # These are the children or submodules of CustomNetwork since these define the layers of the
        # CustomNetwork and are used in the forward method. Assigning a module to an attribute is
        # what registers it as a child.
        self.linear_layer0 = nn.Linear(in_features=3, out_features=4)
        self.linear_layer1 = nn.Linear(in_features=4, out_features=1)

    def forward(self, input: Tensor) -> Tensor:
        """Run `input` through both linear layers with a ReLU in between.

        Args:
            input: Tensor whose last dimension has 3 features.

        Returns:
            Tensor whose last dimension has 1 feature.
        """
        output = self.linear_layer0(input)
        # 'nn.ReLU' is a module class, so 'nn.ReLU(output)' would construct a ReLU module
        # rather than apply the activation to the tensor. The functional form applies ReLU
        # directly and does not register an extra child module.
        output = nn.functional.relu(output)
        output = self.linear_layer1(output)
        return output

In [4]:
# Printing a module lists its registered submodules under their attribute names.
custom_network = CustomNetwork()
print(custom_network)

CustomNetwork(
  (linear_layer0): Linear(in_features=3, out_features=4, bias=True)
  (linear_layer1): Linear(in_features=4, out_features=1, bias=True)
)


In [5]:
# 'named_children()' yields one (attribute name, submodule) pair for every direct child
# (submodule) of the module.
for child_name, child in custom_network.named_children():
    print((child_name, child))

('linear_layer0', Linear(in_features=3, out_features=4, bias=True))
('linear_layer1', Linear(in_features=4, out_features=1, bias=True))


## State Dictionary of a Module

In [6]:
# The following documentation and examples have been copied from the official pytorch documentation.
# (https://pytorch.org/docs/stable/notes/modules.html#module-state).
#
# A module's state_dict contains all the information necessary to hold a model. This information includes:
# 1) Parameters: 
#       -- Trainable (learnable) model parameters.
# 2) Buffers
#       -- Non-Trainable (non-learnable) variables of the model.
#       -- Variables that are not trainable by the model but affect the computations performed by the model.
#       -- Buffers are of two types:
#               -- Persistent Buffers
#                       -- Contained within the state_dict.
#               -- Non Persistent Buffers
#                       -- Not contained within the state_dict.
#  

In [10]:
# An example usage of Buffers is the running mean that is maintained when Batch Normalization is used 
# within the models. The mean is just computed repeatedly to be used during the inference time, but 
# it is not learnt (or trained) during model training.
class RunningMean(nn.Module):
    """Keeps an exponentially weighted running mean of the inputs it is called with.

    The running mean lives in a persistent buffer named 'mean': it is stored in the
    module's state_dict but it is not a learnable parameter.
    """

    # Please ignore the meaning of momentum for now. We will deal with this in other notebooks.
    def __init__(self, num_features, momentum=0.9):
        super().__init__()
        self.momentum = momentum
        # register_buffer declares a tensor as a buffer of the module instead of a parameter.
        # https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.register_buffer
        # Excellent discussions (links below) on the usage of Buffers vs Parameters (with requires_grad = False):
        # https://discuss.pytorch.org/t/what-does-register-buffer-do/121091
        # https://discuss.pytorch.org/t/what-is-the-difference-between-register-buffer-and-register-parameter-of-nn-module/32723
        initial_mean = torch.zeros(num_features)
        self.register_buffer(name='mean', tensor=initial_mean, persistent=True)

    def forward(self, x):
        """Blend `x` into the running mean, store the new mean and return it."""
        carried_over = self.momentum * self.mean
        new_contribution = (1.0 - self.momentum) * x
        self.mean = carried_over + new_contribution
        return self.mean

In [9]:
# The 'mean' tensor shows up in the state_dict because it was registered as a 'persistent' buffer.
# Calling the module once updates the buffer before the state_dict is printed.
rm = RunningMean(num_features=4)
input = torch.randn(4)
rm(input)
print(rm.state_dict())

OrderedDict([('mean', tensor([-0.1160,  0.1041,  0.1457, -0.1391]))])


## [nn.ModuleList](https://pytorch.org/docs/stable/generated/torch.nn.ModuleList.html#modulelist)

In [None]:
# ModuleList is useful when we want to dynamically create a list of modules. If we create submodules and add
# them to a python list manually, then the submodules will not be registered as children of the parent module.
# Using ModuleList to store the list of modules is useful because Pytorch automatically registers the modules
# within the ModuleList as children of the parent module.

In [None]:
class DynamicLayer(nn.Module):
    """Stack of `num_layers` Linear(4, 4) layers applied one after another.

    Args:
        num_layers: Number of linear layers to create. With 0 layers the module
            returns its input unchanged.
    """

    def __init__(self, num_layers: int):
        super().__init__()
        # Storing the layers in an nn.ModuleList (instead of a plain python list) registers
        # every layer as a child of this module.
        self.layers = nn.ModuleList([nn.Linear(4, 4) for _ in range(num_layers)])

    def forward(self, input: Tensor) -> Tensor:
        """Pass `input` through every layer in order and return the final output.

        Args:
            input: Tensor whose last dimension has 4 features.
        """
        # Start from the input and feed each layer's output into the next layer.
        output = input
        for layer in self.layers:
            output = layer(output)
        return output