### What is the difference between `nn.ModuleList()` and `nn.Sequential()`?
- nn.Module/nn.ModuleList/nn.Sequential are all containers that we could add module in it 

## `nn.Module`
- torch docs: https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module
- Base class for all neural network modules 
- The module addded into `nn.ModuleList` would automatically  be registered at module networks 
    and the module's parameters is automatically appended into networks

In [23]:
# Construct a basic Network and add hook at forward
import torch 
import torch.nn as nn 
import torch.nn.functional as F 

class NetSample(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20,20,5)
        self.max_pool = nn.MaxPool2d(3,stride=1,padding=1)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.max_pool(x)
        return x 

def hook_fn(module, input, output):
    """Ref:https://blog.paperspace.com/pytorch-hooks-gradient-clipping-debugging/"""
    print(module)
    print("-------------Module parameters----------------")
    neles = sum( [ param.nelement() for param in module.parameters()])
    print(neles)
    print("-------------Input Grad ----------------")

    for grad in input:
        try:
            print(grad.shape)
        except AttributeError:
            print("None found for Gradient")
    print("-------------Output Grad ----------------")
    for grad in output:
        try: 
            print(grad.shape)
        except AttributeError:
            print("None found for Gradient")
    print("\n")

net = NetSample()
# Iterate the childern to register the forward hook
for i in net.children():
    print(i.register_forward_hook(hook_fn))

input = torch.randn(1,1,224,224)
net(input).shape


<torch.utils.hooks.RemovableHandle object at 0x7fea4080ced0>
<torch.utils.hooks.RemovableHandle object at 0x7fea4080ced0>
<torch.utils.hooks.RemovableHandle object at 0x7fea4080ced0>
Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
-------------Module parameters----------------
520
-------------Input Grad ----------------
torch.Size([1, 1, 224, 224])
-------------Output Grad ----------------
torch.Size([20, 220, 220])


Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1))
-------------Module parameters----------------
10020
-------------Input Grad ----------------
torch.Size([1, 20, 220, 220])
-------------Output Grad ----------------
torch.Size([20, 216, 216])


MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
-------------Module parameters----------------
0
-------------Input Grad ----------------
torch.Size([1, 20, 216, 216])
-------------Output Grad ----------------
torch.Size([20, 216, 216])




torch.Size([1, 20, 216, 216])

In [62]:
## Don't use Python built in list, the module appened in  builted in list could be registered at the networks

class NetWithPythonBuiltInList(nn.Module):
    def __init__(self):
        super(NetWithPythonBuiltInList,self).__init__()
        self.linears = [nn.Linear(10,10) for i in range(2)]
    
    def forward(self,x):
        for linear_ele in self.linears:
            x = linear_ele(x)
        return x 

net = NetWithPythonBuiltInList()
print(net)
print(list(net.parameters()))

NetWithPythonBuiltInList()
[]


### `nn.ModuleList`
- torch docs: https://pytorch.org/docs/stable/generated/torch.nn.ModuleList.html#torch.nn.ModuleList
- Could be indexed like a regular Python List 

In [41]:
# ModuleList can act as an iterable or be indexed using ints

class MyModule(nn.Module):
    def __init__(self):
        super(MyModule,self).__init__()
        self.linears = nn.ModuleList([ nn.Linear(10,10) for i in range(10) ]) 
    
    def forward(self,x):
        for index, linear_ele in enumerate(self.linears):
            self.linears[index//2](x) + linear_ele(x)
        return x 

def hook_fn(module, input, output):
    """Ref:https://blog.paperspace.com/pytorch-hooks-gradient-clipping-debugging/"""
    print(module)
    print("-------------Module parameters----------------")
    neles = sum( [ param.nelement() for param in module.parameters()])
    print(neles)
    print("-------------Input Grad ----------------")

    for grad in input:
        try:
            print(grad.shape)
        except AttributeError:
            print("None found for Gradient")
    print("-------------Output Grad ----------------")
    for grad in output:
        try: 
            print(grad.shape)
        except AttributeError:
            print("None found for Gradient")
    print("\n")

net = MyModule()
# Iterate the childern to register the forward hook
# We may meet some problem to register at ModuleList when we use `.named_childern()`
# Same issue: https://stackoverflow.com/questions/69078576/pytorch-hook-function-is-not-executed
# a ModuleList only have a single child, which contains multiple sub modules
# for name, module in net.named_children():
#     print(name, module )
    # print(module.register_forward_hook(hook_fn))


# We could use `.named_modules` to register
# Bute `.named_modules` would recursively return the complete model
for name, module in net.named_modules():
    if isinstance(module,nn.Linear):
        print(name, module )
        print(module.register_forward_hook(hook_fn))

input = torch.randn(1,10)
net(input)


linears.0 Linear(in_features=10, out_features=10, bias=True)
<torch.utils.hooks.RemovableHandle object at 0x7fea613dff90>
linears.1 Linear(in_features=10, out_features=10, bias=True)
<torch.utils.hooks.RemovableHandle object at 0x7fea710ad850>
linears.2 Linear(in_features=10, out_features=10, bias=True)
<torch.utils.hooks.RemovableHandle object at 0x7fea710acc90>
linears.3 Linear(in_features=10, out_features=10, bias=True)
<torch.utils.hooks.RemovableHandle object at 0x7fea70fa0290>
linears.4 Linear(in_features=10, out_features=10, bias=True)
<torch.utils.hooks.RemovableHandle object at 0x7fea613e33d0>
linears.5 Linear(in_features=10, out_features=10, bias=True)
<torch.utils.hooks.RemovableHandle object at 0x7fea613e3550>
linears.6 Linear(in_features=10, out_features=10, bias=True)
<torch.utils.hooks.RemovableHandle object at 0x7fea710c8450>
linears.7 Linear(in_features=10, out_features=10, bias=True)
<torch.utils.hooks.RemovableHandle object at 0x7fea710c8f50>
linears.8 Linear(in_feat

tensor([[-2.3160,  0.7772, -0.7059, -0.1074, -0.4508,  0.4547,  0.5520, -0.2393,
         -0.2101,  1.0877]])

In [44]:
# Iterate print the parameters
for param in net.parameters():
    print(type(param.data), param.size())

<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])


In [46]:
# Use nn.ModuleList with index 
# - The module's parameters in the nn.ModuleList are shareable even thouth the module is called multiple times

class net4(nn.Module):
    def __init__(self):
        super(net4,self).__init__()
        self.linears = nn.ModuleList([nn.Linear(5,10), nn.Linear(10,10)])
    
    def forward(self,x):
        x = self.linears[0](x)
        x = self.linears[1](x)
        x = self.linears[1](x)
        return x 

net = net4()

print(net)


#Iteratively print the module 
for name, param in net.named_parameters():
    print(name, param.size())

net4(
  (linears): ModuleList(
    (0): Linear(in_features=5, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
  )
)
linears.0.weight torch.Size([10, 5])
linears.0.bias torch.Size([10])
linears.1.weight torch.Size([10, 10])
linears.1.bias torch.Size([10])


## `nn.Sequential`
- Implement forward function in it
- The module in the `nn.Sequential` is list by the order, so we should make sure the previous module output size is the same as the next module size input

In [49]:
# Example of using nn.Sequential
class NetWithSequential(nn.Module):
    def __init__(self):
        super(NetWithSequential,self).__init__()
        self.block = nn.Sequential( 
            nn.Conv2d(1,20,5),
            nn.ReLU(),
            nn.Conv2d(20,64,5),
            nn.ReLU()
        )
    
    def forward(self, x):
        x = self.block(x)
        return x 


net = NetWithSequential()
print(net)

dummy_input = torch.randn(1,224,224)
print(net(dummy_input).shape)
    

NetWithSequential(
  (block): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
  )
)
torch.Size([64, 216, 216])


In [64]:
# Example of using Sequential with OrderedDict 

import collections 

class NetWithSequentialOrderDict(nn.Module):
    def __init__(self):
        super(NetWithSequentialOrderDict,self).__init__()

        # We use `collections.OrderedDict` to specify the name of every module 
        self.block = nn.Sequential( 
            collections.OrderedDict([
             ('conv1',nn.Conv2d(1,20,5) ),
             ('relu1',nn.ReLU()),
             ('conv2',nn.Conv2d(20,64,5)),
             ('relu2',nn.ReLU())
            ])
        )
    
    def forward(self, x):
        x = self.block(x)
        return x 


net = NetWithSequentialOrderDict()
print(net)

# print(list(net.parameters()))

NetWithSequentialOrderDict(
  (block): Sequential(
    (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    (relu1): ReLU()
    (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    (relu2): ReLU()
  )
)


In [60]:
# What is `*` in python ?
# 1. A method to unpack a list
#    - https://www.learncodewithmike.com/2019/12/python-unpacking.html
# 2. When we don't know the accurate numbers of argument to pass 
#    - https://developer.aliyun.com/article/282239


# Example 1. A method to unpack a list
letters = ["A", "B", "C", "D", "E","F","G","H","I"]
# An example to fetch the first/last element and use * to pack the other elements
first, *other, last = letters 
print(first)
print(other)
print(last)

# An example to independently define the needed numbers of variables and unpackaging the last elements
first,second, *other = letters 
print(first)
print(second)
print(other)


# Example 2. When we don't know the accurate numbers of argument to pass 
a = ['123','3434','5']
b = ['123','3434','5','123','3434','5']

def print_fn(*list_):
    for ele in list_:
        print(ele)

print_fn(b)
print_fn(a)

A
['B', 'C', 'D', 'E', 'F', 'G', 'H']
I
A
B
['C', 'D', 'E', 'F', 'G', 'H', 'I']
['123', '3434', '5', '123', '3434', '5']
['123', '3434', '5']


## Scenario to use `nn.ModuleList` and `nn.Sequential`

1. Network contains many repeat layers: we may use `for` to  construct them
    a. Use `list` to place the module
    b. Use `nn.Sequential` to package the list

2. We need the past layers output 

In [66]:
#Ref:https://zhuanlan.zhihu.com/p/64990232
class net7(nn.Module):
    def __init__(self):
        super(net7, self).__init__()
        self.linear_list = [nn.Linear(10, 10) for i in range(3)]
        self.linears = nn.Sequential(*self.linear_list)

    def forward(self, x):
        self.x = self.linears(x)
        return x

net = net7()
print(net)
# net7(
#   (linears): Sequential(
#     (0): Linear(in_features=10, out_features=10, bias=True)
#     (1): Linear(in_features=10, out_features=10, bias=True)
#     (2): Linear(in_features=10, out_features=10, bias=True)
#   )
# )

net7(
  (linears): Sequential(
    (0): Linear(in_features=10, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
  )
)


In [68]:
# Ref: https://zhuanlan.zhihu.com/p/64990232
class net8(nn.Module):
    def __init__(self):
        super(net8, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 20), nn.Linear(20, 30), nn.Linear(30, 50)])
        self.trace = []

    def forward(self, x):
        for layer in self.linears:
            x = layer(x)
            self.trace.append(x)
        return x

net = net8()
input  = torch.randn(32, 10) # input batch size: 32
output = net(input)
for each in net.trace:
    print(each.shape)
# torch.Size([32, 20])
# torch.Size([32, 30])
# torch.Size([32, 50])

torch.Size([32, 20])
torch.Size([32, 30])
torch.Size([32, 50])
