# MODULE
Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing you to nest them in a tree structure. You can assign the submodules as regular attributes:

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Small two-convolution network showing that a Module can contain other Modules."""

    def __init__(self):
        super().__init__()
        # Submodules are assigned as regular attributes of the parent module.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=20, kernel_size=5)

    def forward(self, x):
        """Run ``x`` through conv1 and conv2, applying a ReLU after each convolution."""
        hidden = F.relu(self.conv1(x))
        out = self.conv2(hidden)
        return F.relu(out)

In [2]:
# apply(fn): Applies fn recursively to every submodule (as returned by .children()) as well as self. 
# Typical use includes initializing the parameters of a model.
@torch.no_grad()
def init_weights(m):
    """Print module ``m``; if it is exactly an ``nn.Linear``, fill its weight with ones.

    Meant to be handed to ``Module.apply``, which calls it once per visited module.
    The filled weight is printed as well.
    """
    print(m)
    if type(m) != nn.Linear:
        # Anything that is not exactly nn.Linear is only printed.
        return
    # In-place fill, performed under the torch.no_grad() decorator.
    m.weight.fill_(1.0)
    print(m.weight)
# Two stacked linear layers: 2 -> 2 -> 1.
net = nn.Sequential(
    nn.Linear(2, 2),
    nn.Linear(2, 1),
)
# apply() visits the children first and the container module itself last.
net.apply(init_weights)

Linear(in_features=2, out_features=2, bias=True)
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
Linear(in_features=2, out_features=1, bias=True)
Parameter containing:
tensor([[1., 1.]], requires_grad=True)
Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=1, bias=True)
)


Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=1, bias=True)
)

#### More APIs can be found [here](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module).

## API Introduction
### Part of the class [nn.Module](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module) source code is cited below

#### \_\_init__(),   register_buffer(),   register_parameter(),   add_module(),   get_submodule()等函数

In [None]:
    ...... # earlier content omitted
    
    def __init__(self) -> None:  # constructor
        """
        Initializes internal Module state, shared by both nn.Module and ScriptModule.
        """
        torch._C._log_api_usage_once("python.nn_module")

        # training defaults to True, so layers such as dropout and batch norm
        # follow their training-time behavior by default.
        self.training = True
        # Most of the member state is stored in (ordered) dictionaries.
        self._parameters: Dict[str, Optional[Parameter]] = OrderedDict()
        # Registered buffers by name; get_buffer() checks membership in this dict.
        self._buffers: Dict[str, Optional[Tensor]] = OrderedDict()
        self._non_persistent_buffers_set: Set[str] = set()
        self._backward_hooks: Dict[int, Callable] = OrderedDict()
        self._is_full_backward_hook = None
        self._forward_hooks: Dict[int, Callable] = OrderedDict()
        self._forward_pre_hooks: Dict[int, Callable] = OrderedDict()
        self._state_dict_hooks: Dict[int, Callable] = OrderedDict()
        self._load_state_dict_pre_hooks: Dict[int, Callable] = OrderedDict()
        # Child modules by name (add_module stores entries here).
        self._modules: Dict[str, Optional['Module']] = OrderedDict()

    # Class-level default for ``forward``: bound to ``_forward_unimplemented``
    # (defined outside this excerpt) unless a subclass defines its own ``forward``.
    forward: Callable[..., Any] = _forward_unimplemented

        
   
    def register_buffer(self, name: str, tensor: Optional[Tensor], persistent: bool = True) -> None:  # registers a buffer

        # A buffer is not a model parameter (e.g. the running mean/variance kept by
        # batch normalization): it holds statistics of the data, yet it is still
        # part of the model (a piece of the module's state).
        # Buffers are persistent by default and are normally saved in state_dict
        # together with the parameters.
        r"""Adds a buffer to the module.

        This is typically used to register a buffer that should not be
        considered a model parameter. For example, BatchNorm's ``running_mean``
        is not a parameter, but is part of the module's state. Buffers, by
        default, are persistent and will be saved alongside parameters. This
        behavior can be changed by setting :attr:`persistent` to ``False``. The
        only difference between a persistent buffer and a non-persistent buffer
        is that the latter will not be a part of this module's
        :attr:`state_dict`.

        Buffers can be accessed as attributes using given names.

        Args:
            name (string): name of the buffer. The buffer can be accessed
                from this module using the given name
            tensor (Tensor or None): buffer to be registered. If ``None``, then operations
                that run on buffers, such as :attr:`cuda`, are ignored. If ``None``,
                the buffer is **not** included in the module's :attr:`state_dict`.
            persistent (bool): whether the buffer is part of this module's
                :attr:`state_dict` (i.e. whether it gets saved).

        Example::

            >>> self.register_buffer('running_mean', torch.zeros(num_features)) # e.g. useful when implementing your own batch-norm class

        """
        
    ...... # remainder omitted
    
    def register_parameter(self, name: str, param: Optional[Parameter]) -> None:   # used more often than register_buffer
        r"""Adds a parameter to the module.

        The parameter can be accessed as an attribute using given name.

        Args:
            name (string): name of the parameter. The parameter can be accessed
                from this module using the given name
            param (Parameter or None): parameter to be added to the module
                (the ``Parameter`` class is a subclass of ``Tensor``). If
                ``None``, then operations that run on parameters, such as :attr:`cuda`,
                are ignored. If ``None``, the parameter is **not** included in the
                module's :attr:`state_dict`.

        Example:: # usage sketch

            class GaussianModel(nn.Module):
                def __init__(self):
                    super(GaussianModel, self).__init__()
                    self.register_parameter('mean', nn.Parameter(torch.zeros(1),requires_grad=True)) # pass the name and the Parameter object
                    ...
                def forward(self, x):
                    ...
        """
        # Parameter class path: CLASS torch.nn.parameter.Parameter(data=None, requires_grad=True)
        # When defining learnable values inside a model, use the Parameter type rather
        # than a plain tensor: a Parameter is added to the module's parameter list
        # automatically.

        ...... # remainder omitted
        
        
    def add_module(self, name: str, module: Optional['Module']) -> None:
        # Adds a child module to the current module; it ends up stored in the
        # self._modules dictionary ( self._modules[name] = module ).
        """ Adds a child module to the current module."""

        ...... # remainder omitted
        
        
    def register_module(self, name: str, module: Optional['Module']) -> None:   # same as add_module
        r"""Alias for :func:`add_module`."""
        # Pure delegation: forwards both arguments unchanged.
        self.add_module(name, module)

        ...... # remainder omitted
        
        
    def get_submodule(self, target: str) -> "Module":   # looks up a submodule of the current module
        """
        Returns the submodule given by ``target`` if it exists,
        otherwise throws an error.

        For example, let's say you have an ``nn.Module`` ``A`` that
        looks like this:

        .. code-block::text

            A(
                (net_b): Module(
                    (net_c): Module(
                        (conv): Conv2d(16, 33, kernel_size=(3, 3), stride=(2, 2))
                    )
                    (linear): Linear(in_features=100, out_features=200, bias=True)
                )
            )

        (The diagram shows an ``nn.Module`` ``A``. ``A`` has a nested
        submodule ``net_b``, which itself has two submodules ``net_c``
        and ``linear``. ``net_c`` then has a submodule ``conv``.)

        To check whether or not we have the ``linear`` submodule, we
        would call ``get_submodule("net_b.linear")``. To check whether
        we have the ``conv`` submodule, we would call
        ``get_submodule("net_b.net_c.conv")``.
        (Usage note: the path components are joined with ``.``.)
        """
        ...... # remainder omitted
        
        

#### get_parameter()函数

In [None]:
    def get_parameter(self, target: str) -> "Parameter":
        """
        Look up the ``nn.Parameter`` named by the dotted path ``target``.

        Everything before the last ``.`` is treated as a submodule path and
        resolved with ``get_submodule``; the part after it is the attribute
        name on that submodule. The module path must therefore be written
        out in full.

        Args:
            target: The fully-qualified string name of the Parameter
                to look for. (See ``get_submodule`` for how to specify a
                fully-qualified string.)

        Returns:
            torch.nn.Parameter: The Parameter referenced by ``target``

        Raises:
            AttributeError: If the target string references an invalid
                path or resolves to something that is not an
                ``nn.Parameter``
        """
        # rpartition yields (text left of the last ".", the "." itself, text right of it).
        owner_path, _, leaf = target.rpartition(".")

        # The left part is the module path; fetch the submodule that should own the attribute.
        owner: torch.nn.Module = self.get_submodule(owner_path)

        if hasattr(owner, leaf):
            candidate: torch.nn.Parameter = getattr(owner, leaf)
            # The attribute exists, but it must actually be a Parameter instance.
            if isinstance(candidate, torch.nn.Parameter):
                return candidate
            raise AttributeError(f"`{leaf}` is not an nn.Parameter")

        raise AttributeError(f"{owner._get_name()} has no attribute `{leaf}`")

#### get_buffer()函数

In [None]:
    def get_buffer(self, target: str) -> "Tensor":
        """
        Look up the buffer named by the dotted path ``target``.

        Everything before the last ``.`` is treated as a submodule path and
        resolved with ``get_submodule``; the part after it is the buffer
        name on that submodule. The module path must therefore be written
        out in full.

        Args:
            target: The fully-qualified string name of the buffer
                to look for. (See ``get_submodule`` for how to specify a
                fully-qualified string.)

        Returns:
            torch.Tensor: The buffer referenced by ``target``

        Raises:
            AttributeError: If the target string references an invalid
                path or resolves to something that is not a
                buffer
        """
        owner_path, _, leaf = target.rpartition(".")
        owner: torch.nn.Module = self.get_submodule(owner_path)

        if not hasattr(owner, leaf):
            raise AttributeError(f"{owner._get_name()} has no attribute `{leaf}`")

        found: torch.Tensor = getattr(owner, leaf)

        # A buffer is a plain tensor, so a type check cannot tell it apart from any
        # other tensor attribute; membership in the owner's _buffers dict is used instead.
        if leaf in owner._buffers:
            return found

        raise AttributeError(f"`{leaf}` is not a buffer")

#### _apply()函数

In [None]:
    def _apply(self, fn):
        """Apply ``fn`` to the tensors held by this module and by all of its descendants.

        Three kinds of objects are processed: child modules (recursively), this
        module's parameters (together with their gradients, when present) and
        this module's buffers.

        Args:
            fn: callable that takes a tensor and returns the converted tensor.

        Returns:
            Module: self
        """
        for module in self.children():
            module._apply(fn)    # recurse: run fn over every child module first

        def compute_should_use_set_data(tensor, tensor_applied):
            if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
                # If the new tensor has compatible tensor type as the existing tensor,
                # the current behavior is to change the tensor in-place using `.data =`,
                # and the future behavior is to overwrite the existing tensor. However,
                # changing the current behavior is a BC-breaking change, and we want it
                # to happen in future releases. So for now we introduce the
                # `torch.__future__.get_overwrite_module_params_on_conversion()`
                # global flag to let the user control whether they want the future
                # behavior of overwriting the existing tensor or not.
                return not torch.__future__.get_overwrite_module_params_on_conversion()
            else:
                return False

        for key, param in self._parameters.items():
            if param is None:
                continue
            # Tensors stored in modules are graph leaves, and we don't want to
            # track autograd history of `param_applied`, so we have to use
            # `with torch.no_grad():`
            with torch.no_grad():
                param_applied = fn(param)    # apply fn to every parameter
            should_use_set_data = compute_should_use_set_data(param, param_applied)
            if should_use_set_data:
                # Keep the same Parameter object and swap its data in place.
                param.data = param_applied
                out_param = param
            else:
                # Otherwise wrap the result in a new Parameter (same requires_grad)
                # and store it under the same key.
                assert isinstance(param, Parameter)
                assert param.is_leaf
                out_param = Parameter(param_applied, param.requires_grad)
                self._parameters[key] = out_param

            # The gradient, if any, gets the same treatment as the parameter itself.
            if param.grad is not None:
                with torch.no_grad():
                    grad_applied = fn(param.grad)
                should_use_set_data = compute_should_use_set_data(param.grad, grad_applied)
                if should_use_set_data:
                    out_param.grad.data = grad_applied
                else:
                    assert param.grad.is_leaf
                    out_param.grad = grad_applied.requires_grad_(param.grad.requires_grad)

        for key, buf in self._buffers.items():
            if buf is not None:
                self._buffers[key] = fn(buf)    # apply fn to every (non-None) buffer as well

        return self

#### apply()函数

In [None]:
    def apply(self: T, fn: Callable[['Module'], None]) -> T:
        r"""Call ``fn`` on every submodule (as returned by ``.children()``, recursively)
        and then on this module itself.

        Each child is processed completely, including its own children, before
        ``fn`` is invoked on ``self``. A typical use is initializing the
        parameters of a model (see also :ref:`nn-init-doc`).

        Args:
            fn (:class:`Module` -> None): function to be applied to each submodule

        Returns:
            Module: self

        Example::

            >>> @torch.no_grad()
            ... def init_weights(m):
            ...     if type(m) == nn.Linear:
            ...         m.weight.fill_(1.0)
            >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 1))
            >>> _ = net.apply(init_weights)  # visits net[0], net[1], then net itself
        """
        # Descend into the children first; each child recurses through its own subtree.
        for child in self.children():
            child.apply(fn)
        # Only after all submodules have been handled is fn applied to this module.
        fn(self)
        return self

#### cuda()函数

In [None]:
    def cuda(self: T, device: Optional[Union[int, device]] = None) -> T:
        r"""Move every parameter and buffer of the model to the GPU.

        The associated parameters and buffers become different objects, so
        call this before constructing the optimizer if the module will live
        on the GPU while being optimized.

        .. note::
            This method modifies the module in-place.

        Args:
            device (int, optional): if specified, all parameters will be
                copied to that device

        Returns:
            Module: self
        """
        def _to_gpu(t):
            # Per-tensor conversion handed to _apply.
            return t.cuda(device)

        # _apply runs the conversion over all submodules, parameters and buffers.
        return self._apply(_to_gpu)

#### type()函数

In [None]:
    def type(self: T, dst_type: Union[dtype, str]) -> T:
        r"""Cast every parameter and buffer to :attr:`dst_type`.

        .. note::
            This method modifies the module in-place.

        Args:
            dst_type (type or string): the desired type

        Returns:
            Module: self
        """
        def _cast(t):
            # Per-tensor conversion to the requested type.
            return t.type(dst_type)

        # Same mechanism as cuda(): the conversion is delegated to _apply.
        return self._apply(_cast)

#### to_empty()函数

In [None]:
    # Rarely needed in practice.
    def to_empty(self: T, *, device: Union[str, device]) -> T:
        r"""Move the parameters and buffers to the specified device without copying storage.

        Every parameter and buffer is passed through ``torch.empty_like`` with the
        given ``device``, so the existing values are discarded rather than copied.

        Args:
            device (:class:`torch.device`): The desired device of the parameters
                and buffers in this module.

        Returns:
            Module: self
        """
        def _blank_like(t):
            # New tensor built from t on the target device; t's values are not carried over.
            return torch.empty_like(t, device=device)

        return self._apply(_blank_like)