In [1]:
import torch
from torch import nn
from torchvision import models
from torchvision.ops.misc import MLP
from typing import List, Optional, Callable

In [2]:
# Random input batch of shape (2, 10); the 10 columns match the
# `in_channels=10` of the MLPs built in the following cells.
x = torch.randn((2, 10))

In [3]:
# In general, a norm layer and dropout are not used together, so only
# dropout is enabled here (norm_layer=None).
# `eval()` returns the module itself, so it can be chained onto construction;
# it puts the module in evaluation mode before the forward pass in the next cell.
mlp = MLP(
    in_channels=10,
    hidden_channels=[20, 30, 40],
    norm_layer=None,
    activation_layer=nn.ReLU,
    inplace=True,
    bias=False,
    dropout=0.5,
).eval()
mlp

MLP(
  (0): Linear(in_features=10, out_features=20, bias=False)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=True)
  (3): Linear(in_features=20, out_features=30, bias=False)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=True)
  (6): Linear(in_features=30, out_features=40, bias=False)
  (7): Dropout(p=0.5, inplace=True)
)

In [4]:
# Run the forward pass under inference mode (no autograd tracking).
with torch.inference_mode():
    y = mlp(x)
# Expected: [2, 40] -- batch of 2, last hidden width 40.
print(y.shape)

torch.Size([2, 40])


In [5]:
# Adapted from torchvision.ops.misc.MLP. Differences from the upstream block:
# the final Linear layer always has a bias, and `activation_layer=None` is honoured.
class MLP1(torch.nn.Sequential):
    """Multi-layer perceptron (MLP) assembled as a ``torch.nn.Sequential``.

    For every entry of ``hidden_channels`` except the last, the block stacks
    ``Linear -> [norm_layer] -> [activation_layer] -> Dropout``. The last entry
    produces the output stage ``Linear -> Dropout`` with no norm or activation.

    Args:
        in_channels (int): Number of features of the input.
        hidden_channels (List[int]): Output width of each linear layer, in order.
            The last entry is the output width of the whole block. Must contain
            at least one entry.
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer factory,
            called with the hidden width and stacked on top of each non-final
            linear layer. If ``None`` this layer is not used. Default: ``None``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation
            factory, stacked on top of the norm layer (if any), otherwise on top
            of the linear layer. If ``None`` this layer is not used.
            Default: ``torch.nn.ReLU``
        inplace (bool, optional): Forwarded as ``inplace=...`` to both the
            activation layer and the dropout layers. If ``None`` the keyword is
            not passed at all. Default: ``True``
        bias (bool): Whether the non-final linear layers use a bias. The final
            linear layer always uses a bias regardless of this flag.
            Default: ``True``
        dropout (float): The probability for the dropout layers. Default: 0.0

    Raises:
        IndexError: If ``hidden_channels`` is empty.
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: List[int],
        norm_layer: Optional[Callable[..., torch.nn.Module]] = None,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        inplace: Optional[bool] = True,
        bias: bool = True,
        dropout: float = 0.0,
    ):
        # The addition of `norm_layer` is inspired from the implementation of TorchMultimodal:
        # https://github.com/facebookresearch/multimodal/blob/5dec8a/torchmultimodal/modules/layers/mlp.py
        # Only forward `inplace` when the caller specified it, so factories
        # that do not accept the keyword can still be used with inplace=None.
        params = {} if inplace is None else {"inplace": inplace}

        layers: List[torch.nn.Module] = []
        in_dim = in_channels
        # All entries but the last get the full Linear/norm/activation/dropout stack.
        for hidden_dim in hidden_channels[:-1]:
            layers.append(torch.nn.Linear(in_dim, hidden_dim, bias=bias))
            if norm_layer is not None:
                layers.append(norm_layer(hidden_dim))
            # Honour the documented contract: None means "no activation".
            if activation_layer is not None:
                layers.append(activation_layer(**params))
            layers.append(torch.nn.Dropout(dropout, **params))
            in_dim = hidden_dim

        # Output stage: the bias of the final linear layer is intentionally
        # always enabled (the `bias` argument is not applied here).
        layers.append(torch.nn.Linear(in_dim, hidden_channels[-1]))
        layers.append(torch.nn.Dropout(dropout, **params))

        super().__init__(*layers)

In [6]:
# Same configuration as the torchvision MLP above, built with the local
# re-implementation. `eval()` returns the module itself, so it is chained here.
mlp1 = MLP1(
    in_channels=10,
    hidden_channels=[20, 30, 40],
    norm_layer=None,
    activation_layer=nn.ReLU,
    inplace=True,
    bias=False,
    dropout=0.5,
).eval()
mlp1

MLP1(
  (0): Linear(in_features=10, out_features=20, bias=False)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=True)
  (3): Linear(in_features=20, out_features=30, bias=False)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=True)
  (6): Linear(in_features=30, out_features=40, bias=True)
  (7): Dropout(p=0.5, inplace=True)
)

In [7]:
# Run the forward pass of the re-implemented MLP under inference mode.
with torch.inference_mode():
    y = mlp1(x)
# Expected: [2, 40] -- batch of 2, last hidden width 40.
print(y.shape)

torch.Size([2, 40])
