In [1]:
import torch
from torch import nn
from torch.nn import functional as F

# Conv2d

In the simplest case, the output value of the layer with input size $(N, C_{\text{in}}, H, W)$ and output $(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})$

can be precisely described as:

$$
\text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
\sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)
$$

Shape:
- Input: $(N, C_{in}, H_{in}, W_{in})$ or $(C_{in}, H_{in}, W_{in})$
- Output: $(N, C_{out}, H_{out}, W_{out})$ or $(C_{out}, H_{out}, W_{out})$, where

$$
H_{out} = \left\lfloor\frac{H_{in}  + 2 \times \text{padding}[0] - \text{dilation}[0]
        \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
$$

$$
W_{out} = \left\lfloor\frac{W_{in}  + 2 \times \text{padding}[1] - \text{dilation}[1]
        \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
$$

# 实例化Conv2d

In [2]:
# A 2-channel -> 1-channel convolution with a 3x3 kernel. stride, padding, dilation,
# groups and bias are all spelled out so each knob of the layer is visible.
# eval() returns the module itself, so it can be chained onto the constructor.
conv2d = nn.Conv2d(2, 1, 3, stride=1, padding=1, dilation=1, groups=1, bias=True).eval()
conv2d

Conv2d(2, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

## 初始化方式1

In [3]:
def init_weights(m):
    """Re-draw the weight of a Conv2d layer from a normal distribution (mean 0, std 0.01).

    Intended to be passed to ``Module.apply``. Only modules whose type is exactly
    ``nn.Conv2d`` are touched; every other module (including Conv2d subclasses) is
    left unchanged. The bias is not modified.
    """
    if type(m) is not nn.Conv2d:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)

In [4]:
# Run init_weights on conv2d via Module.apply; the call returns the module,
# which is what the cell displays.
conv2d.apply(init_weights)

Conv2d(2, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# 查看并设置Conv2d的weight和bias(初始化方式2)

In [3]:
# Second way to initialise: overwrite the weight tensor in place with samples
# drawn from a normal distribution (mean 0, std 0.01).
conv2d.weight.data.normal_(0, 0.01)

tensor([[[[-0.0005, -0.0096, -0.0118],
          [ 0.0166, -0.0111, -0.0106],
          [-0.0021,  0.0072, -0.0109]],

         [[-0.0060, -0.0164,  0.0004],
          [-0.0056,  0.0084,  0.0112],
          [-0.0231,  0.0142, -0.0080]]]])

In [4]:
# Show the current weight values; shape is (out_channels=1, in_channels=2, 3, 3).
conv2d.weight.data

tensor([[[[-0.0005, -0.0096, -0.0118],
          [ 0.0166, -0.0111, -0.0106],
          [-0.0021,  0.0072, -0.0109]],

         [[-0.0060, -0.0164,  0.0004],
          [-0.0056,  0.0084,  0.0112],
          [-0.0231,  0.0142, -0.0080]]]])

In [5]:
# Set every bias entry to 0 in place; fill_ returns the tensor, which the cell displays.
conv2d.bias.data.fill_(0)

tensor([0.])

In [6]:
# Show the current bias values (one entry per output channel).
conv2d.bias.data

tensor([0.])

# 自动推理

In [8]:
# Deterministic float input: the values 1..50 laid out as
# (batch=1, channels=2, height=5, width=5).
x = torch.arange(1, 51.0).reshape(1, 2, 5, 5)
x

tensor([[[[ 1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10.],
          [11., 12., 13., 14., 15.],
          [16., 17., 18., 19., 20.],
          [21., 22., 23., 24., 25.]],

         [[26., 27., 28., 29., 30.],
          [31., 32., 33., 34., 35.],
          [36., 37., 38., 39., 40.],
          [41., 42., 43., 44., 45.],
          [46., 47., 48., 49., 50.]]]])

In [9]:
x.shape
# Axis order is (B, C, H, W): batch, channels, height, width.

torch.Size([1, 2, 5, 5])

In [10]:
# Forward pass under inference_mode: no gradients are needed just to inspect the output.
with torch.inference_mode():
    y = conv2d(x)
# With kernel_size=3, stride=1, padding=1 the 5x5 spatial size is preserved.
y

tensor([[[[-16.6096, -17.8440, -18.2492, -18.6544,  -7.1371],
          [-19.1482, -23.9443, -24.6553, -25.3664, -10.7965],
          [-23.0593, -27.4994, -28.2105, -28.9215, -12.4913],
          [-26.9704, -31.0546, -31.7656, -32.4766, -14.1861],
          [-19.1191, -17.0690, -17.4659, -17.8627, -11.7871]]]])

# 手动计算

In [11]:
# Reproduce a single output value by hand. With padding=1, the 3x3 window over
# x[..., :3, :3] is the one centred on output position (1, 1), so the result
# should match y[0, 0, 1, 1].
# The bias is added once per output value, after the multiply-and-sum. Adding it
# inside the sum would count it once per kernel tap (2 * 3 * 3 = 18 times), which
# only looks correct while the bias happens to be 0.
(x[:, :, :3, :3] * conv2d.weight.data).sum() + conv2d.bias.data[0]

tensor(-21.7098)

# ConvNormAct

In [None]:
class ConvNormAct(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: int | None = None,
        dilation: int = 1,
        groups: int = 1,
        bias: bool = True,
        norm: nn.Module = nn.BatchNorm2d,
        act: nn.Module = nn.ReLU,
    ) -> None:
        super().__init__()
        assert in_channels % groups == 0
        assert out_channels % groups == 0
        padding = padding or dilation * (kernel_size - 1) // 2
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        self.norm = norm(out_channels)
        self.act = act()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.act(self.norm(self.conv(x)))