План семинара **"Реализация собственных операторов на языке Python во фреймворке PyTorch"**
1. [Squeeze-and-Excitation (SE) Block](https://arxiv.org/abs/1709.01507)
2. [Selective Kernel (SK) Convolution](https://arxiv.org/abs/1903.06586)

# Squeeze-and-Excitation (SE) Block

The “Squeeze-and-Excitation” (SE) block adaptively recalibrates
channel-wise feature responses by explicitly modelling interdependencies between channels.

In [None]:
from einops import rearrange, reduce

In [None]:
import torch
from torch import nn

class SEBlock(nn.Module):
    """
    Implementation of the Squeeze-and-Excitation (SE) block proposed in [1].

    The block average-pools every channel down to one value ("squeeze"),
    feeds that channel descriptor through a two-layer bottleneck ending in a
    sigmoid ("excitation"), and rescales the input channels with the
    resulting gates.

    Parameters
    ----------
    in_channels : int
        Number of channels in the input tensor.
    reduction : int, optional, default=16
        Reduction ratio to control the intermediate channel dimension.
        The bottleneck width is ``in_channels // reduction``, clamped to at
        least 1 so that small ``in_channels`` do not produce an empty layer.
    References
    ----------
    1. "`Squeeze-and-Excitation Networks. <https://arxiv.org/abs/1709.01507>`_" Jie Hu, et al. CVPR 2018.
    """

    def __init__(
        self,
        in_channels: int,
        reduction: int = 16
    ) -> None:
        super().__init__()

        # Without the clamp, in_channels < reduction gives a zero-width
        # bottleneck and the gates no longer depend on the input.
        hidden_channels = max(in_channels // reduction, 1)

        self.squeeze = nn.AdaptiveAvgPool2d((1, 1))

        # 1x1 convolutions behave like linear layers applied to the
        # [b, c, 1, 1] descriptor, so no reshaping is needed around them.
        self.excitation = nn.Sequential(
            nn.Conv2d(in_channels, hidden_channels, 1),
            nn.ReLU(),
            nn.Conv2d(hidden_channels, in_channels, 1),
            nn.Sigmoid()
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Parameters
        ----------
        x : torch.Tensor (batch_size, in_channels, height, width)
            Input tensor.
        Returns
        -------
        out : torch.Tensor (batch_size, in_channels, height, width)
            Input rescaled channel-wise by the SE gates.
        """
        # Global average pooling; same result as the einops expression
        # reduce(x, 'b c h w -> b c () ()', 'mean').
        z = self.squeeze(x)  # eq.2 [b, c, 1, 1]
        s = self.excitation(z)  # eq.3 [b, c, 1, 1]
        # The gates broadcast over the spatial dimensions.
        return x * s  # eq.4 [b, c, h, w]

In [None]:
# Smoke test: a batch of 2 feature maps with 32 channels, 25x25 pixels each.
features = torch.rand((2, 32, 25, 25))
out = SEBlock(in_channels=32)
# The cell displays the shape of the block's output.
out(features).shape

torch.Size([2, 32, 1, 1])
torch.Size([2, 32, 1, 1])
torch.Size([2, 32, 1, 1])


torch.Size([2, 32, 25, 25])

# Selective Kernel (SK) Convolution

To enable the neurons to adaptively adjust their receptive field (RF) sizes,
the authors propose an automatic selection operation, “Selective
Kernel” (SK) convolution, among multiple kernels with different kernel sizes.

In [None]:
import torch
from torch import nn
from typing import List, Optional

class SKConv(nn.Module):
    """
    Implementation of the Selective Kernel (SK) Convolution proposed in [1].

    The input is processed by several convolution branches ("split"), the
    branch outputs are summed and squeezed into a compact descriptor
    ("fuse"), and a softmax across branches turns that descriptor into
    per-channel weights used to blend the branch outputs ("select").

    Parameters
    ----------
    in_channels : int
        Number of channels in the input tensor.
    out_channels : int, optional, default=None
        Number of channels produced by the convolution.
        ``None`` means "same as ``in_channels``".
    kernels : List[int], optional, default=[3, 5]
        List of kernel sizes for each branch. Every branch is a 3x3
        convolution with ``dilation = padding = k // 2``, so the spatial size
        of the input is preserved.
    reduction : int, optional, default=16
        Reduction ratio to control the dimension of "compact feature" ``z`` (see eq.4).
    L : int, optional, default=32
        Minimal value of the dimension of "compact feature" ``z`` (see eq.4).
    groups : int, optional, default=32
        Hyperparameter for ``torch.nn.Conv2d``.
        NOTE: accepted for interface compatibility only; it is not forwarded
        to the branch convolutions, which are dense.
    Raises
    ------
    ValueError
        If ``kernels`` is empty or contains a size below 2 (which would give
        a dilation of 0).
    References
    ----------
    1. "`Selective Kernel Networks. <https://arxiv.org/abs/1903.06586>`_" Xiang Li, et al. CVPR 2019.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: Optional[int] = None,
        kernels: Optional[List[int]] = None,
        reduction: int = 16,
        L: int = 32,
        groups: int = 32
    ) -> None:
        super().__init__()

        # A None sentinel avoids sharing one mutable default list between calls.
        if kernels is None:
            kernels = [3, 5]
        if len(kernels) == 0:
            raise ValueError("kernels must contain at least one kernel size")
        if any(k // 2 < 1 for k in kernels):
            raise ValueError("every kernel size must be at least 2")

        if out_channels is None:
            out_channels = in_channels
        self.out_channels = out_channels

        self.d = max(in_channels // reduction, L)  # eq.4

        self.M = len(kernels)

        self.convs = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    out_channels,
                    3,
                    dilation=k // 2,
                    padding=k // 2
                )
            )
            for k in kernels
        ])

        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        self.fc_z = nn.Sequential(
            nn.Linear(out_channels, self.d),
            nn.BatchNorm1d(self.d),
            nn.ReLU()
        )
        # One logit per (branch, channel) pair, so each branch gets its own
        # attention vector.
        self.fc_attn = nn.Linear(self.d, self.M * out_channels)
        # Softmax over the branch axis: for every channel the M branch
        # weights sum to 1.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Parameters
        ----------
        x : torch.Tensor (batch_size, in_channels, height, width)
            Input tensor.
        Returns
        -------
        out : torch.Tensor (batch_size, out_channels, height, width)
            Output of the SK convolution layer.
        """
        # ----- split -----
        # Every branch sees the same input; stack the branch outputs.
        feats = torch.stack([conv(x) for conv in self.convs], dim=1)  # [b, M, c_out, h, w]

        # ----- fuse -----
        # eq.1: element-wise sum of the branch outputs
        U = feats.sum(dim=1)  # [b, c_out, h, w]
        # channel-wise statistics, eq.2
        s = self.pool(U).flatten(1)  # [b, c_out]
        # compact feature, eq.3
        z = self.fc_z(s)  # [b, d]

        # ----- select -----
        batch_size = s.shape[0]

        # attention map, eq.5
        score = self.fc_attn(z)  # [b, M * c_out]
        score = score.reshape(batch_size, self.M, self.out_channels, 1, 1)
        att = self.softmax(score)  # [b, M, c_out, 1, 1]

        # fuse multiple branches, eq.6: weighted sum of the branch outputs
        return (feats * att).sum(dim=1)  # [b, c_out, h, w]

In [None]:
# Smoke test: a single 25x25 feature map with 34 * 16 = 544 channels.
features = torch.rand(1, 34 * 16, 25, 25)
# eval() makes BatchNorm1d use its running statistics; in training mode it
# cannot normalise a batch that contains a single sample.
out = SKConv(34 * 16).eval()
# The cell displays the shape of the layer's output.
out(features).shape

torch.Size([1, 2, 544, 25, 25])
torch.Size([1, 544, 25, 25])
torch.Size([1, 544, 1, 1])
torch.Size([1, 34])
torch.Size([1, 544])
torch.Size([1, 2, 544, 1, 1])
torch.Size([1, 2, 544, 1, 1])


torch.Size([1, 544, 25, 25])

In [None]:
# Dense 3x3 convolution: every output channel has a filter slice for each of
# the 3 input channels.
n = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3)
# The cell displays the weight shape.
n.weight.shape

torch.Size([3, 3, 3, 3])

In [None]:
# Same layer with groups=3: each output channel now only sees one input
# channel, so the second weight dimension shrinks from 3 to 1.
n = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3, groups=3)
# The cell displays the weight shape.
n.weight.shape

torch.Size([3, 1, 3, 3])

In [None]:
# Run the convolution defined in the previous cell on one 3-channel 25x25 map.
features = torch.rand((1, 3, 25, 25))
# The cell displays the output shape.
n(features).shape

torch.Size([1, 3, 23, 23])