# 1. Blocks

## 1.1. Conv1dNormActivation

In [16]:
import torch.nn as nn

class Conv1dNormActivation(nn.Module):
    """1-D convolution optionally followed by normalisation and activation.

    Args:
        in_channels (int): Number of channels of the input.
        out_channels (int): Number of channels produced by the convolution.
        kernel_size (int or tuple): Kernel size handed to ``nn.Conv1d``.
        norm_layer (callable, optional): Factory called with ``out_channels``
            to build the normalisation layer. A falsy value disables it.
            Defaults to ``nn.BatchNorm1d``.
        activation_layer (callable, optional): Zero-argument factory for the
            activation. A falsy value disables it. Defaults to ``nn.ReLU``.
        **kwargs: Extra keyword arguments forwarded to ``nn.Conv1d``
            (e.g. ``stride``, ``padding``, ``groups``).
    """

    def __init__(
        self,
        in_channels, out_channels, kernel_size,
        norm_layer=nn.BatchNorm1d,
        activation_layer=nn.ReLU,
        **kwargs
    ):
        super().__init__()

        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            **kwargs
        )

        # Optional stages are stored as None when disabled so forward() can skip them.
        if norm_layer:
            self.norm = norm_layer(out_channels)
        else:
            self.norm = None

        if activation_layer:
            self.activation = activation_layer()
        else:
            self.activation = None

    def forward(self, x):
        """Apply conv -> (norm) -> (activation) and return the result."""
        out = self.conv(x)
        if self.norm:
            out = self.norm(out)
        if self.activation:
            out = self.activation(out)
        return out

# in_channels, out_channels = 144, 64
# x = torch.randn(1, in_channels, 10)
# l = Conv1dNormActivation(
#     in_channels, out_channels,
#     kernel_size=3, padding='same',
# )

# print(summary(
#     model=l, 
#     input_data=x,
#     col_names=["input_size", "output_size", "num_params", "trainable"],
#     col_width=20,
#     row_settings=["var_names"]
# ))

## 1.6. SEBlock

In [17]:
import torch
import torch.nn as nn

from torchinfo import summary

class SEBlock(nn.Module):
    # NOTE(review): the `conv_block` default below needs `ops` to be in scope
    # when this class is defined; confirm `torchvision.ops` is imported as
    # `ops` before this cell runs.
    def __init__(
        self,
        in_channels, squeeze_channels,
        activation_layer=nn.ReLU,
        conv_block=ops.Conv2dNormActivation,
        pool_block=nn.AdaptiveAvgPool2d,
    ):
        """Squeeze-and-Excitation block that rescales its input per channel.

        The input is pooled to size 1, passed through two 1-wide convolutions
        (``in_channels -> squeeze_channels -> in_channels``) and a sigmoid;
        the result multiplies the original input.

        Args:
            in_channels (int): The number of input (and output) channels.
            squeeze_channels (int): The number of channels to squeeze to.
            activation_layer (callable, optional): Activation factory used
                after the first convolution. Defaults to ``nn.ReLU``.
            conv_block (callable, optional): Factory for the convolutions,
                called as ``conv_block(in, out, 1, norm_layer=None,
                activation_layer=...)``. Defaults to
                ``ops.Conv2dNormActivation``.
            pool_block (callable, optional): Factory for the pooling layer,
                called with the output size ``1``. Defaults to
                ``nn.AdaptiveAvgPool2d``.
        """
        super().__init__()

        # Squeeze: collapse the pooled dimensions to size 1.
        self.pool = pool_block(1)

        # Excitation: reduce, then restore the channel count (no normalisation).
        self.conv1 = conv_block(
            in_channels,
            squeeze_channels,
            1,
            norm_layer=None,
            activation_layer=activation_layer,
        )
        self.conv2 = conv_block(
            squeeze_channels,
            in_channels,
            1,
            norm_layer=None,
            activation_layer=None,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by its learned per-channel gate."""
        # With the default 2-D blocks the gate has shape (..., in_channels, 1, 1).
        gate = self.pool(x)
        gate = self.conv1(gate)   # -> squeeze_channels
        gate = self.conv2(gate)   # -> in_channels
        gate = self.sigmoid(gate)

        # The gate broadcasts over the pooled dimensions.
        return x * gate

# in_channels = 144
# x = torch.randn(1, in_channels, 10)
# l = SEBlock(in_channels, squeeze_channels=6)

# print(summary(
#     model=l, 
#     input_data=x,
#     col_names=["input_size", "output_size", "num_params", "trainable"],
#     col_width=20,
#     row_settings=["var_names"]
# ))

## 1.7. EfficientNetBlock

EfficientNetV2

In [18]:
import torch
import torch.nn as nn
import torchvision.ops as ops

from torchinfo import summary

class MBConv(nn.Module):
    def __init__(
        self,
        in_channels, out_channels,
        bottleneck=4, kernel_size=3, stride=1, padding='same', squeeze_ratio=4,
        activation_layer=nn.SiLU,
        se_block=SEBlock,
        conv_block=ops.Conv2dNormActivation,
        pool_block=nn.AdaptiveAvgPool2d,
        **kwargs
    ):
        """MBConv block: expand -> depthwise conv -> squeeze-excite -> project.

        Args:
            in_channels (int): The number of input channels.
            out_channels (int): The number of output channels.
            bottleneck (int, optional): Expansion factor; the inner width is
                ``int(in_channels * bottleneck)``. Defaults to 4.
            kernel_size (int, optional): Kernel of the depthwise convolution.
                Defaults to 3.
            stride (int, optional): Stride of the depthwise convolution.
                Defaults to 1.
            padding (str or int, optional): Padding of the depthwise
                convolution. Defaults to 'same'.
                NOTE(review): PyTorch documents 'same' padding as unsupported
                for strided convolutions - pass an integer when stride != 1.
            squeeze_ratio (int, optional): The SE block squeezes to
                ``max(1, in_channels // squeeze_ratio)`` channels.
                Defaults to 4.
            activation_layer (callable, optional): Activation factory used in
                the expansion, the depthwise conv and the SE block.
                Defaults to ``nn.SiLU``.
            se_block (callable, optional): Squeeze-and-excitation factory.
            conv_block (callable, optional): Conv+norm+activation factory.
            pool_block (callable, optional): Pooling factory handed to
                ``se_block``.
            **kwargs: Forwarded to every ``conv_block`` call (not to
                ``se_block``).
        """
        super().__init__()

        # The skip connection is only used when the block keeps channels and stride.
        self.residual = (in_channels == out_channels and stride == 1)

        expanded = int(in_channels*bottleneck)
        layers = []

        # 1. Pointwise expansion (..., in_channels, ...) -> (..., expanded, ...);
        #    skipped when the inner width equals the input width.
        if expanded != in_channels:
            expand_conv = conv_block(
                in_channels, expanded,
                kernel_size=1, stride=1, padding='same',
                activation_layer=activation_layer,
                **kwargs
            )
            layers.append(expand_conv)

        # 2. Depthwise convolution: one group per channel, width unchanged.
        depthwise_conv = conv_block(
            expanded, expanded,
            kernel_size=kernel_size, stride=stride, padding=padding,
            groups=expanded,
            activation_layer=activation_layer,
            **kwargs
        )
        layers.append(depthwise_conv)

        # 3. Squeeze and excitation; the squeeze width is derived from the
        #    block input width, not from the expanded width.
        squeeze_channels = max(1, in_channels // squeeze_ratio)
        excite = se_block(
            expanded, squeeze_channels,
            activation_layer=activation_layer,
            conv_block=conv_block,
            pool_block=pool_block,
        )
        layers.append(excite)

        # 4. Pointwise projection (..., expanded, ...) -> (..., out_channels, ...)
        #    with an identity in place of the activation.
        project_conv = conv_block(
            expanded, out_channels,
            kernel_size=1, stride=1, padding='same',
            activation_layer=nn.Identity,
            **kwargs
        )
        layers.append(project_conv)

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Run the block and add the input back when ``self.residual`` is set."""
        out = self.block(x)
        if self.residual:
            out = out + x
        return out

# Smoke test: build an MBConv with no expansion (bottleneck=1) and push one
# random batch through it.
in_channels, out_channels = 32, 4
kernel_size = 3
batch_size = 1
l = MBConv(
    in_channels, out_channels,
    kernel_size=kernel_size, bottleneck=1,
)
x = torch.randn(batch_size, in_channels, 20, 20)
out = l(x)
# NOTE(review): this bare expression is not the last statement of the cell,
# so presumably nothing is displayed for it.
x.shape, out.shape

# Print a per-layer torchinfo table (shapes, parameter counts, trainability).
print(summary(
    model=l, 
    input_data=x,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"]
))

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
MBConv (MBConv)                               [1, 32, 20, 20]      [1, 4, 20, 20]       --                   True
├─Sequential (block)                          [1, 32, 20, 20]      [1, 4, 20, 20]       --                   True
│    └─Conv2dNormActivation (0)               [1, 32, 20, 20]      [1, 32, 20, 20]      --                   True
│    │    └─Conv2d (0)                        [1, 32, 20, 20]      [1, 32, 20, 20]      288                  True
│    │    └─BatchNorm2d (1)                   [1, 32, 20, 20]      [1, 32, 20, 20]      64                   True
│    │    └─SiLU (2)                          [1, 32, 20, 20]      [1, 32, 20, 20]      --                   --
│    └─SEBlock (1)                            [1, 32, 20, 20]      [1, 32, 20, 20]      --                   True
│    │    └─AdaptiveAvgPool2d (pool)          [1, 32, 20, 20]      [1, 32, 1, 1]     

## 1.8. FusedMBConv

In [28]:
import torch
import torch.nn as nn
import torchvision.ops as ops

from torchinfo import summary

class FusedMBConv(nn.Module):
    def __init__(
        self,
        in_channels, out_channels,
        bottleneck=4, kernel_size=3, stride=1, padding='same',
        activation_layer=nn.SiLU,
        se_block=SEBlock,
        conv_block=ops.Conv2dNormActivation,
        pool_block=nn.AdaptiveAvgPool2d,
        **kwargs
    ):
        """FusedMBConv is a MBConv block with fused 1st and 2nd convs.

        With expansion (``int(in_channels * bottleneck) != in_channels``) the
        block is a full-kernel conv to the expanded width followed by a 1x1
        projection to ``out_channels``. Without expansion it is a single
        full-kernel conv that maps directly to ``out_channels``.

        Args:
            in_channels (int): The number of input channels.
            out_channels (int): The number of output channels.
            bottleneck (int, optional): Expansion factor; the inner width is
                ``int(in_channels * bottleneck)``. Defaults to 4.
            kernel_size (int, optional): The kernel of the fused convolution.
                Defaults to 3.
            stride (int, optional): The stride of the fused convolution.
                Defaults to 1.
            padding (str or int, optional): The padding of the fused
                convolution. Defaults to 'same'.
            activation_layer (callable, optional): Activation factory for the
                fused convolution. Defaults to ``nn.SiLU``.
            se_block (callable, optional): Unused; accepted so the signature
                matches ``MBConv``.
            conv_block (callable, optional): Conv+norm+activation factory.
            pool_block (callable, optional): Unused; accepted so the signature
                matches ``MBConv``.
            **kwargs: Forwarded to every ``conv_block`` call.
        """
        super().__init__()

        # Same rule as MBConv: a skip connection is only valid when the block
        # keeps both the channel count and the resolution.
        self.residual = (in_channels == out_channels and stride == 1)

        bottleneck_size = int(in_channels*bottleneck)
        expands = in_channels != bottleneck_size
        layers = []

        # 1. Fused convolution. Without expansion there is no projection
        #    afterwards, so this conv must produce out_channels itself;
        #    otherwise the block would silently ignore out_channels.
        fused_width = bottleneck_size if expands else out_channels
        layers.append(
            conv_block(
                in_channels, fused_width,
                kernel_size=kernel_size, stride=stride, padding=padding,
                activation_layer=activation_layer,
                **kwargs
            )
        )

        # 2. (..., bottleneck_size, ...) -> (..., out_channels, ...)
        #    Pointwise projection with an identity in place of the activation.
        if expands:
            layers.append(
                conv_block(
                    bottleneck_size, out_channels,
                    kernel_size=1, stride=1, padding='same',
                    activation_layer=nn.Identity,
                    **kwargs
                )
            )

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Run the block and add the input back when ``self.residual`` is set."""
        out = self.block(x)

        # self.residual was computed in __init__ but never applied before.
        if self.residual:
            out = out + x

        return out

# in_channels, out_channels = 24, 48
# kernel_size = 3
# H, W = 256, 256
# batch_size = 1
# l = FusedMBConv(
#     in_channels, out_channels,
#     kernel_size=kernel_size,
#     bottleneck=1,
# )
# x = torch.randn(batch_size, in_channels, H, W)
# out = l(x)
# x.shape, out.shape

# print(summary(
#     model=l, 
#     input_data=x,
#     col_names=["input_size", "output_size", "num_params", "trainable"],
#     col_width=20,
#     row_settings=["var_names"],
#     depth=2
# ))

# 2. Model

* ReLU6 and SiLU usage

## 2.1. EfficientNetConfig

#### TODO
- [x] Pipe activation_layer all the way through.
- [x] Residual connection in SEBlock is broken for stride != 1.
- [x] Round channels to multiple of 8.
- [-] Pipe dropout.
- [-] Configure the minimum number of channels in EfficientNetConfig.
- [x] Adjust variable blocks to have same number of input and output channels.

In [54]:
import math
import torch
import torch.nn as nn
import torchvision.ops as ops

from torchinfo import summary

class EfficientNetV2Config:
    """Stage table and block factories used to assemble an EfficientNetV2.

    Args:
        width_mult (float, optional): Channel multiplier used by
            ``adjust_channels``. Defaults to 1.4.
        depth_mult (float, optional): Layer-count multiplier used by
            ``adjust_depth``. Defaults to 1.8.
        dropout (float, optional): Stored as ``self.dropout``; not consumed
            inside this class. Defaults to 0.4.
        last_channels (int, optional): Stored as ``self.last_channels``.
            Defaults to 1280.
        kernel (int, optional): Kernel size used by the stride-1 stages of
            ``block_configs``. Defaults to 3.
        se_block (callable, optional): Squeeze-and-excitation factory.
        conv_block (callable, optional): Conv+norm+activation factory.
        pool_block (callable, optional): Pooling factory.
    """

    def __init__(
        self,
        # B4 configuration
        width_mult=1.4, depth_mult=1.8,
        dropout=0.4, last_channels=1280,

        # Block configuration
        kernel=3,

        # Blocks
        se_block=SEBlock,
        conv_block=ops.Conv2dNormActivation,
        pool_block=nn.AdaptiveAvgPool2d,
    ):
        self.width_mult = width_mult
        self.depth_mult = depth_mult

        self.dropout = dropout
        self.last_channels = last_channels

        # Kernel of the stride-1 stages below.
        self.kernel = kernel

        # Blocks
        self.se_block = se_block
        self.conv_block = conv_block
        self.pool_block = pool_block

        # Block configs: one row per stage. Stride-2 rows use an explicit
        # integer padding instead of 'same'.
        self.block_configs = [
            # (type, in_channels, out_channels, bottleneck, kernel, padding, stride, layers)
            (FusedMBConv, 24, 24, 1, kernel, 'same', 1, 2),
            (FusedMBConv, 24, 48, 4, 3, 1, 2, 4),
            (FusedMBConv, 48, 64, 4, 3, 1, 2, 4),

            (MBConv, 64, 128, 4, 3, 1, 2, 6),
            (MBConv, 128, 160, 6, kernel, 'same', 1, 9),
            (MBConv, 160, 256, 6, 3, 1, 2, 15),
        ]

    def adjust_channels(self, channels):
        """Scale ``channels`` by ``width_mult`` and round with ``round_to``."""
        return self.round_to(channels*self.width_mult)

    def adjust_depth(self, num_layers):
        """Scale ``num_layers`` by ``depth_mult``, rounding up."""
        return int(math.ceil(num_layers*self.depth_mult))

    @staticmethod
    def round_to(v, multiple=8):
        """Round ``v`` to the nearest multiple of ``multiple``.

        The result is clamped to at least ``multiple`` so that a small width
        can never collapse to zero channels (``round`` rounds halves to even,
        so e.g. ``round(4 / 8)`` is 0).
        """
        return max(multiple, int(multiple * round(v / multiple)))

    def _block(self, args, **kwargs):
        """Build one stage from a ``block_configs`` row.

        The first block maps ``in_channels -> out_channels`` with the row's
        stride and padding; the remaining ``layers - 1`` blocks keep
        ``out_channels`` and use stride 1 with 'same' padding.

        Args:
            args (tuple): One row of ``self.block_configs``.
            **kwargs: Forwarded to every block constructor.

        Returns:
            nn.Sequential: The blocks of the stage, in order.
        """
        b_type, in_channels, out_channels, bottleneck, kernel, padding, stride, layers = args

        # NOTE: width/depth scaling is currently disabled.
        # # 1. Update in_channels and out_channels based on the width_mult
        # in_channels = self.adjust_channels(in_channels)
        # out_channels = self.adjust_channels(out_channels)

        # # 2. Update layers based on depth_mult
        # layers = self.adjust_depth(layers)

        # Arguments shared by every block of the stage.
        shared = dict(
            bottleneck=bottleneck,
            kernel_size=kernel,
            se_block=self.se_block,
            conv_block=self.conv_block,
            pool_block=self.pool_block,
            **kwargs
        )

        # Only the first block may change width and resolution.
        first = b_type(
            in_channels, out_channels,
            stride=stride, padding=padding,
            **shared
        )
        repeats = [
            b_type(
                out_channels, out_channels,
                stride=1, padding='same',
                **shared
            )
            for _ in range(layers - 1)
        ]
        return nn.Sequential(first, *repeats)

    def make_blocks(self, **kwargs):
        """Build every stage of ``block_configs`` and chain them.

        Args:
            **kwargs: Forwarded to every block constructor.

        Returns:
            nn.Sequential: One ``nn.Sequential`` per stage, in table order.
        """
        stages = [self._block(b_config, **kwargs) for b_config in self.block_configs]
        return nn.Sequential(*stages)

# Smoke test: build all stages from the default (2-D) configuration. The
# commented keyword arguments show how 1-D blocks could be swapped in.
eff_config = EfficientNetV2Config(
    # se_block=SEBlock,
    # conv_block=Conv1dNormActivation,
    # pool_block=nn.AdaptiveAvgPool1d
    
)
# First stage row; not used below in this cell.
block_config = eff_config.block_configs[0]
block = eff_config.make_blocks()

# 24 channels matches in_channels of the first block_configs row.
x = torch.randn(1, 24, 256, 256)

# Print a per-layer torchinfo table, limited to four nesting levels.
print(summary(
    model=block, 
    input_data=x,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
    depth=4,
))

Layer (type (var_name))                                 Input Shape          Output Shape         Param #              Trainable
Sequential (Sequential)                                 [1, 24, 256, 256]    [1, 256, 16, 16]     --                   True
├─Sequential (0)                                        [1, 24, 256, 256]    [1, 24, 256, 256]    --                   True
│    └─FusedMBConv (0)                                  [1, 24, 256, 256]    [1, 24, 256, 256]    --                   True
│    │    └─Sequential (block)                          [1, 24, 256, 256]    [1, 24, 256, 256]    --                   True
│    │    │    └─Conv2dNormActivation (0)               [1, 24, 256, 256]    [1, 24, 256, 256]    5,232                True
│    └─FusedMBConv (1)                                  [1, 24, 256, 256]    [1, 24, 256, 256]    --                   True
│    │    └─Sequential (block)                          [1, 24, 256, 256]    [1, 24, 256, 256]    --                   True
│  

In [55]:
import torch
import torch.nn as nn
import torchvision.ops as ops

from torchinfo import summary

class EfficientNetV2(nn.Module):
    def __init__(
        self,
        in_channels, out_channels, config,
        activation_layer=nn.SiLU,
        **kwargs
    ):
        """EfficientNetV2 classifier assembled from a configuration object.

        Args:
            in_channels (int): The number of input channels.
            out_channels (int): The number of outputs of the final linear
                layer.
            config: Object providing ``block_configs``, ``conv_block``,
                ``make_blocks``, ``pool_block`` and ``last_channels``
                (e.g. an ``EfficientNetV2Config``).
            activation_layer (callable, optional): Activation factory passed
                to the stem, the stages and the head. Defaults to ``nn.SiLU``.
            **kwargs: Forwarded to both ``config.conv_block`` calls and to
                ``config.make_blocks``.
        """
        super().__init__()
        self.config = config

        # Widths are read from the stage table: input width of the first row,
        # output width of the last row.
        # in_mb_channels = config.adjust_channels(32)
        # out_mb_channels = config.adjust_channels(320)
        stem_width = config.block_configs[0][1]
        trunk_width = config.block_configs[-1][2]

        # Stem: strided 3x3 convolution.
        stem = config.conv_block(
            in_channels, stem_width,
            kernel_size=3, padding=1, stride=2,
            activation_layer=activation_layer,
            **kwargs,
        )
        # Trunk: every stage of the configuration.
        stages = config.make_blocks(activation_layer=activation_layer, **kwargs)
        # Head: 1-wide convolution up to last_channels, then pooling to size 1.
        head = config.conv_block(
            trunk_width, config.last_channels,
            kernel_size=1, activation_layer=activation_layer,
            **kwargs,
        )
        pool = config.pool_block(1)

        self.model = nn.Sequential(stem, stages, head, pool)

        # config.dropout is not applied yet:
        # nn.Dropout(p=config.dropout, inplace=True),
        linear = nn.Linear(config.last_channels, out_channels)
        self.classifier = nn.Sequential(linear)

    def forward(self, x):
        """Extract features, flatten all but the batch dimension, classify."""
        features = self.model(x)
        flat = features.flatten(1)
        return self.classifier(flat)

# Smoke test: build the full model with the default configuration for a
# 4-channel input and 6 outputs.
in_channels, out_channels = 4, 6
eff_config = EfficientNetV2Config()
model = EfficientNetV2(
    in_channels,
    out_channels,
    eff_config,
)

# Print a per-layer torchinfo table for one random 256x256 input.
x = torch.randn(1, in_channels, 256, 256)
print(summary(
    model=model, 
    input_data=x,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
    # depth=6
))

Layer (type (var_name))                                           Input Shape          Output Shape         Param #              Trainable
EfficientNetV2 (EfficientNetV2)                                   [1, 4, 256, 256]     [1, 6]               --                   True
├─Sequential (model)                                              [1, 4, 256, 256]     [1, 1280, 1, 1]      --                   True
│    └─Conv2dNormActivation (0)                                   [1, 4, 256, 256]     [1, 24, 128, 128]    --                   True
│    │    └─Conv2d (0)                                            [1, 4, 256, 256]     [1, 24, 128, 128]    864                  True
│    │    └─BatchNorm2d (1)                                       [1, 24, 128, 128]    [1, 24, 128, 128]    48                   True
│    │    └─SiLU (2)                                              [1, 24, 128, 128]    [1, 24, 128, 128]    --                   --
│    └─Sequential (1)                                      