# picodet from scratch

In [3]:
import sys
from pathlib import Path
from typing import Sequence, Optional, Union
sys.path.insert(0, "..")

import numpy as np
import torch
import torch.nn as nn
from pydantic.dataclasses import dataclass
from pydantic import Field
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from mmdet.models.utils import make_divisible

from src.esnet import ESNet
from src.csppan import ChannelEqualiser

## Backbone: ESNet

In [8]:
esnet = ESNet()



In [9]:
esnet.out_ixs

[2, 9, 12]

In [10]:
esnet.stage_out_channels
test_input = torch.from_numpy(np.random.rand(1, 3, 320, 320).astype(np.float32))
test_outputs = esnet(test_input)
print([a.shape for a in test_outputs])

[torch.Size([1, 96, 40, 40]), torch.Size([1, 192, 20, 20]), torch.Size([1, 384, 10, 10])]


In [12]:
type(test_outputs[0])

torch.Tensor

TODO: Factor inverted residual blocks into this codebase

## Neck: CSPPAN

In [5]:
c = ChannelEqualiser([96, 192, 384], 128)

In [6]:
channel_eq_outputs = c(test_outputs)

NameError: name 'test_outputs' is not defined

In [11]:
[c.shape for c in channel_eq_outputs]

[torch.Size([1, 128, 40, 40]),
 torch.Size([1, 128, 20, 20]),
 torch.Size([1, 128, 10, 10])]

In [15]:
from mmcv.cnn import ConvModule

In [21]:
@dataclass
class DarknetBottleneck(BaseModule):
    in_channels: int = Field(
        description="The input channels of this Module.")
    out_channels: int = Field(
        description="The output channels of this Module.")
    kernel_size: int = Field(
        default=1, description="The kernel size of the convolution.")
    expansion: int = Field(
        default=0.5, description="Hidden conv block channels relative to output")
    add_identity: bool = Field(
        default=True, description="Whether to add identity to the out.")
    use_depthwise: bool = Field(
        default=False, description="Whether to use depthwise separable convolution.")
    conv_cfg: dict = Field(
        default=None, description="Config dict for 2D convolution layer.")
    norm_cfg: dict = Field(
        default_factory=lambda: dict(type='BN'), description="Config dict for 2D convolution layer.") 
    act_cfg: dict = Field(
        default_factory=lambda: dict(type='Swish'), description="Config dict for activation layer.")

    def __post_init__(self) -> None:
        super().__init__()
        
    def __post_init_post_parse__(self) -> None:
        self.hidden_channels = int(self.out_channels * self.expansion)
        self.conv = DepthwiseSeparableConvModule if self.use_depthwise else ConvModule
        self.conv1 = ConvModule(
            self.in_channels,
            self.hidden_channels,
            kernel_size=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg
        )
        self.conv2 = self.conv(
            self.hidden_channels,
            self.out_channels,
            kernel_size=self.kernel_size,
            stride=1,
            padding=(self.kernel_size - 1) // 2,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg
        )
    
    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
        input_ = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.add_identity and self.in_channels == self.out_channels:
            return out + identity
        return out

In [22]:
dbb = DarknetBottleneck(in_channels=32, out_channels=32)

In [30]:
dbb(torch.from_numpy(np.random.rand(1, 3, 100, 100)).shape

torch.Size([1, 3, 100, 100])

In [None]:
@dataclass
class CSPLayer(BaseModule):
    in_channels: int
    out_channels: int
    kernel_size: int = 1
    expand_ratio: float = 0.5
    expansion: float = 0.5
    num_blocks: int = 1
    add_identity: bool = True
    use_depthwise: bool = False
    conv_cfg: Optional[dict]
    norm_cfg: Optional[dict] = dict(type='BN', momentum=0.03, eps=0.001)
    act_cfg: Optional[dict] = dict(type='Swish')

In [21]:
@dataclass
class CSPPAN(BaseModule):
    in_channels: list[int]
    out_channels: int
    kernel_size: int = Field(default=5, description="Conv2D kernel size")
    n_features: int = Field(default=3, description="Number of output features of CSPPAN module")
    expansion: float = Field(default=0.5, description="...")

## Head: PicoDetHead