# picodet from scratch

In [1]:
import sys
from pathlib import Path
from typing import Sequence, Optional, Union
sys.path.insert(0, "..")

import numpy as np
import torch
import torch.nn as nn
from pydantic.dataclasses import dataclass
from pydantic import Field
from mmcv.cnn import ConvModule
from mmdet.models.utils import make_divisible
from mmcv.runner import BaseModule

from src.esnet import ESNet
from src.csppan import ChannelEqualiser, DarknetBottleneck, CSPLayer, CSPPAN
from src.picodet_head import PicoDetHead

## Backbone: ESNet

In [2]:
# Instantiate the ESNet backbone with its default constructor arguments.
esnet = ESNet()
# Show which indices the backbone exposes as outputs (presumably the internal
# block indices whose feature maps are returned -- confirm in src/esnet.py).
print(esnet.out_ixs)

[2, 9, 12]




In [3]:
# Smoke-test the backbone: push one random 3-channel 320x320 batch through it
# and print the shape of every feature map it returns.
# torch.rand draws uniform [0, 1) samples directly as a float32 tensor, so no
# intermediate float64 NumPy array or dtype cast is needed.
test_input = torch.rand(1, 3, 320, 320, dtype=torch.float32)
test_outputs = esnet(test_input)
print([a.shape for a in test_outputs])

[torch.Size([1, 96, 40, 40]), torch.Size([1, 192, 20, 20]), torch.Size([1, 384, 10, 10])]


TODO: Factor inverted residual blocks into this codebase

## Neck: CSPPAN

In [4]:
# Map the three backbone feature maps (96/192/384 channels) onto a common
# width of 128 channels, then list the resulting shapes.
c = ChannelEqualiser([96, 192, 384], 128)
channel_eq_outputs = c(test_outputs)
# Use a distinct loop name so the comprehension does not reuse the name of
# the module object `c` above.
print([feat.shape for feat in channel_eq_outputs])

[torch.Size([1, 128, 40, 40]), torch.Size([1, 128, 20, 20]), torch.Size([1, 128, 10, 10])]


In [5]:
# Smoke-test a single DarknetBottleneck on a random 32-channel feature map.
# NOTE: this rebinds `test_input`, replacing the 320x320 batch used for the
# backbone; later cells that read `test_input` get this tensor instead.
# torch.rand builds the float32 tensor directly, avoiding a float64 NumPy
# array plus a dtype cast.
test_input = torch.rand(1, 32, 100, 100, dtype=torch.float32)
dbb = DarknetBottleneck(in_channels=32, out_channels=32)
dbb_output = dbb(test_input)
# Print the output shape so it can be compared against the input shape.
print(dbb_output.shape)

torch.Size([1, 32, 100, 100])


In [6]:
# Smoke-test a CSPLayer built with positional arguments (32, 32) --
# presumably in_channels and out_channels; confirm in src/csppan.py.
cspl = CSPLayer(32, 32)
# Reuses `test_input` as last bound by the DarknetBottleneck cell
# (shape 1x32x100x100), not the 320x320 backbone input.
cspl_output = cspl(test_input)
print(cspl_output.shape)

torch.Size([1, 32, 100, 100])


In [7]:
# Build the CSP-PAN neck over the three backbone levels and run it on the
# backbone feature maps produced earlier in the notebook.
csppan = CSPPAN(
    # Channel layout: one entry per backbone level in, a single width out.
    in_channels=[96, 192, 384],
    out_channels=96,
    # Block structure.
    num_csp_blocks=1,
    squeeze_ratio=1,
    # Layer configs passed through to the neck's building blocks.
    norm_cfg=dict(type='BN', requires_grad=True),
    act_cfg=dict(type='HSwish'),
)
csppan_outputs = csppan(test_outputs)
# The recorded output lists four levels for three inputs: the neck returns
# one extra, coarser feature map in addition to the three input scales.
print([level.shape for level in csppan_outputs])

[torch.Size([1, 96, 40, 40]), torch.Size([1, 96, 20, 20]), torch.Size([1, 96, 10, 10]), torch.Size([1, 96, 5, 5])]


In [8]:
# Display the container type returned by the neck (a tuple, per the recorded
# output) so the head's expected input type is known.
type(csppan_outputs)

tuple

## Head: PicoDetHead

In [14]:
# Build the detection head for 96-channel neck features and 80 classes.
h = PicoDetHead(in_channels=96, num_classes=80)

# Call the module instance rather than `.forward` directly: `nn.Module.__call__`
# dispatches to `forward` and also runs any registered forward hooks, which a
# direct `.forward(...)` call silently skips.
head_outputs = h(csppan_outputs)

# One (cls_score, bbox_pred) pair is yielded per feature level; print both
# shapes. The recorded run shows 80 channels for cls_score (= num_classes)
# and 32 for bbox_pred (presumably 4 sides x 8 distribution bins -- confirm
# in src/picodet_head.py).
for cls_score, bbox_pred in head_outputs:
    print(cls_score.shape, bbox_pred.shape)

torch.Size([1, 80, 40, 40]) torch.Size([1, 32, 40, 40])
torch.Size([1, 80, 20, 20]) torch.Size([1, 32, 20, 20])
torch.Size([1, 80, 10, 10]) torch.Size([1, 32, 10, 10])
torch.Size([1, 80, 5, 5]) torch.Size([1, 32, 5, 5])
