In [2]:
import torch
import torch.nn as nn

In [48]:
import torch 
import torch.nn as nn

def get_activation(name: str = "silu", inplace: bool = True):
    '''
    Build the activation layer identified by a short name
    Args:
        name (str): one of "silu", "relu" or "lrelu"
        inplace (bool): forwarded to the layer's ``inplace`` argument
    Returns
        module (nn.Module): newly constructed activation layer
    Raises
        AttributeError: if ``name`` is not one of the supported names
    '''
    if name == "silu":
        return nn.SiLU(inplace=inplace)
    if name == "relu":
        return nn.ReLU(inplace=inplace)
    if name == "lrelu":
        # LeakyReLU is always built with a negative slope of 0.1
        return nn.LeakyReLU(0.1, inplace=inplace)
    # No known name matched
    raise AttributeError("Unsupported activation function: {}".format(name))

class BaseConv(nn.Module):
    '''
    Basic Convolutional Block (Conv -> BatchNorm -> activation)
    Args:
        in_channels (int): number of input channels
        out_channels (int): number of output channels
        kernel_size (int): size of the (square) kernel
        stride (int): stride length
        groups (int): convolution groups
        bias (bool): add a bias term to the convolution or not
        activation (str): name of activation function ("silu", "relu" or "lrelu")
    '''
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int, groups: int = 1, bias: bool = False, activation: str = 'silu'):
        super().__init__()
        # "same"-style padding: keeps the spatial size at stride 1 for odd kernel sizes
        padding = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=bias)
        self.batchnorm = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.03)
        # The activation always runs in place on the batch-norm output
        self.act = get_activation(activation, inplace=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        '''
        Apply convolution, batch normalization and the activation, in that order
        '''
        return self.act(self.batchnorm(self.conv(x)))

class Residual(nn.Module):
    '''
    Residual block with two convolutional layers and a skip connection:
    a 1x1 convolution halves the channel count, a 3x3 convolution restores it,
    and the block input is added to the result. Both layers use LeakyReLU.
    Args:
        in_channels (int): number of input channels (also the number of output channels)
    '''
    def __init__(self, in_channels: int):
        super().__init__()
        # Integer division: an odd in_channels is rounded down for the reduced width
        reduced_channels = in_channels // 2
        self.conv1 = BaseConv(in_channels, reduced_channels, kernel_size=1, stride=1, activation='lrelu')
        self.conv2 = BaseConv(reduced_channels, in_channels, kernel_size=3, stride=1, activation='lrelu')

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        '''
        Return the input plus the output of the two stacked convolutions
        '''
        out = self.conv2(self.conv1(x))
        return x + out

In [51]:
import torch
import torch.nn as nn

class Darknet53(nn.Module):
    '''
    Base Darknet53 based on the YOLOv3 paper

    The network is a stem convolution followed by five downsampling groups.
    Each group is a stride-2 3x3 convolution that doubles the channel count,
    followed by a number of Residual blocks (1, 2, 8, 8, 4). The stages are
    exposed as 'c1' ... 'c5'; stage 'cK' has stem_out_channels * 2**K channels.
    Args:
        in_channels (int): number of input channels
        stem_out_channels (int): number of output channels for first convolutional layer
        output (tuple): names of the stages ('c1' ... 'c5') whose feature maps are returned
    '''
    def __init__(self, in_channels: int, stem_out_channels: int = 32, output: tuple = ('c3', 'c4', 'c5')):
        super().__init__()
        self.output = output
        # Residual block counts for stages c2..c5 (c1 always uses a single block)
        num_blocks = [2, 8, 8, 4]
        # c1: stride-1 stem followed by the first downsampling group
        self.c1 = nn.Sequential(
            BaseConv(in_channels, stem_out_channels, kernel_size=3, stride=1, activation='lrelu'),
            *self._build_group_block(in_channels=stem_out_channels, num_blocks=1, stride=2))
        # Every group doubles the channel count, so track the running width
        in_channels = stem_out_channels * 2
        self.c2 = nn.Sequential(
            *self._build_group_block(in_channels=in_channels, num_blocks=num_blocks[0], stride=2))
        in_channels = in_channels * 2
        self.c3 = nn.Sequential(
            *self._build_group_block(in_channels=in_channels, num_blocks=num_blocks[1], stride=2))
        in_channels = in_channels * 2
        self.c4 = nn.Sequential(
            *self._build_group_block(in_channels=in_channels, num_blocks=num_blocks[2], stride=2))
        in_channels = in_channels * 2
        self.c5 = nn.Sequential(
            *self._build_group_block(in_channels=in_channels, num_blocks=num_blocks[3], stride=2))

    def _build_group_block(self, in_channels: int, num_blocks: int, stride: int) -> list:
        '''
        Build convolutional layer -> Residual Block (repeated num_blocks times)
        Args:
            in_channels (int): number of channels entering the group
            num_blocks (int): number of Residual blocks after the convolution
            stride (int): stride of the leading 3x3 convolution
        Returns
            layers (list): the convolution followed by the Residual blocks
        '''
        return [
            # Use LeakyReLU here as well so the whole backbone shares one activation
            # (the stem and the Residual blocks already request 'lrelu')
            BaseConv(in_channels, in_channels * 2, kernel_size=3, stride=stride, activation='lrelu'),
            *[Residual(in_channels * 2) for _ in range(num_blocks)]
        ]

    def forward(self, x: torch.Tensor) -> dict:
        '''
        Run all five stages and return the requested feature maps
        Args:
            x (torch.Tensor): input batch; presumably (N, in_channels, H, W) - confirm against callers
        Returns
            outputs (dict): stage name -> feature map, restricted to the names in self.output
        '''
        outputs = {}
        x = self.c1(x)
        outputs['c1'] = x
        x = self.c2(x)
        outputs['c2'] = x
        x = self.c3(x)
        outputs['c3'] = x
        x = self.c4(x)
        outputs['c4'] = x
        x = self.c5(x)
        outputs['c5'] = x
        # Keep only the stages the caller asked for
        return {k: v for k, v in outputs.items() if k in self.output}

class Perception(nn.Module):
    '''
    Placeholder module: no layers or forward logic are defined yet
    '''
    def __init__(self):
        # nn.Module keeps its parameter/submodule/hook registries in state that is
        # created by its own __init__; without this call the instance cannot be
        # called or have submodules assigned to it
        super().__init__()

    def forward(self):
        '''
        Not implemented yet; returns None
        '''
        return None

In [56]:
# Smoke test: build the backbone and run a single forward pass on random input
model = Darknet53(in_channels=3, stem_out_channels=32)
# Random batch of 2 three-channel 416x416 inputs
x = torch.randn(2, 3, 416, 416)
out = model(x)
# Print the shape of each feature map returned by the default output stages
print(f"c3: {out['c3'].shape}")
print(f"c4 :{out['c4'].shape}")
print(f"c5: {out['c5'].shape}")

c3: torch.Size([2, 256, 52, 52])
c4 :torch.Size([2, 512, 26, 26])
c5: torch.Size([2, 1024, 13, 13])
