# **PyTorch DarkNet implementation**
- Simple PyTorch implementation of DarkNet53 and DarkNet19

In [2]:
import torch
import torch.nn as nn 

---
## Implement the conv layers of DarkNet
- Each conv layer is followed by a batch-norm layer and a LeakyReLU activation layer

In [4]:
def darkConv(in_dim, out_dim, kernel_size=3, stride=1, padding=1):
    """Build one DarkNet convolution unit: Conv2d -> BatchNorm2d -> LeakyReLU.

    Args:
        in_dim: number of input channels of the convolution.
        out_dim: number of output channels of the convolution (and of the
            batch-norm layer that follows it).
        kernel_size: convolution kernel size (default 3).
        stride: convolution stride (default 1).
        padding: convolution zero-padding (default 1).

    Returns:
        An ``nn.Sequential`` holding the three layers in order.
    """
    # The layers must be comma-separated arguments; without the commas the
    # call is a syntax error and the cell cannot run at all.
    convLayer = nn.Sequential(
        # the convolution is created without a bias term; it is followed
        # directly by a BatchNorm2d layer
        nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
        nn.BatchNorm2d(out_dim),
        nn.LeakyReLU(),
    )
    return convLayer

---

## Implement the residual block
- Unlike ResNet, the dimensions do not change when a feature map passes through the residual block
    - Thus no downsample function is needed here

In [5]:
class ResidualBlock(nn.Module):
    """DarkNet residual unit: a 1x1 squeeze conv, a 3x3 expand conv, plus a skip connection.

    The block maps ``in_dim`` channels back to ``in_dim`` channels, so the
    input can be added to the output directly and no downsample branch is
    required.

    Args:
        in_dim: number of channels entering (and leaving) the block.
    """

    def __init__(self, in_dim):
        super().__init__()
        # inside the block the channel count is halved
        squeezed = in_dim // 2
        self.dim_inside = squeezed
        # 1x1 conv (no padding) compresses the channels ...
        self.conv1 = darkConv(in_dim, squeezed, kernel_size=1, padding=0)
        # ... and a darkConv with its default kernel/stride/padding restores them
        self.conv2 = darkConv(squeezed, in_dim)

    def forward(self, x):
        """Return ``conv2(conv1(x)) + x``."""
        out = self.conv2(self.conv1(x))
        # skip connection: accumulate the untouched input onto the conv output
        # (in place on `out`, which is a fresh tensor; `x` itself is not modified)
        out += x
        return out

---

## Construct the DarkNet architecture
- DarkNet53 uses stride-2 convolutions to downsample the feature map at each stage
- DarkNet19 uses max pooling to do this instead, keeping the stride of its conv layers at 1

In [21]:
class DarkNet(nn.Module):
    """DarkNet classifier backbone: a stem conv, five downsampling stages, then pool + linear head.

    Each stage doubles the channel count (32 -> 64 -> 128 -> 256 -> 512 -> 1024)
    and halves the spatial size, either with a stride-2 conv (``max_pool=False``)
    or with a 2x2 max pool followed by a stride-1 conv (``max_pool=True``).

    Args:
        block: module class instantiated as ``block(channels)`` for every
            residual block of a stage; it must keep the channel count.
        out_dim: number of output features of the final linear layer.
        block_num: sequence of five ints, the number of ``block`` instances
            appended to each of the five stages.
        max_pool: if True, downsample with ``nn.MaxPool2d`` instead of a
            stride-2 convolution.
    """

    def __init__(self, block, out_dim, block_num, max_pool=False):
        super(DarkNet, self).__init__()
        dim_list = [32, 64, 128, 256, 512, 1024]
        # instead of stride 2, darknet19 uses maxpool to resize the feature matrix;
        # must be set before _mack_layers is called because it reads this flag
        self.max_pool = max_pool
        # first conv layer, input is a 3-channel image
        self.conv = darkConv(3, dim_list[0])
        # the five downsampling stages
        self.resblock1 = self._mack_layers(block, block_dim=dim_list[1], block_num=block_num[0])
        self.resblock2 = self._mack_layers(block, block_dim=dim_list[2], block_num=block_num[1])
        self.resblock3 = self._mack_layers(block, block_dim=dim_list[3], block_num=block_num[2])
        self.resblock4 = self._mack_layers(block, block_dim=dim_list[4], block_num=block_num[3])
        self.resblock5 = self._mack_layers(block, block_dim=dim_list[5], block_num=block_num[4])
        # global average pooling and the fully connected head
        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(dim_list[5], out_dim)

    def _mack_layers(self, block, block_dim, block_num):
        """Build one stage: a downsampling conv to ``block_dim`` channels plus ``block_num`` blocks.

        The stage input is expected to have ``block_dim // 2`` channels.
        (The method name keeps its original spelling so existing callers still work.)
        """
        layers = []
        # darknet19 uses stride 1 and darknet53 uses stride 2
        if self.max_pool:
            # resize the feature matrix by maxpooling, then widen the channels
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            layers.append(darkConv(block_dim // 2, block_dim))
        else:
            # a single stride-2 conv both downsamples and widens the channels
            layers.append(darkConv(block_dim // 2, block_dim, stride=2))
        for _ in range(block_num):
            # the residual blocks do not change the dim
            layers.append(block(block_dim))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the network on a batch of images shaped (N, 3, H, W) and return (N, out_dim)."""
        # use the attribute names registered in __init__ (conv, resblock1..5,
        # global_avgpool); any other spelling raises AttributeError
        out = self.conv(x)
        out = self.resblock1(out)
        out = self.resblock2(out)
        out = self.resblock3(out)
        out = self.resblock4(out)
        out = self.resblock5(out)
        out = self.global_avgpool(out)
        # flatten everything except the batch axis: (N, 1024, 1, 1) -> (N, 1024).
        # Flattening from dim 0 would merge the batch into one vector and
        # break the linear layer for any batch size other than 1.
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out

---

## Create DarkNet19 and DarkNet53
- DarkNet53: 1 + 5 * 1 + 2 * (1 + 2 + 8 + 8 + 4) + 1 = 53
- DarkNet19: 1 + 5 * 1 + 2 * (0 + 1 + 1 + 2 + 2) + 1 = 19
- Small object detection:
    - Shallow networks are reported to have better performance
    - Max pooling layers really hurt the features of small objects
    - Wonder if DarkNet19 without max pooling layers can achieve good performance on this task

In [24]:
def darkNet53(out_dim):
    """Create the DarkNet53 variant: strided downsampling with 1/2/8/8/4 residual blocks per stage.

    Args:
        out_dim: number of outputs of the final fully connected layer.
    """
    blocks_per_stage = [1, 2, 8, 8, 4]
    return DarkNet(ResidualBlock, out_dim, block_num=blocks_per_stage)

def darkNet19(out_dim):
    """Create the DarkNet19 variant: max-pool downsampling with 0/1/1/2/2 residual blocks per stage.

    Args:
        out_dim: number of outputs of the final fully connected layer.
    """
    blocks_per_stage = [0, 1, 1, 2, 2]
    return DarkNet(ResidualBlock, out_dim, block_num=blocks_per_stage, max_pool=True)

# same depth as darkNet19, but downsampling is done by stride-2 convs rather than max pooling
def darkNet19NoMaxPool(out_dim):
    """Create a DarkNet19-sized network (0/1/1/2/2 blocks per stage) without max pooling.

    Args:
        out_dim: number of outputs of the final fully connected layer.
    """
    blocks_per_stage = [0, 1, 1, 2, 2]
    return DarkNet(ResidualBlock, out_dim, block_num=blocks_per_stage)

---

## Check the structure of the networks

In [18]:
# build DarkNet53 with a 1000-way output layer and print its module tree
mynet = darkNet53(out_dim=1000)
print(mynet)

DarkNet(
  (conv): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
  )
  (resblock1): Sequential(
    (0): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (1): ResidualBlock(
      (conv1): Sequential(
        (0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.01)
      )
      (conv2): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stat

In [22]:
# build the max-pooling DarkNet19 with an 800-way output layer and print its module tree
mynet = darkNet19(out_dim=800)
print(mynet)

DarkNet(
  (conv): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
  )
  (resblock1): Sequential(
    (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
  )
  (resblock2): Sequential(
    (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (1): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (2): ResidualBlock(


In [25]:
# build the stride-2 (no max pooling) DarkNet19 with an 800-way output layer and print its module tree
mynet = darkNet19NoMaxPool(out_dim=800)
print(mynet)

DarkNet(
  (conv): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
  )
  (resblock1): Sequential(
    (0): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
  )
  (resblock2): Sequential(
    (0): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (1): ResidualBlock(
      (conv1): Sequential(
        (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True