# Import Libraries

In [5]:
import torch
import torch.nn as nn
import pandas as pd
import os
import PIL
import skimage
from skimage import io
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from torch.utils.data import DataLoader
import tqdm
seed = 123
import cv2
import xml.etree.ElementTree as ET
torch.manual_seed(seed)
from collections import Counter

In [6]:
# Layer-by-layer description of the convolutional backbone (the fully
# connected layers are not part of this list). Entry formats:
#   tuple -> (kernel_size, number_of_filters, stride, padding) for one conv block
#   "M"   -> a max-pool layer
#   list  -> [conv_tuple, conv_tuple, repeat_count]; the pair is repeated
#            repeat_count times
architecture_config = [
    # Stem
    (7, 64, 2, 3), "M",
    (3, 192, 1, 1), "M",
    # Alternating 1x1 / 3x3 convolutions
    (1, 128, 1, 0), (3, 256, 1, 1), (1, 256, 1, 0), (3, 512, 1, 1), "M",
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],
    (1, 512, 1, 0), (3, 1024, 1, 1), "M",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],
    # Final 3x3 convolutions; the second one has stride 2
    (3, 1024, 1, 1), (3, 1024, 2, 1), (3, 1024, 1, 1), (3, 1024, 1, 1),
]

In [7]:
class CNNBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU(0.1) applied in sequence.

    Args:
        in_channels: input channel count handed to ``nn.Conv2d``.
        out_channels: number of conv filters; also the ``BatchNorm2d`` size.
        **kwargs: passed straight through to ``nn.Conv2d`` (for example
            ``kernel_size``, ``stride``, ``padding``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # The convolution is created without a bias term; it is always
        # followed by the batch-norm layer below.
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.leakyrelu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Run ``x`` through the convolution, the batch norm and the activation."""
        convolved = self.conv(x)
        normalised = self.batchnorm(convolved)
        return self.leakyrelu(normalised)
   

In [27]:
# Inspect field 1 of the first config entry; per the tuple layout used in
# architecture_config this is the number of filters of the first conv layer.
architecture_config[0][1]

64

In [41]:
# Entry 9 is written in the repeated-block form, so its type is `list`
# (plain conv layers are tuples and max-pool layers are strings).
type(architecture_config[9])

list

In [34]:
# 1
# Build a single block with the first config entry's settings
# (3 input channels -> 64 filters, 7x7 kernel, stride 2, padding 3)
# to inspect its printed structure.
CNNBlock(3,64, kernel_size=7,stride=2,padding=3)
# 2

CNNBlock(
  (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leakyrelu): LeakyReLU(negative_slope=0.1)
)

In [40]:
# Scratch cell: put two identical blocks in a list and unpack that list
# into nn.Sequential to see how the container is assembled.
# NOTE(review): both blocks take 3 input channels while the first one emits
# 64, so this stack is only good for inspecting the printed structure, not
# for running a forward pass.
layers = [
    CNNBlock(3, 64, kernel_size=7, stride=2, padding=3)
    for _ in range(2)
]


def fu(*args):
    """Print the positional arguments as one tuple (star-args demo)."""
    print(args)


nn.Sequential(*layers)


Sequential(
  (0): CNNBlock(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leakyrelu): LeakyReLU(negative_slope=0.1)
  )
  (1): CNNBlock(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leakyrelu): LeakyReLU(negative_slope=0.1)
  )
)

In [44]:
 
class YoloV1(nn.Module):
    """Convolutional backbone followed by a two-layer fully connected head.

    The backbone is assembled from the module-level ``architecture_config``
    list; the head is sized from the keyword arguments.

    Args:
        in_channels: channel count of the input given to the first conv block.
        **kwargs: forwarded to ``_create_fcs``; must supply ``split_size``,
            ``num_boxes`` and ``num_classes``.
    """

    def __init__(self, in_channels=3, **kwargs):
        super().__init__()
        self.architecture = architecture_config
        self.in_channels = in_channels
        self.darknet = self._create_conv_layers(self.architecture)
        self.fcs = self._create_fcs(**kwargs)

    def _create_fcs(self, split_size, num_boxes, num_classes):
        """Build the fully connected head.

        The first linear layer takes ``1024 * S * S`` input features and the
        last one emits ``S * S * (C + B * 5)`` values per sample, where
        ``S = split_size``, ``B = num_boxes`` and ``C = num_classes``.
        NOTE(review): this presumes the backbone output is 1024 channels on
        an S x S grid for the input size in use -- confirm against the caller.
        """
        S, B, C = split_size, num_boxes, num_classes
        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * S * S, 496),
            nn.Dropout(0.0),
            nn.LeakyReLU(0.1),
            nn.Linear(496, S * S * (C + B * 5)),
        )

    def forward(self, x):
        """Run the backbone, flatten per sample, then apply the head.

        NOTE(review): x is presumably (batch, in_channels, height, width) --
        confirm against the data pipeline.
        """
        x = self.darknet(x)
        # Flattening here makes the nn.Flatten at the start of self.fcs a
        # no-op; both are kept so the head's module indices (and therefore
        # its state-dict keys) stay unchanged.
        return self.fcs(torch.flatten(x, start_dim=1))

    @staticmethod
    def _conv_from_spec(in_channels, spec):
        """Create one CNNBlock from a (kernel_size, filters, stride, padding) spec."""
        return CNNBlock(
            in_channels,
            spec[1],
            kernel_size=spec[0],
            stride=spec[2],
            padding=spec[3],
        )

    def _create_conv_layers(self, architecture):
        """Translate an architecture description into an ``nn.Sequential``.

        Args:
            architecture: iterable whose entries are one of
                * tuple ``(kernel_size, filters, stride, padding)`` -> one
                  conv block,
                * str -> a 2x2 max-pool with stride 2,
                * list ``[conv_tuple, conv_tuple, repeats]`` -> the two conv
                  blocks repeated ``repeats`` times.

        Returns:
            nn.Sequential holding the layers in the order given.

        Raises:
            TypeError: if an entry is none of the three supported kinds.
                Such entries used to be skipped silently, which produced a
                network with layers missing.
        """
        layers = []
        in_channels = self.in_channels

        for spec in architecture:
            # isinstance (rather than an exact type comparison) also accepts
            # subclasses such as namedtuples or str subclasses.
            if isinstance(spec, tuple):
                layers.append(self._conv_from_spec(in_channels, spec))
                in_channels = spec[1]
            elif isinstance(spec, str):
                # Pooling leaves the channel count untouched.
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            elif isinstance(spec, list):
                first, second, repeats = spec[0], spec[1], spec[2]
                for _ in range(repeats):
                    # The first conv of the pair consumes whatever the
                    # previous layer produced; the second consumes the
                    # first's filters and sets the channel count for the
                    # next repetition.
                    layers.append(self._conv_from_spec(in_channels, first))
                    layers.append(self._conv_from_spec(first[1], second))
                    in_channels = second[1]
            else:
                raise TypeError(
                    "Unsupported architecture entry of type "
                    f"{type(spec).__name__}: {spec!r}"
                )

        return nn.Sequential(*layers)

In [53]:
# Build a model and keep only its state dict, then list every entry's name
# followed by the shape of the stored tensor.
# Note that `yoloModel` holds the state dict, not the nn.Module itself.
yoloModel = YoloV1(split_size=2, num_boxes=8, num_classes=78).state_dict()
yoloKeys = yoloModel.keys()
for key, value in yoloModel.items():
    print(key)
    print(value.shape)

darknet.0.conv.weight
torch.Size([64, 3, 7, 7])
darknet.0.batchnorm.weight
torch.Size([64])
darknet.0.batchnorm.bias
torch.Size([64])
darknet.0.batchnorm.running_mean
torch.Size([64])
darknet.0.batchnorm.running_var
torch.Size([64])
darknet.0.batchnorm.num_batches_tracked
torch.Size([])
darknet.2.conv.weight
torch.Size([192, 64, 3, 3])
darknet.2.batchnorm.weight
torch.Size([192])
darknet.2.batchnorm.bias
torch.Size([192])
darknet.2.batchnorm.running_mean
torch.Size([192])
darknet.2.batchnorm.running_var
torch.Size([192])
darknet.2.batchnorm.num_batches_tracked
torch.Size([])
darknet.4.conv.weight
torch.Size([128, 192, 1, 1])
darknet.4.batchnorm.weight
torch.Size([128])
darknet.4.batchnorm.bias
torch.Size([128])
darknet.4.batchnorm.running_mean
torch.Size([128])
darknet.4.batchnorm.running_var
torch.Size([128])
darknet.4.batchnorm.num_batches_tracked
torch.Size([])
darknet.5.conv.weight
torch.Size([256, 128, 3, 3])
darknet.5.batchnorm.weight
torch.Size([256])
darknet.5.batchnorm.bias
to

# Utility Functions

## Intersection over Union

In [None]:
def intersection_over_union(boxes_preds, boxes_labels,box_format='midpoint'):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds(tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes are (x,y,w,h) or (x1,y1,x2,y2)
    """