# Implementation of YOLOv1 Architecture in PyTorch

## Architecture Implementation
We will start by implementing the architecture of the YOLO model from the [You Only Look Once](https://arxiv.org/abs/1506.02640) paper.

In [17]:
import torch
import torch.nn as nn

# Layer-by-layer description of the convolutional backbone.
# Each entry is one of:
#   tuple: (kernel_size, out_channels, stride, padding) -> one conv block
#   "M":   a 2x2 max pool with stride 2
#   list:  [conv_tuple_1, conv_tuple_2, repeats] -> the two conv blocks,
#          applied one after the other, repeated `repeats` times
# The input channel count of every conv is not stored here; it is taken from
# the output channels of the layer that precedes it.
architecture_config = [
    (7, 64, 2, 3),
    "M",
    (3, 192, 1, 1),
    "M",
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "M",
    # Two conv configs; the last number (4) is the number of repetitions of the pair
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],
    # 1x1 conv: stride 1, padding 0 (a stride of 0 is not a valid convolution)
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "M",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),
    (3, 1024, 1, 1),
    (3, 1024, 1, 1),
]

In [18]:
class CNNBlock(nn.Module):
    """
    Convolutional block that serves as the building block for the YOLO architecture.

    Inputs:
    in_channels: number of channels of the input feature map
    out_channels: number of channels produced by the convolution
    **kwargs: forwarded unchanged to nn.Conv2d (e.g. kernel_size, stride, padding)

    Outputs:
    Forward propagation for leakyrelu(batchnorm(conv(x))) where x is our input
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super(CNNBlock, self).__init__()
        # 2D convolution; the bias is disabled because the batch norm that
        # follows applies its own learnable shift
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        # Batch normalization normalizes intermediate outputs, making optimization faster and more stable
        self.batchnorm = nn.BatchNorm2d(out_channels)
        # As stated in the paper, we use Leaky ReLU with a 0.1 negative slope
        self.leakyrelu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Apply convolution, then batch norm, then Leaky ReLU to x."""
        return self.leakyrelu(self.batchnorm(self.conv(x)))

In [19]:
class YOLOv1(nn.Module):
    """
    Main engine class for YOLOv1. It has everything needed for building the YOLOv1 model architecture.

    Inputs:
    in_channels: default is 3 in case we are using RGB images
    **kwargs: forwarded to create_fc_layer (split_size, num_boxes, num_classes)

    Outputs:
    Forward propagation for YOLOv1 architecture
    """

    def __init__(self, in_channels=3, **kwargs):
        super(YOLOv1, self).__init__()
        # We get the architecture configuration from the module-level list
        self.architecture = architecture_config
        # Number of channels of the input images
        self.in_channels = in_channels
        # The paper uses Darknet Framework by J. Redmon as the hidden layer architecture
        self.darknet = self.create_conv_layers(self.architecture)
        # The model ends with a fully-connected head
        self.fully_connected_layer = self.create_fc_layer(**kwargs)

    def forward(self, x):
        """Run x through the convolutional backbone and then the fully-connected head."""
        x = self.darknet(x)
        # Flatten every dimension except the batch dimension (dim 0) so the
        # feature map can be fed to the linear layers
        return self.fully_connected_layer(torch.flatten(x, start_dim=1))

    # Implementation of darknet
    def create_conv_layers(self, architecture):
        """
        Build the convolutional backbone from an architecture description.

        Inputs:
        architecture: iterable whose entries are either
            a tuple (kernel_size, out_channels, stride, padding) -> one CNNBlock,
            a str -> a 2x2 max pool with stride 2,
            a list [conv_tuple_1, conv_tuple_2, repeats] -> the two CNNBlocks repeated

        Outputs:
        nn.Sequential containing all layers in order

        Raises:
        TypeError if an entry is not a tuple, str or list
        """
        layers = []
        # Tracks the channel count flowing into the next conv layer
        in_channels = self.in_channels

        for x in architecture:
            if isinstance(x, tuple):
                layers.append(
                    CNNBlock(in_channels, x[1],
                             kernel_size=x[0], stride=x[2], padding=x[3])
                )
                # The output of this conv is the input of whatever comes next
                in_channels = x[1]
            elif isinstance(x, str):
                # Max pooling does not change the number of channels
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            elif isinstance(x, list):
                conv1, conv2, loop_size = x

                for _ in range(loop_size):
                    layers.append(
                        CNNBlock(in_channels, conv1[1],
                                 kernel_size=conv1[0], stride=conv1[2], padding=conv1[3])
                    )
                    # The input of the second block is the output of the first one (conv1[1])
                    layers.append(
                        CNNBlock(conv1[1], conv2[1],
                                 kernel_size=conv2[0], stride=conv2[2], padding=conv2[3])
                    )
                    # Output of conv2 is the input of conv1 in the following repetition
                    in_channels = conv2[1]
            else:
                raise TypeError(
                    f"Unsupported architecture entry of type {type(x).__name__}: {x!r}"
                )

        # Only after every entry has been processed do we unpack the list
        # into an nn.Sequential
        return nn.Sequential(*layers)

    # Implementation of Fully Connected Layer
    def create_fc_layer(self, split_size, num_boxes, num_classes):
        """
        Build the fully-connected head that turns backbone features into predictions.

        Inputs:
        split_size: S, the number of grid cells along each image side
        num_boxes: B, the number of bounding boxes predicted per grid cell
        num_classes: C, the number of object classes

        Outputs:
        nn.Sequential mapping a (batch, 1024, S, S) feature map (or its flattened
        form) to a (batch, S * S * (C + 5 * B)) prediction vector
        """
        S, B, C = split_size, num_boxes, num_classes
        return nn.Sequential(
            nn.Flatten(),
            # 1024 is the channel count of the last conv block in architecture_config;
            # 4096 hidden units, Leaky ReLU and dropout 0.5 follow the paper
            nn.Linear(1024 * S * S, 4096),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            # Each of the S*S cells predicts C class scores plus B boxes of
            # 5 values each (x, y, w, h, confidence)
            nn.Linear(4096, S * S * (C + B * 5)),
        )