In [None]:
import torch
import torch.nn as nn

In [None]:
class CNNBlock(nn.Module):
    """Basic convolutional unit: Conv2d -> BatchNorm2d -> LeakyReLU(0.1).

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: zero-padding applied by the convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        # The convolution is built without a bias term; it is immediately
        # followed by batch normalisation.
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.leaky = nn.LeakyReLU(negative_slope=0.1)

    def forward(self, x):
        """Apply convolution, batch normalisation and LeakyReLU, in that order."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.leaky(normalised)

class Yolov1(nn.Module):
    """YOLOv1-style detector: a convolutional backbone followed by two linear layers.

    Args:
        S: grid size; the output has S x S cells.
        B: number of bounding boxes per grid cell.
        C: number of classes.
        in_channels: number of channels of the input images (3 by default).

    Input size:
        The backbone contains two stride-2 convolutions and four 2x2
        max-pools, i.e. it shrinks height and width by a factor of 64, and
        ends with 1024 channels. The first linear layer expects
        ``1024 * S * S`` features, so the backbone output must be S x S —
        e.g. a ``64 * S`` x ``64 * S`` input (448 x 448 for ``S=7``). Other
        sizes fail inside the first linear layer with a shape mismatch.

    Output:
        ``forward`` returns a tensor of shape ``(N, S, S, B * 5 + C)``: per
        cell, five values for each of the B boxes plus C class scores
        (NOTE(review): the ordering of those values is not fixed by this
        module — confirm against the loss / decoding code).
    """

    def __init__(self, S: int = 7, B: int = 2, C: int = 10, in_channels: int = 3):
        super().__init__()
        self.S = S  # grid size
        self.B = B  # number of bounding boxes per grid cell
        self.C = C  # number of classes
        self.in_channels = in_channels  # channels of the input images

        # Each tuple is (out_channels, kernel_size, stride, padding) for one
        # CNNBlock; "M" is a 2x2 max-pool with stride 2.
        architecture = [
            (64, 7, 2, 3),  # stride 2: halves H and W
            "M",
            (192, 3, 1, 1),
            "M",
            (128, 1, 1, 0),
            (256, 3, 1, 1),
            (256, 1, 1, 0),
            (512, 3, 1, 1),
            "M",
            (256, 1, 1, 0),
            (512, 3, 1, 1),
            (256, 1, 1, 0),
            (512, 3, 1, 1),
            (256, 1, 1, 0),
            (512, 3, 1, 1),
            (256, 1, 1, 0),
            (512, 3, 1, 1),
            (512, 1, 1, 0),
            (1024, 3, 1, 1),
            "M",
            (512, 1, 1, 0),
            (1024, 3, 1, 1),
            (512, 1, 1, 0),
            (1024, 3, 1, 1),
            (1024, 3, 1, 1),
            (1024, 3, 2, 1),  # stride 2: halves H and W
            (1024, 3, 1, 1),
            (1024, 3, 1, 1),
        ]

        self.backbone = self._create_conv_layers(architecture, in_channels)
        self.fc = self._create_fc_layers(S, B, C)

    def _create_conv_layers(self, architecture, in_channels: int = 3):
        """Build the backbone from an architecture description.

        Args:
            architecture: iterable whose entries are either a tuple
                ``(out_channels, kernel_size, stride, padding)`` (one
                CNNBlock) or the string ``"M"`` (one 2x2, stride-2 max-pool).
            in_channels: number of channels fed to the first convolution.

        Returns:
            An ``nn.Sequential`` holding the layers in the given order.

        Raises:
            ValueError: if an entry is neither a tuple nor ``"M"``. Such
                entries used to be skipped silently, which produced a
                network different from the one described.
        """
        layers = []

        for spec in architecture:
            if isinstance(spec, tuple):
                out_channels, kernel_size, stride, padding = spec
                layers.append(
                    CNNBlock(in_channels, out_channels, kernel_size, stride, padding)
                )
                # The next convolution consumes what this one produced.
                in_channels = out_channels
            elif spec == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                raise ValueError(
                    f"Unsupported architecture entry {spec!r}: expected a "
                    '(out_channels, kernel_size, stride, padding) tuple or "M"'
                )

        return nn.Sequential(*layers)

    def _create_fc_layers(self, S: int, B: int, C: int):
        """Build the head mapping backbone features to per-cell predictions.

        The head flattens a ``1024 x S x S`` feature map, projects it to 4096
        units (with dropout 0.5 and LeakyReLU 0.1) and then to
        ``S * S * (B * 5 + C)`` outputs.
        """
        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * S * S, 4096),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
            nn.Linear(4096, S * S * (B * 5 + C)),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the backbone and head; return shape ``(N, S, S, B * 5 + C)``."""
        x = self.backbone(x)
        x = self.fc(x)
        return x.reshape(-1, self.S, self.S, self.B * 5 + self.C)

# Build the detector with its default configuration (S=7, B=2, C=10).
# NOTE: this runs at module/cell execution time and allocates the full model;
# with S=7 the first linear layer alone holds 1024 * 7 * 7 * 4096
# (roughly 205 million) weights.
model = Yolov1()

