![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)

# <center> Deep Learning Methods </center>
## <center> Lecture 10 -  Object Detection </center>
### <center> YOLO v3 - Model </center>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/DeepLearningMethods/10_ObjectDetection/MainYOLOv3Model.ipynb)

In [1]:
#-- Wide screen:
#-- `display` and `HTML` are imported from the public `IPython.display` module;
#-- importing `display` from `IPython.core.display` is deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [2]:
#-- Auto reload:
#-- Loads the IPython `autoreload` extension and sets it to mode 2, so that edits to
#-- imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
#-- Colab only: fetch the course helper package and install `torchinfo`.
#-- The check looks for 'google.colab' in the string form of the active IPython shell.
if 'google.colab' in str(get_ipython()):
    #-- Presumably copies just the DeepLearningFramework sub-folder of the course repo
    #-- into ./DeepLearningFramework (degit) - TODO confirm.
    !npx degit FixelAlgorithmsTeam/FixelCourses/DeepLearningMethods/10_ObjectDetection/DeepLearningFramework ./DeepLearningFramework
    #-- NOTE(review): version is not pinned; consider `%pip install torchinfo==<version>`.
    !pip install torchinfo

In [4]:
import torch
import torch.nn as nn
import torchinfo

#### Conv block:
Conv + BN + LeakyReLU (negative slope 0.1)

In [5]:
class ConvBlock(nn.Module):
    """
    Convolution block: `Conv2d`, optionally followed by `BatchNorm2d` and `LeakyReLU(0.1)`.

    Args:
        cIn:         Number of input channels.
        cOut:        Number of output channels.
        kernelSize:  Size of the (square) convolution kernel.
        stride:      Convolution stride (default: 1).
        bActivation: If truthy (default), the convolution has no bias and is followed by
                     `BatchNorm2d` and `LeakyReLU(0.1)`.
                     Otherwise the block is a single convolution with a bias term.

    The layers are stored in `self.oBlock` (an `nn.Sequential`).
    """
    def __init__(self, cIn, cOut, kernelSize, stride=1, bActivation=True):
        super().__init__()

        #-- Normalize the flag once, so the bias choice and the layer list always agree
        #-- (previously `not bActivation` and `bActivation == True` could disagree,
        #-- e.g. for a truthy value that is not equal to `True`):
        bActivation = bool(bActivation)

        #-- "Same"-style padding: keeps the spatial size for odd kernels with stride 1:
        padding = kernelSize // 2
        #-- The bias is dropped whenever BatchNorm follows the convolution:
        oConv   = nn.Conv2d(cIn, cOut, kernel_size=kernelSize, padding=padding, stride=stride, bias=not bActivation)

        lLayers = [oConv]
        if bActivation:
            lLayers += [nn.BatchNorm2d(cOut), nn.LeakyReLU(0.1)]

        self.oBlock = nn.Sequential(*lLayers)

    def forward(self, mX):
        """Applies the block to `mX` and returns the result."""
        return self.oBlock(mX)

<img src='https://github.com/FixelAlgorithmsTeam/FixelCourses/blob/master/DeepLearningMethods/10_ObjectDetection/ResBlock.png?raw=true' alt="Drawing" style="width: 450px;"/>

In [6]:
class ResBlock(nn.Module):
    """
    Residual block.

    A 1x1 `ConvBlock` halves the number of channels (`cIn // 2`), a 3x3 `ConvBlock`
    restores them to `cIn`, and the block input is added to the result.

    Args:
        cIn: Number of input (and output) channels.
    """
    def __init__(self, cIn):
        super().__init__()

        cMid    = cIn // 2
        oReduce = ConvBlock(cIn,  cMid, kernelSize=1)
        oExpand = ConvBlock(cMid, cIn,  kernelSize=3)

        self.oBlock = nn.Sequential(oReduce, oExpand)

    def forward(self, mX):
        """Returns `mX` plus the output of the two-convolution branch."""
        mBranch = self.oBlock(mX)
        return mX + mBranch

#### Darknet53:
<img src='https://github.com/FixelAlgorithmsTeam/FixelCourses/blob/master/DeepLearningMethods/10_ObjectDetection/Darknet53.png?raw=true' alt="Drawing" style="width: 850px;"/>

In [7]:
class Darknet53(nn.Module):
    """
    Darknet-53 style backbone built from `ConvBlock` and `ResBlock`.

    The network is split into three sequential stages so that the intermediate
    feature maps can be returned:
        * `oBlock1`: 3 -> 256 channels, three stride-2 convolutions (total stride 8).
        * `oBlock2`: 256 -> 512 channels, one more stride-2 convolution (total stride 16).
        * `oBlock3`: 512 -> 1024 channels, one more stride-2 convolution (total stride 32).
    """
    def __init__(self):
        super().__init__()

        #-- Stage 1: stem + 1, 2 and 8 residual blocks at 64, 128 and 256 channels:
        self.oBlock1 = nn.Sequential(
            ConvBlock(3,   32,  kernelSize=3, stride=1),
            ConvBlock(32,  64,  kernelSize=3, stride=2),
            ResBlock (64),
            ConvBlock(64,  128, kernelSize=3, stride=2),
            *[ResBlock(128) for _ in range(2)],
            ConvBlock(128, 256, kernelSize=3, stride=2),
            *[ResBlock(256) for _ in range(8)],
        )

        #-- Stage 2: downsample + 8 residual blocks at 512 channels:
        self.oBlock2 = nn.Sequential(
            ConvBlock(256, 512, kernelSize=3, stride=2),
            *[ResBlock(512) for _ in range(8)],
        )

        #-- Stage 3: downsample + 4 residual blocks at 1024 channels:
        self.oBlock3 = nn.Sequential(
            ConvBlock(512, 1024, kernelSize=3, stride=2),
            *[ResBlock(1024) for _ in range(4)],
        )

    def forward(self, mX):
        """
        Runs the three stages in sequence.

        Returns:
            Tuple `(stage3, stage2, stage1)` of feature maps, i.e. deepest first
            (1024, 512 and 256 channels respectively).
        """
        mStride8  = self.oBlock1(mX)
        mStride16 = self.oBlock2(mStride8)
        mStride32 = self.oBlock3(mStride16)

        return mStride32, mStride16, mStride8

In [8]:
#-- Layer-by-layer summary of the backbone for an input of size (16, 3, 416, 416):
torchinfo.summary(Darknet53(), (16, 3, 416, 416))

Layer (type:depth-idx)                             Output Shape              Param #
Darknet53                                          --                        --
├─Sequential: 1-1                                  [16, 256, 52, 52]         --
│    └─ConvBlock: 2-1                              [16, 32, 416, 416]        --
│    │    └─Sequential: 3-1                        [16, 32, 416, 416]        928
│    └─ConvBlock: 2-2                              [16, 64, 208, 208]        --
│    │    └─Sequential: 3-2                        [16, 64, 208, 208]        18,560
│    └─ResBlock: 2-3                               [16, 64, 208, 208]        --
│    │    └─Sequential: 3-3                        [16, 64, 208, 208]        20,672
│    └─ConvBlock: 2-4                              [16, 128, 104, 104]       --
│    │    └─Sequential: 3-4                        [16, 128, 104, 104]       73,984
│    └─ResBlock: 2-5                               [16, 128, 104, 104]       --
│    │    └─Sequential

#### YOLO v3:
<img src='https://github.com/FixelAlgorithmsTeam/FixelCourses/blob/master/DeepLearningMethods/10_ObjectDetection/YOLOv3.png?raw=true' alt="Drawing" style="width: 850px;"/>

In [9]:
class NotResBlock(nn.Module):
    """
    Stack of five `ConvBlock`s without a skip connection.

    The kernels alternate 1x1, 3x3, 1x1, 3x3, 1x1 and the channels alternate between
    `cOut` (after each 1x1) and `2 * cOut` (after each 3x3).

    Args:
        cIn:  Number of input channels.
        cOut: Number of output channels.
    """
    def __init__(self, cIn, cOut):
        super().__init__()

        cWide   = 2 * cOut
        lLayers = [
            ConvBlock(cIn,   cOut,  kernelSize=1),
            ConvBlock(cOut,  cWide, kernelSize=3),
            ConvBlock(cWide, cOut,  kernelSize=1),
            ConvBlock(cOut,  cWide, kernelSize=3),
            ConvBlock(cWide, cOut,  kernelSize=1),
        ]

        self.oBlock = nn.Sequential(*lLayers)

    def forward(self, mX):
        """Applies the five convolution blocks to `mX`."""
        mOut = self.oBlock(mX)
        return mOut

In [10]:
class UpsampleBlock(nn.Module):
    """
    Halves the number of channels with a 1x1 `ConvBlock`, then upsamples by a factor of 2
    (`nn.Upsample` with its default mode).

    Args:
        cIn: Number of input channels; the output has `cIn // 2` channels.
    """
    def __init__(self, cIn):
        super().__init__()

        oReduce   = ConvBlock(cIn, cIn // 2, kernelSize=1)
        oUpsample = nn.Upsample(scale_factor=2)

        self.oBlock = nn.Sequential(oReduce, oUpsample)

    def forward(self, mX):
        """Returns the channel-reduced, x2-upsampled version of `mX`."""
        mUp = self.oBlock(mX)
        return mUp

In [11]:
class OutBlock(nn.Module):
    """
    Output head for a single scale.

    A 3x3 `ConvBlock` doubles the channels, then a plain convolution (no BatchNorm and
    no activation) maps them to `3 * (nLabels + 5)` output channels.

    Args:
        cIn:     Number of input channels.
        nLabels: Number of labels; sets the number of output channels.
    """
    def __init__(self, cIn, nLabels):
        super().__init__()

        cHidden = 2 * cIn
        cOut    = 3 * (nLabels + 5)

        oExpand  = ConvBlock(cIn, cHidden, kernelSize=3)
        #-- NOTE(review): the reference YOLOv3 configuration uses a 1x1 kernel for this
        #-- final linear convolution; a 3x3 kernel is used here - confirm this is intended.
        oPredict = ConvBlock(cHidden, cOut, kernelSize=3, bActivation=False)

        self.oBlock = nn.Sequential(oExpand, oPredict)

    def forward(self, mX):
        """Returns the raw (un-activated) predictions for `mX`."""
        mPred = self.oBlock(mX)
        return mPred

In [12]:
class YOLOv3(nn.Module):
    """
    YOLO v3 model: `Darknet53` backbone followed by three prediction scales.

    Args:
        nLabels: Number of labels. Each scale predicts `3 * (nLabels + 5)` channels per
                 grid cell, which are split into `(3, nLabels + 5)` in `forward`.
    """
    def __init__(self, nLabels):
        super().__init__()

        self.nLabels    = nLabels

        self.oDarknet53 = Darknet53    ()
        self.oBlock1    = NotResBlock  (1024, 512)
        #-- 768 = 256 (upsampled `oBlock1` output) + 512 (backbone stage 2):
        self.oBlock2    = NotResBlock  (768, 256)
        #-- 384 = 128 (upsampled `oBlock2` output) + 256 (backbone stage 1):
        self.oBlock3    = NotResBlock  (384, 128)
        self.oUpBlock1  = UpsampleBlock(512)
        self.oUpBlock2  = UpsampleBlock(256)

        self.oOutBlock1 = OutBlock(512, nLabels)
        self.oOutBlock2 = OutBlock(256, nLabels)
        self.oOutBlock3 = OutBlock(128, nLabels)

    def _SplitAnchors(self, mOut):
        """Reshapes `(N, 3 * (5 + nLabels), H, W)` into `(N, 3, 5 + nLabels, H, W)`."""
        #-- The grid size is read from the tensor itself instead of being hard-coded
        #-- (13 / 26 / 52), so inputs other than 416x416 are handled correctly:
        nBatch, _, nRows, nCols = mOut.shape
        return mOut.view(nBatch, 3, 5 + self.nLabels, nRows, nCols)

    def forward(self, mX):
        """
        Args:
            mX: Input batch; assumes `(N, 3, H, W)` with `H` and `W` multiples of 32,
                so the upsampled maps match the backbone maps they are concatenated with.

        Returns:
            Tuple `(mOut1, mOut2, mOut3)` with shapes `(N, 3, 5 + nLabels, h, w)`, where
            `(h, w)` is the grid of the coarse (stride 32), medium (stride 16) and
            fine (stride 8) scale respectively. For a 416x416 input: 13, 26 and 52.
        """
        mBlock3, mBlock2, mBlock1 = self.oDarknet53(mX)

        #-- Coarse scale, then two upsample-and-concatenate steps with the backbone features:
        mScale1 = self.oBlock1  (mBlock3)
        mZ      = self.oUpBlock1(mScale1)
        mZ      = torch.cat     ([mZ, mBlock2], dim=1)
        mScale2 = self.oBlock2  (mZ)
        mZ      = self.oUpBlock2(mScale2)
        mZ      = torch.cat     ([mZ, mBlock1], dim=1)
        mScale3 = self.oBlock3  (mZ)

        mOut1   = self._SplitAnchors(self.oOutBlock1(mScale1))
        mOut2   = self._SplitAnchors(self.oOutBlock2(mScale2))
        mOut3   = self._SplitAnchors(self.oOutBlock3(mScale3))
        return mOut1, mOut2, mOut3

In [13]:
#-- Layer-by-layer summary of the full model with 20 labels for an input of size (16, 3, 416, 416):
torchinfo.summary(YOLOv3(20), (16, 3, 416, 416))

Layer (type:depth-idx)                                  Output Shape              Param #
YOLOv3                                                  --                        --
├─Darknet53: 1-1                                        [16, 1024, 13, 13]        --
│    └─Sequential: 2-1                                  [16, 256, 52, 52]         --
│    │    └─ConvBlock: 3-1                              [16, 32, 416, 416]        928
│    │    └─ConvBlock: 3-2                              [16, 64, 208, 208]        18,560
│    │    └─ResBlock: 3-3                               [16, 64, 208, 208]        20,672
│    │    └─ConvBlock: 3-4                              [16, 128, 104, 104]       73,984
│    │    └─ResBlock: 3-5                               [16, 128, 104, 104]       82,304
│    │    └─ResBlock: 3-6                               [16, 128, 104, 104]       82,304
│    │    └─ConvBlock: 3-7                              [16, 256, 52, 52]         295,424
│    │    └─ResBlock: 3-8         