In [1]:
# Codeblock 1
import torch
import torch.nn as nn

# Multiplier in DetectionHead's output channel count, NUM_PRIORS*(NUM_CLASS+5).
# Presumably the number of prior (anchor) boxes predicted per grid cell -- TODO confirm.
NUM_PRIORS = 3
# Presumably the number of object classes -- TODO confirm. DetectionHead adds
# 5 to it for every prior when sizing its output.
NUM_CLASS  = 80

In [5]:
# Codeblock 2
class Convolutional(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU(0.1) building block.

    Args:
        in_channels: Channel count of the incoming tensor.
        out_channels: Channel count produced by the convolution.
        kernel_size: Convolution kernel size. A value of 3 gets one pixel of
            padding per side; any other value is left unpadded.
        stride: Convolution stride (defaults to 1).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
        super().__init__()

        # Only the 3x3 case is padded.
        if kernel_size == 3:
            padding = 1
        else:
            padding = 0

        self.conv = nn.Conv2d(in_channels,
                              out_channels,
                              kernel_size,
                              stride=stride,
                              bias=False,         #(1)
                              padding=padding)    #(2)

        self.bn = nn.BatchNorm2d(out_channels)

        self.leaky_relu = nn.LeakyReLU(0.1)

    def forward(self, x):        #(3)
        """Run the input through convolution, batch norm and leaky ReLU, in that order."""
        # Uncomment the trace lines to print the tensor size after each stage.
        #print(f'original\t: {x.size()}')

        convolved = self.conv(x)
        #print(f'after conv\t: {convolved.size()}')

        normalized = self.bn(convolved)
        #print(f'after bn\t: {normalized.size()}')

        activated = self.leaky_relu(normalized)
        #print(f'after leaky relu: {activated.size()}')

        return activated

In [3]:
# Codeblock 3
# Build a block that maps 3 input channels to 32 output channels with a 3x3
# kernel; stride is left at its default of 1.
convolutional = Convolutional(in_channels=3,
                              out_channels=32,
                              kernel_size=3)

# Random input of size (1, 3, 416, 416).
x = torch.randn(1, 3, 416, 416)
# The shape trace recorded under this cell matches the print calls in
# Convolutional.forward, which are commented out in the class as shown.
out = convolutional(x)

original	: torch.Size([1, 3, 416, 416])
after conv	: torch.Size([1, 32, 416, 416])
after bn	: torch.Size([1, 32, 416, 416])
after leaky relu: torch.Size([1, 32, 416, 416])


In [4]:
# Codeblock 4
# Same block type, now 32 -> 64 channels with stride 2.
convolutional = Convolutional(in_channels=32,
                              out_channels=64,
                              kernel_size=3, 
                              stride=2)

x = torch.randn(1, 32, 416, 416)
# Per the recorded trace the stride-2 convolution takes 416x416 down to 208x208.
# That trace matches the print calls in Convolutional.forward, which are
# commented out in the class as shown.
out = convolutional(x)

original	: torch.Size([1, 32, 416, 416])
after conv	: torch.Size([1, 64, 208, 208])
after bn	: torch.Size([1, 64, 208, 208])
after leaky relu: torch.Size([1, 64, 208, 208])


In [8]:
# Codeblock 5
class Residual(nn.Module):
    """Residual block: 1x1 channel-halving conv, 3x3 channel-restoring conv, skip add.

    Args:
        num_channels: Channel count of the input. The block's output has the
            same channel count, which is what allows it to be summed with the
            input.
    """

    def __init__(self, num_channels):
        super().__init__()
        self.conv0 = Convolutional(in_channels=num_channels, 
                                   out_channels=num_channels//2,   #(1)
                                   kernel_size=1,       #(2)
                                   stride=1)
        
        self.conv1 = Convolutional(in_channels=num_channels//2,
                                   out_channels=num_channels,      #(3)
                                   kernel_size=3,       #(4)
                                   stride=1)
        
    def forward(self, x):
        """Return conv1(conv0(x)) + x."""
        # Keep a reference to the input for the skip path. The Convolutional
        # sub-blocks use non-in-place layers, so the input tensor is never
        # overwritten and copying it would only duplicate the activation.
        original = x
        #print(f'original\t: {x.size()}')
        
        x = self.conv0(x)
        #print(f'after conv0\t: {x.size()}')
        
        x = self.conv1(x)
        #print(f'after conv1\t: {x.size()}')
        
        x = x + original      #(5)
        #print(f'after summation\t: {x.size()}')
        
        return x

In [7]:
# Codeblock 6
# 64-channel residual block applied to a random (1, 64, 208, 208) tensor.
residual = Residual(num_channels=64)

x = torch.randn(1, 64, 208, 208)
# Per the recorded trace the output keeps the input size, (1, 64, 208, 208).
# That trace matches the print calls in Residual.forward, which are commented
# out in the class as shown.
out = residual(x)

original	: torch.Size([1, 64, 208, 208])
after conv0	: torch.Size([1, 32, 208, 208])
after conv1	: torch.Size([1, 64, 208, 208])
after summation	: torch.Size([1, 64, 208, 208])


In [9]:
# Codeblock 7
# Stack four identical 1024-channel residual blocks.
residuals = nn.ModuleList()
for _ in range(4):
    residual = Residual(num_channels=1024)
    residuals.append(residual)

x = torch.randn(1, 1024, 13, 13)

# Feed the tensor through the stack, printing its size after every block.
for i, block in enumerate(residuals):
    x = block(x)
    print(f'after residuals #{i}\t: {x.size()}')

after residuals #0	: torch.Size([1, 1024, 13, 13])
after residuals #1	: torch.Size([1, 1024, 13, 13])
after residuals #2	: torch.Size([1, 1024, 13, 13])
after residuals #3	: torch.Size([1, 1024, 13, 13])


In [13]:
# Codeblock 8
class Darknet53(nn.Module):
    """Backbone that returns feature maps at three resolutions.

    Layout: one stride-1 stem convolution, then five stride-2 convolutions,
    each followed by a stack of Residual blocks (1, 2, 8, 8 and 4 blocks
    respectively).

    For a (1, 3, 416, 416) input the recorded outputs of ``forward`` are
    (1, 256, 52, 52), (1, 512, 26, 26) and (1, 1024, 13, 13).
    """

    def __init__(self):
        super().__init__()

        self.convolutional0 = Convolutional(in_channels=3,
                                            out_channels=32,
                                            kernel_size=3)
        
        self.convolutional1 = Convolutional(in_channels=32,
                                            out_channels=64,
                                            kernel_size=3,
                                            stride=2)
        
        self.residuals0 = nn.ModuleList([Residual(num_channels=64) for _ in range(1)])
        
        self.convolutional2 = Convolutional(in_channels=64,
                                            out_channels=128,
                                            kernel_size=3,
                                            stride=2)
        
        self.residuals1 = nn.ModuleList([Residual(num_channels=128) for _ in range(2)])
        
        self.convolutional3 = Convolutional(in_channels=128,
                                            out_channels=256,
                                            kernel_size=3,
                                            stride=2)
        
        self.residuals2 = nn.ModuleList([Residual(num_channels=256) for _ in range(8)])
        
        self.convolutional4 = Convolutional(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=2)
        
        self.residuals3 = nn.ModuleList([Residual(num_channels=512) for _ in range(8)])
        
        self.convolutional5 = Convolutional(in_channels=512,
                                            out_channels=1024,
                                            kernel_size=3,
                                            stride=2)
        
        self.residuals4 = nn.ModuleList([Residual(num_channels=1024) for _ in range(4)])
        
    def forward(self, x):
        """Run the backbone and return ``(branch0, branch1, x)``.

        ``branch0`` is the activation after ``residuals2``, ``branch1`` the
        activation after ``residuals3`` and ``x`` the final activation after
        ``residuals4``. The branches are the live activations, not copies, so
        callers should not modify them in place.
        """
        #print(f'original\t\t: {x.size()}\n')
        
        x = self.convolutional0(x)
        #print(f'after convolutional0\t: {x.size()}')
        
        x = self.convolutional1(x)
        #print(f'after convolutional1\t: {x.size()}\n')
        
        for i, block in enumerate(self.residuals0):
            x = block(x)
            #print(f'after residuals0 #{i}\t: {x.size()}')
        
        x = self.convolutional2(x)
        #print(f'\nafter convolutional2\t: {x.size()}\n')
        
        for i, block in enumerate(self.residuals1):
            x = block(x)
            #print(f'after residuals1 #{i}\t: {x.size()}')
            
        x = self.convolutional3(x)
        #print(f'\nafter convolutional3\t: {x.size()}\n')
        
        for i, block in enumerate(self.residuals2):
            x = block(x)
            #print(f'after residuals2 #{i}\t: {x.size()}')
        
        # A reference is enough here: every later step rebinds x to a newly
        # computed tensor instead of writing into this one, so cloning would
        # only duplicate the feature map in memory.
        branch0 = x           #(1)
            
        x = self.convolutional4(x)
        #print(f'\nafter convolutional4\t: {x.size()}\n')
        
        for i, block in enumerate(self.residuals3):
            x = block(x)
            #print(f'after residuals3 #{i}\t: {x.size()}')
        
        # Same reasoning as for branch0: no copy needed.
        branch1 = x           #(2)
            
        x = self.convolutional5(x)
        #print(f'\nafter convolutional5\t: {x.size()}\n')
        
        for i, block in enumerate(self.residuals4):
            x = block(x)
            #print(f'after residuals4 #{i}\t: {x.size()}')
            
        return branch0, branch1, x    #(3)

In [11]:
# Codeblock 9
# Instantiate the backbone and run it on a random (1, 3, 416, 416) tensor.
darknet53 = Darknet53()

x = torch.randn(1, 3, 416, 416)
# `out` is the 3-tuple (branch0, branch1, x) returned by Darknet53.forward.
# The shape trace recorded under this cell matches the print calls in that
# method, which are commented out in the class as shown.
out = darknet53(x)

original		: torch.Size([1, 3, 416, 416])

after convolutional0	: torch.Size([1, 32, 416, 416])
after convolutional1	: torch.Size([1, 64, 208, 208])

after residuals0 #0	: torch.Size([1, 64, 208, 208])

after convolutional2	: torch.Size([1, 128, 104, 104])

after residuals1 #0	: torch.Size([1, 128, 104, 104])
after residuals1 #1	: torch.Size([1, 128, 104, 104])

after convolutional3	: torch.Size([1, 256, 52, 52])

after residuals2 #0	: torch.Size([1, 256, 52, 52])
after residuals2 #1	: torch.Size([1, 256, 52, 52])
after residuals2 #2	: torch.Size([1, 256, 52, 52])
after residuals2 #3	: torch.Size([1, 256, 52, 52])
after residuals2 #4	: torch.Size([1, 256, 52, 52])
after residuals2 #5	: torch.Size([1, 256, 52, 52])
after residuals2 #6	: torch.Size([1, 256, 52, 52])
after residuals2 #7	: torch.Size([1, 256, 52, 52])

after convolutional4	: torch.Size([1, 512, 26, 26])

after residuals3 #0	: torch.Size([1, 512, 26, 26])
after residuals3 #1	: torch.Size([1, 512, 26, 26])
after residuals3 #2

In [12]:
# Codeblock 10
# `out` holds (branch0, branch1, x) from the backbone; print each shape in that order.
for feature_map in out:
    print(feature_map.shape)

torch.Size([1, 256, 52, 52])
torch.Size([1, 512, 26, 26])
torch.Size([1, 1024, 13, 13])


In [16]:
# Codeblock 11
class DetectionHead(nn.Module):
    """Prediction head: a 3x3 Convolutional block followed by a plain 1x1 Conv2d.

    Args:
        num_channels: Channel count of the incoming feature map. The first
            layer doubles it; the second maps it to NUM_PRIORS*(NUM_CLASS+5)
            output channels.
    """

    def __init__(self, num_channels):
        super().__init__()

        expanded_channels = num_channels*2
        prediction_channels = NUM_PRIORS*(NUM_CLASS+5)

        self.convhead0 = Convolutional(num_channels,
                                       expanded_channels,
                                       kernel_size=3)

        # Bare nn.Conv2d: unlike Convolutional it has no batch norm or
        # activation after it and keeps Conv2d's default bias.
        self.convhead1 = nn.Conv2d(expanded_channels,
                                   prediction_channels,
                                   kernel_size=1)

    def forward(self, x):
        """Apply both head layers and return the raw prediction tensor."""
        # Uncomment the trace lines to print the tensor size at each step.
        #print(f'original\t: {x.size()}')

        expanded = self.convhead0(x)
        #print(f'after convhead0\t: {expanded.size()}')

        predictions = self.convhead1(expanded)
        #print(f'after convhead1\t: {predictions.size()}')

        return predictions

In [15]:
# Codeblock 12
# Head for a 512-channel feature map.
detectionhead = DetectionHead(num_channels=512)

x = torch.randn(1, 512, 13, 13)    #(1)
# With NUM_PRIORS=3 and NUM_CLASS=80 the head emits 3*(80+5) = 255 channels,
# as the recorded trace shows. That trace matches the print calls in
# DetectionHead.forward, which are commented out in the class as shown.
out = detectionhead(x)

original	: torch.Size([1, 512, 13, 13])
after convhead0	: torch.Size([1, 1024, 13, 13])
after convhead1	: torch.Size([1, 255, 13, 13])


In [17]:
# Codeblock 13a
class YOLOv3(nn.Module):
    """Darknet53 backbone plus three detection branches at 13x13, 26x26 and 52x52.

    Each branch runs five alternating 1x1 / 3x3 Convolutional blocks and then
    a DetectionHead. The 26x26 and 52x52 branches first reduce and upsample
    the previous branch's features and concatenate them with the matching
    backbone feature map.

    For a (1, 3, 416, 416) input the recorded outputs are (1, 255, 13, 13),
    (1, 255, 26, 26) and (1, 255, 52, 52).
    """

    def __init__(self):
        super().__init__()
        
        ###############################################
        # Backbone initialization.
        
        self.darknet53 = Darknet53()    #(1)
        
        
        ###############################################
        # For 13x13 output.
        
        self.conv0  = Convolutional(in_channels=1024, out_channels=512, kernel_size=1)
        self.conv1  = Convolutional(in_channels=512, out_channels=1024, kernel_size=3)
        self.conv2  = Convolutional(in_channels=1024, out_channels=512, kernel_size=1)
        self.conv3  = Convolutional(in_channels=512, out_channels=1024, kernel_size=3)
        self.conv4  = Convolutional(in_channels=1024, out_channels=512, kernel_size=1)
        
        self.detection_head_large_obj = DetectionHead(num_channels=512)
        
        
        ###############################################
        # For 26x26 output.
        
        self.conv5  = Convolutional(in_channels=512, out_channels=256, kernel_size=1)  #(2)
        self.upsample0 = nn.Upsample(scale_factor=2)      #(3)
        
        # 768 = 256 upsampled channels + 512 channels from backbone branch1.
        self.conv6  = Convolutional(in_channels=768, out_channels=256, kernel_size=1)
        self.conv7  = Convolutional(in_channels=256, out_channels=512, kernel_size=3)
        self.conv8  = Convolutional(in_channels=512, out_channels=256, kernel_size=1)
        self.conv9  = Convolutional(in_channels=256, out_channels=512, kernel_size=3)
        self.conv10 = Convolutional(in_channels=512, out_channels=256, kernel_size=1)
        
        self.detection_head_medium_obj = DetectionHead(num_channels=256)
        
        
        ###############################################
        # For 52x52 output.
        
        self.conv11  = Convolutional(in_channels=256, out_channels=128, kernel_size=1)  #(4)
        self.upsample1 = nn.Upsample(scale_factor=2)      #(5)
        
        # 384 = 128 upsampled channels + 256 channels from backbone branch0.
        self.conv12  = Convolutional(in_channels=384, out_channels=128, kernel_size=1)
        self.conv13  = Convolutional(in_channels=128, out_channels=256, kernel_size=3)
        self.conv14  = Convolutional(in_channels=256, out_channels=128, kernel_size=1)
        self.conv15  = Convolutional(in_channels=128, out_channels=256, kernel_size=3)
        self.conv16  = Convolutional(in_channels=256, out_channels=128, kernel_size=1)
        
        self.detection_head_small_obj = DetectionHead(num_channels=128)
        
# Codeblock 13b
    def forward(self, x, verbose=True):
        """Run the full network and return ``(large_obj, medium_obj, small_obj)``.

        Args:
            x: Input tensor passed straight to the Darknet53 backbone.
            verbose: When True (the default, matching the original behaviour)
                the size of every intermediate tensor is printed. Pass False
                to silence the trace, e.g. inside a training loop.

        Returns:
            The outputs of the large-, medium- and small-object detection
            heads, in that order.
        """
        # Route every trace line through one callable so it can be switched off.
        report = print if verbose else (lambda *args, **kwargs: None)
        
        ###############################################
        # Backbone.
        branch0, branch1, x = self.darknet53(x)      #(1)
        report(f'branch0\t\t\t: {branch0.size()}')
        report(f'branch1\t\t\t: {branch1.size()}')
        report(f'x\t\t\t: {x.size()}\n')
        
        
        ###############################################
        # Flow to 13x13 detection head.
        
        x = self._apply_convs(x, 0, 4, report)
        
        large_obj = self.detection_head_large_obj(x)
        report(f'large object detection\t: {large_obj.size()}\n')
        
        
        ###############################################
        # Flow to 26x26 detection head.
        
        x = self._apply_convs(x, 5, 5, report)
        
        x = self.upsample0(x)
        report(f'after upsample0\t\t: {x.size()}')
        
        x = torch.cat([x, branch1], dim=1)
        report(f'after concatenate\t: {x.size()}')
        
        x = self._apply_convs(x, 6, 10, report)
        
        medium_obj = self.detection_head_medium_obj(x)
        report(f'medium object detection\t: {medium_obj.size()}\n')
        
        
        ###############################################
        # Flow to 52x52 detection head.
        
        x = self._apply_convs(x, 11, 11, report)
        
        x = self.upsample1(x)
        report(f'after upsample1\t\t: {x.size()}')
        
        x = torch.cat([x, branch0], dim=1)
        report(f'after concatenate\t: {x.size()}')
        
        x = self._apply_convs(x, 12, 16, report)
        
        small_obj = self.detection_head_small_obj(x)
        report(f'small object detection\t: {small_obj.size()}\n')
        

        ###############################################
        # Return prediction tensors.
        
        return large_obj, medium_obj, small_obj

    def _apply_convs(self, x, first, last, report):
        """Apply ``self.conv{first}`` .. ``self.conv{last}`` in order, reporting each output size."""
        for idx in range(first, last + 1):
            x = getattr(self, f'conv{idx}')(x)
            report(f'after conv{idx}\t\t: {x.size()}')
        return x

In [18]:
# Codeblock 14
# Build the full model and run it on a random (1, 3, 416, 416) tensor.
yolov3 = YOLOv3()

x = torch.randn(1, 3, 416, 416)
# `out` is the 3-tuple (large_obj, medium_obj, small_obj). The shape trace
# printed under this cell is emitted by the print calls inside YOLOv3.forward.
out = yolov3(x)

branch0			: torch.Size([1, 256, 52, 52])
branch1			: torch.Size([1, 512, 26, 26])
x			: torch.Size([1, 1024, 13, 13])

after conv0		: torch.Size([1, 512, 13, 13])
after conv1		: torch.Size([1, 1024, 13, 13])
after conv2		: torch.Size([1, 512, 13, 13])
after conv3		: torch.Size([1, 1024, 13, 13])
after conv4		: torch.Size([1, 512, 13, 13])
large object detection	: torch.Size([1, 255, 13, 13])

after conv5		: torch.Size([1, 256, 13, 13])
after upsample0		: torch.Size([1, 256, 26, 26])
after concatenate	: torch.Size([1, 768, 26, 26])
after conv6		: torch.Size([1, 256, 26, 26])
after conv7		: torch.Size([1, 512, 26, 26])
after conv8		: torch.Size([1, 256, 26, 26])
after conv9		: torch.Size([1, 512, 26, 26])
after conv10		: torch.Size([1, 256, 26, 26])
medium object detection	: torch.Size([1, 255, 26, 26])

after conv11		: torch.Size([1, 128, 26, 26])
after upsample1		: torch.Size([1, 128, 52, 52])
after concatenate	: torch.Size([1, 384, 52, 52])
after conv12		: torch.Size([1, 128, 52, 52])

In [19]:
# Codeblock 15
# `out` holds (large_obj, medium_obj, small_obj); print each shape in that order.
for prediction in out:
    print(prediction.shape)

torch.Size([1, 255, 13, 13])
torch.Size([1, 255, 26, 26])
torch.Size([1, 255, 52, 52])
