In [1]:
# Codeblock 1
import torch
import torch.nn as nn

# Sizes that define the prediction tensor: the last linear layer emits
# (C + B*5) * S * S values, which are later reshaped to (-1, C + B*5, S, S).
S = 7   # side length of the S x S output grid
B = 2   # multiplied by 5 in the per-cell channel count -- presumably boxes per cell (confirm)
C = 20  # added to B*5 in the per-cell channel count -- presumably the number of classes (confirm)

In [4]:
# Codeblock 2
class ConvBlock(nn.Module):
    """Conv2d -> LeakyReLU(0.1), optionally followed by a 2x2/stride-2 max-pool.

    Args:
        in_channels: number of channels of the incoming tensor.
        out_channels: number of channels produced by the convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
        maxpool_flag: when True, a ``MaxPool2d(kernel_size=2, stride=2)`` is
            created and applied after the activation.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 maxpool_flag=False):
        super().__init__()
        self.maxpool_flag = maxpool_flag

        conv_settings = dict(in_channels=in_channels,
                             out_channels=out_channels,
                             kernel_size=kernel_size,
                             stride=stride,
                             padding=padding)
        self.conv = nn.Conv2d(**conv_settings)                       #(1)
        self.leaky_relu = nn.LeakyReLU(negative_slope=0.1)           #(2)

        # The pooling layer only exists on blocks that asked for it.
        if maxpool_flag:
            self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)     #(3)

    def forward(self, x):
        # The size-tracing prints below are intentionally left commented out;
        # enable them to inspect the tensor size after each step.
        #print(f'original\t: {x.size()}')

        features = self.conv(x)
        #print(f'after conv\t: {features.size()}')

        activated = self.leaky_relu(features)
        #print(f'after leaky relu: {activated.size()}')

        if not self.maxpool_flag:
            return activated

        pooled = self.maxpool(activated)
        #print(f'after maxpool\t: {pooled.size()}')

        return pooled

In [3]:
# Codeblock 3
# Smoke test for ConvBlock, using the same arguments Backbone passes for stage0.
# NOTE: the size prints inside ConvBlock.forward are commented out, so the size
# trace shown under this cell only appears when those prints are enabled.
convblock = ConvBlock(in_channels=3,       #(1)
                      out_channels=64,     #(2)
                      kernel_size=7,       #(3)
                      stride=2,            #(4)
                      padding=3,           #(5)
                      maxpool_flag=True)   #(6)
x = torch.randn(1, 3, 448, 448)            #(7)
out = convblock(x)

original	: torch.Size([1, 3, 448, 448])
after conv	: torch.Size([1, 64, 224, 224])
after leaky relu: torch.Size([1, 64, 224, 224])
after maxpool	: torch.Size([1, 64, 112, 112])


In [5]:
# Codeblock 4a
class Backbone(nn.Module):
    """Convolutional feature extractor assembled from ConvBlocks.

    ``stage0`` and ``stage1`` are single blocks; ``stage2`` through ``stage5``
    are ``nn.ModuleList`` containers whose blocks are applied in order.
    ``forward`` prints the tensor size after every block it applies.
    """

    def __init__(self):
        super().__init__()
        # ConvBlock positional arguments, in order:
        # in_channels, out_channels, kernel_size, stride, padding
        self.stage0 = ConvBlock(3, 64, 7, 2, 3, maxpool_flag=True)      #(1)
        self.stage1 = ConvBlock(64, 192, 3, 1, 1, maxpool_flag=True)    #(2)

        self.stage2 = nn.ModuleList()
        self.stage2.append(ConvBlock(192, 128, 1, 1, 0))
        self.stage2.append(ConvBlock(128, 256, 3, 1, 1))
        self.stage2.append(ConvBlock(256, 256, 1, 1, 0))
        self.stage2.append(ConvBlock(256, 512, 3, 1, 1, maxpool_flag=True))   #(3)

        # Four (1x1 -> 3x3) pairs, then one more pair that ends with a max-pool.
        stage3_blocks = []
        for _ in range(4):
            stage3_blocks += [ConvBlock(512, 256, 1, 1, 0),
                              ConvBlock(256, 512, 3, 1, 1)]
        stage3_blocks += [ConvBlock(512, 512, 1, 1, 0),
                          ConvBlock(512, 1024, 3, 1, 1, maxpool_flag=True)]   #(4)
        self.stage3 = nn.ModuleList(stage3_blocks)

        # Two (1x1 -> 3x3) pairs, a 3x3 block, and a final stride-2 3x3 block.
        stage4_blocks = []
        for _ in range(2):
            stage4_blocks += [ConvBlock(1024, 512, 1, 1, 0),
                              ConvBlock(512, 1024, 3, 1, 1)]
        stage4_blocks += [ConvBlock(1024, 1024, 3, 1, 1),
                          ConvBlock(1024, 1024, 3, 2, 1)]    #(5)
        self.stage4 = nn.ModuleList(stage4_blocks)

        self.stage5 = nn.ModuleList([
            ConvBlock(1024, 1024, 3, 1, 1),
            ConvBlock(1024, 1024, 3, 1, 1),
        ])

# Codeblock 4b
    def forward(self, x):
        """Apply every stage in order, printing the tensor size after each block."""
        print(f'original\t: {x.size()}\n')

        x = self.stage0(x)
        print(f'after stage0\t: {x.size()}\n')

        x = self.stage1(x)
        print(f'after stage1\t: {x.size()}\n')

        for i, block in enumerate(self.stage2):
            x = block(x)
            print(f'after stage2 #{i}\t: {x.size()}')

        # An empty print() separates the trace of consecutive stages.
        print()
        for i, block in enumerate(self.stage3):
            x = block(x)
            print(f'after stage3 #{i}\t: {x.size()}')

        print()
        for i, block in enumerate(self.stage4):
            x = block(x)
            print(f'after stage4 #{i}\t: {x.size()}')

        print()
        for i, block in enumerate(self.stage5):
            x = block(x)
            print(f'after stage5 #{i}\t: {x.size()}')

        return x

In [6]:
# Codeblock 5
# Build the backbone and push one random 1x3x448x448 tensor through it.
# Backbone.forward prints the tensor size after every block, which produces
# the trace shown under this cell.
backbone = Backbone()
x = torch.randn(1, 3, 448, 448)
out = backbone(x)

original	: torch.Size([1, 3, 448, 448])

after stage0	: torch.Size([1, 64, 112, 112])

after stage1	: torch.Size([1, 192, 56, 56])

after stage2 #0	: torch.Size([1, 128, 56, 56])
after stage2 #1	: torch.Size([1, 256, 56, 56])
after stage2 #2	: torch.Size([1, 256, 56, 56])
after stage2 #3	: torch.Size([1, 512, 28, 28])

after stage3 #0	: torch.Size([1, 256, 28, 28])
after stage3 #1	: torch.Size([1, 512, 28, 28])
after stage3 #2	: torch.Size([1, 256, 28, 28])
after stage3 #3	: torch.Size([1, 512, 28, 28])
after stage3 #4	: torch.Size([1, 256, 28, 28])
after stage3 #5	: torch.Size([1, 512, 28, 28])
after stage3 #6	: torch.Size([1, 256, 28, 28])
after stage3 #7	: torch.Size([1, 512, 28, 28])
after stage3 #8	: torch.Size([1, 512, 28, 28])
after stage3 #9	: torch.Size([1, 1024, 14, 14])

after stage4 #0	: torch.Size([1, 512, 14, 14])
after stage4 #1	: torch.Size([1, 1024, 14, 14])
after stage4 #2	: torch.Size([1, 512, 14, 14])
after stage4 #3	: torch.Size([1, 1024, 14, 14])
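after stage4 #4	: torch.Size([1, 1024, 14, 14])
after stage4 #5	: torch.Size([1, 1024, 7, 7])

after stage5 #0	: torch.Size([1, 1024, 7, 7])
after stage5 #1	: torch.Size([1, 1024, 7, 7])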

In [7]:
# Codeblock 6
class FullyConnected(nn.Module):
    """Two-layer fully connected head.

    Maps a flattened vector of 1024*7*7 features to 4096 hidden units, applies
    LeakyReLU(0.1) and Dropout(0.5), then projects to (C + B*5) * S * S values.
    ``forward`` prints the tensor size at the input and after each linear layer.
    """

    def __init__(self):
        super().__init__()

        flat_features = 1024*7*7
        hidden_features = 4096
        prediction_features = (C+B*5)*S*S

        self.linear0 = nn.Linear(in_features=flat_features,
                                 out_features=hidden_features)        #(1)
        self.leaky_relu = nn.LeakyReLU(negative_slope=0.1)            #(2)
        self.dropout = nn.Dropout(p=0.5)                              #(3)
        self.linear1 = nn.Linear(in_features=hidden_features,
                                 out_features=prediction_features)    #(4)

    def forward(self, x):
        print(f'original\t: {x.size()}')

        x = self.linear0(x)
        print(f'after linear0\t: {x.size()}')

        # Activation and dropout do not change the size, so nothing is printed here.
        x = self.dropout(self.leaky_relu(x))

        x = self.linear1(x)
        print(f'after linear1\t: {x.size()}')

        return x

In [8]:
# Codeblock 7
# Run the fully connected head on a random batch of one flattened vector with
# 1024*7*7 features (matching linear0's in_features); forward prints the sizes.
fc = FullyConnected()
x = torch.randn(1, 1024*7*7)
out = fc(x)

original	: torch.Size([1, 50176])
after linear0	: torch.Size([1, 4096])
after linear1	: torch.Size([1, 1470])


In [9]:
# Codeblock 8
class YOLOv1(nn.Module):
    """Full network: convolutional ``Backbone`` followed by the ``FullyConnected`` head.

    ``forward`` takes a batched image tensor, runs it through the backbone,
    flattens each sample's feature map into one vector and feeds the resulting
    2-D tensor to the fully connected head. The returned tensor has one row of
    predictions per input sample.
    """

    def __init__(self):
        super().__init__()

        self.backbone = Backbone()
        self.fc = FullyConnected()

    def forward(self, x):
        x = self.backbone(x)
        # Flatten everything except the batch dimension. A bare ``flatten()``
        # would also merge the batch axis into the feature vector, which only
        # matches the head's expected in_features when the batch size is 1.
        x = x.flatten(start_dim=1)
        x = self.fc(x)

        return x

In [10]:
# Codeblock 9
# End-to-end check of the full model on one random 1x3x448x448 tensor.
yolov1 = YOLOv1()
x = torch.randn(1, 3, 448, 448)      #(1)

out = yolov1(x)                      #(2)
# Rearrange the flat predictions to (-1, C+B*5, S, S); the first dimension is
# inferred. The bare last expression displays the resulting shape.
out = out.reshape(-1, C+B*5, S, S)   #(3)
out.shape

original	: torch.Size([1, 3, 448, 448])

after stage0	: torch.Size([1, 64, 112, 112])

after stage1	: torch.Size([1, 192, 56, 56])

after stage2 #0	: torch.Size([1, 128, 56, 56])
after stage2 #1	: torch.Size([1, 256, 56, 56])
after stage2 #2	: torch.Size([1, 256, 56, 56])
after stage2 #3	: torch.Size([1, 512, 28, 28])

after stage3 #0	: torch.Size([1, 256, 28, 28])
after stage3 #1	: torch.Size([1, 512, 28, 28])
after stage3 #2	: torch.Size([1, 256, 28, 28])
after stage3 #3	: torch.Size([1, 512, 28, 28])
after stage3 #4	: torch.Size([1, 256, 28, 28])
after stage3 #5	: torch.Size([1, 512, 28, 28])
after stage3 #6	: torch.Size([1, 256, 28, 28])
after stage3 #7	: torch.Size([1, 512, 28, 28])
after stage3 #8	: torch.Size([1, 512, 28, 28])
after stage3 #9	: torch.Size([1, 1024, 14, 14])

after stage4 #0	: torch.Size([1, 512, 14, 14])
after stage4 #1	: torch.Size([1, 1024, 14, 14])
after stage4 #2	: torch.Size([1, 512, 14, 14])
after stage4 #3	: torch.Size([1, 1024, 14, 14])
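after stage4 #4	: torch.Size([1, 1024, 14, 14])
after stage4 #5	: torch.Size([1, 1024, 7, 7])

after stage5 #0	: torch.Size([1, 1024, 7, 7])
after stage5 #1	: torch.Size([1, 1024, 7, 7])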

torch.Size([1, 30, 7, 7])