# YoloV3 implementation and testing using Pytorch

### step 1: Making the network's layers

In [2]:
from __future__ import division
import torch
import torchvision 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import variable
import numpy as np


In [25]:
def parse_cfg(file_path):
    #read the layers and store every block as a dictionary
    block = {}
    blocks = []
    with open(file=file_path, mode='r') as file:
        lines = file.read().split('\n')
        lines = [x for x in lines if (len(x)>0 and x[0] != '#')]
    
    for line in lines:
        if line[0] == "[":
            if len(block) != 0:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1]
        else:
            key, value = line.split('=')
            block[key] = value
    blocks.append(block)
        
    return blocks


In [82]:
class EmptyLayer(nn.Module):
    def __init__(self):
        super(EmptyLayer, self).__init__()

class DetectionLayer(nn.Module):
    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors


def create_modules(blocks):
    net_info = blocks[0] # get the network info as stored in the first block
    module_list = nn.ModuleList()
    prev_filters = 3
    filters = 0
    output_filters = []
    
    for index, block in enumerate(blocks[1:]): #making a sequential module for each block containing the layers
        module = nn.Sequential()
        if(block['type'] == 'convolutional'):
            if("batch_normalize" in block):
                batch_normalize = int(block["batch_normalize"])
                bias = False
            else:
                batch_normalize = 0
                bias = True
            filters = int(block["filters"])
            kernel_size = int(block["size"])
            kernel_stride = int(block["stride"])
            kernel_padding = int(block["pad"])
            activation = block["activation"]
            
            if kernel_padding:
                pad = (kernel_size - 1) // 2
            else:
                pad = 0
            
            conv = nn.Conv2d(prev_filters, filters, kernel_size, kernel_stride, pad, bias= bias)
            module.add_module("conv{0}".format(index), conv)
            
            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm{0}".format(index), bn)
                
            if activation == "leaky":
                act = nn.LeakyReLU()
                module.add_module("leaky{0}".format(index), act)
                
            
        elif(block['type'] == 'shortcut'):
            shortcut = EmptyLayer()
            module.add_module("emptylayer{0}".format(index), shortcut)
            
            
        elif(block['type'] == 'route'):
            layers = block['layers '].split(',')
            start = int(layers[0])
            if len(layers) == 2:
                end = int(layers[1])
            else:
                end = 0
                
            if start > 0:
                start -= index
            if end > 0:
                end -= index
                
            route = EmptyLayer()
            module.add_module("route{0}".format(index), route)
            
            if end < 0:
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]
                
        
        elif(block['type'] == 'updsample'):
            stride = int(block["stride"])
            upsample = nn.Upsample(scale_factor=stride, mode="bilinear")
            module.add_module("upsample{0}".format(index), upsample)
            
            
        elif(block['type'] == "yolo"):
            mask = block["mask "].split(',')
            mask = (int(m) for m in mask)
            anchors = block["anchors "].split(",")
            anchors = [int(a) for a in anchors]
            anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors),2)]
            anchors = [anchors[i] for i in mask]
            
            detection = DetectionLayer(anchors)
            module.add_module("detectionlayer", detection)
            
            
        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
        
    return (net_info, module_list)

In [86]:
blocks = parse_cfg("yolov3.cfg")
inf0, lis = create_modules(blocks)

In [87]:
print(lis)

ModuleList(
  (0): Sequential(
    (conv0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (batch_norm0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky0): LeakyReLU(negative_slope=0.01)
  )
  (1): Sequential(
    (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (batch_norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky1): LeakyReLU(negative_slope=0.01)
  )
  (2): Sequential(
    (conv2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (batch_norm2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky2): LeakyReLU(negative_slope=0.01)
  )
  (3): Sequential(
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (batch_norm3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (leaky3): LeakyReL