# YOLOv3 implementation and testing using PyTorch

### Step 1: Building the network's layers

In [1]:
from __future__ import division
import torch
import torchvision 
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import variable
import numpy as np
from utils import *


In [2]:
def parse_cfg(file_path):
    """Parse a Darknet-style ``.cfg`` file into a list of block dictionaries.

    Every ``[section]`` header starts a new block whose ``"type"`` entry is the
    section name. Each following ``key=value`` line is stored in that block,
    with the key and the value kept as whitespace-stripped strings.

    Blank lines, whitespace-only lines and comment lines (``#``, with or
    without leading whitespace) are skipped.

    Args:
        file_path: Path of the configuration file to read.

    Returns:
        A list of dicts, one per section, in file order. The last block is
        always appended, so a file without any content yields ``[{}]``.

    Raises:
        ValueError: If a non-header line contains no ``=``.
    """
    with open(file=file_path, mode='r') as cfg_file:
        # Strip BEFORE filtering: otherwise a whitespace-only line survives the
        # emptiness check and later crashes on ``line[0]``, and an indented
        # comment is mistaken for a ``key=value`` line.
        stripped = (raw.strip() for raw in cfg_file)
        lines = [line for line in stripped if line and not line.startswith('#')]

    blocks = []
    block = {}
    for line in lines:
        if line.startswith('['):
            # A new header closes the block collected so far.
            if block:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            # Split on the first '=' only so values may themselves contain '='.
            key, value = line.split('=', 1)
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)

    return blocks


In [3]:
# Parse the cfg once; `yolo_blocks` is reused by the cells that follow.
# Index 0 is the first block of the file (create_modules treats it as the
# network info), so index 1 is the first layer block.
yolo_blocks = parse_cfg('yolov3.cfg')
print(yolo_blocks[1])

{'type': 'convolutional', 'batch_normalize': '1', 'filters': '32', 'size': '3', 'stride': '1', 'pad': '1', 'activation': 'leaky'}


In [4]:
class EmptyLayer(nn.Module):
    """Parameter-free placeholder registered for ``route`` and ``shortcut`` blocks.

    It defines no ``forward``; it only keeps the module list aligned with the
    block list, one entry per block.
    """

    def __init__(self):
        super(EmptyLayer, self).__init__()


class DetectionLayer(nn.Module):
    """Holds the anchors selected for one ``yolo`` block.

    Args:
        anchors: List of 2-tuples of ints picked from the block's ``anchors``
            entry by its ``mask`` entry.
    """

    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors


def create_modules(blocks):
    """Build one ``nn.Sequential`` per layer block of a parsed cfg.

    Args:
        blocks: List of block dicts as returned by ``parse_cfg``. ``blocks[0]``
            is taken as the network info; every later block describes a layer.

    Returns:
        A tuple ``(net_info, module_list)`` where ``net_info`` is ``blocks[0]``
        and ``module_list`` is an ``nn.ModuleList`` holding one
        ``nn.Sequential`` per entry of ``blocks[1:]``, in the same order.

    Side effects:
        * Each ``route`` block is printed and its ``"layers"`` entry is replaced
          in place by a list of strings (``Darknet.forward`` iterates over
          that list).

    Raises:
        KeyError: If a block lacks an entry its type requires.
        ValueError: If a numeric entry cannot be converted with ``int``.
    """
    net_info = blocks[0]  # get the network info as stored in the first block
    module_list = nn.ModuleList()
    prev_filters = 3
    output_filters = []

    # Make a sequential module for each block containing its layers.
    for index, block in enumerate(blocks[1:]):
        module = nn.Sequential()
        block_type = block['type']
        # Blocks that do not change the channel count (shortcut, upsample,
        # yolo) inherit it from the previous block. Setting it here also keeps
        # `filters` defined when the very first block is not convolutional.
        filters = prev_filters

        if block_type == 'convolutional':
            # A conv layer carries its own bias only when it is NOT followed by
            # batch norm; this also covers an explicit `batch_normalize=0`.
            batch_normalize = int(block.get("batch_normalize", 0))
            bias = not batch_normalize

            filters = int(block["filters"])
            kernel_size = int(block["size"])
            kernel_stride = int(block["stride"])
            kernel_padding = int(block["pad"])
            activation = block["activation"]

            # `pad` is a flag: when set, pad by half the kernel size.
            pad = (kernel_size - 1) // 2 if kernel_padding else 0

            conv = nn.Conv2d(prev_filters, filters, kernel_size, kernel_stride, pad, bias=bias)
            module.add_module("conv{0}".format(index), conv)

            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm{0}".format(index), bn)

            if activation == "leaky":
                act = nn.LeakyReLU(0.1, inplace=True)
                module.add_module("leaky{0}".format(index), act)

        elif block_type == 'shortcut':
            shortcut = EmptyLayer()
            module.add_module("emptylayer{0}".format(index), shortcut)

        elif block_type == 'route':
            print(block)
            layers = block['layers']
            # Accept the already-split form too, so calling this function a
            # second time on the same blocks does not fail.
            if isinstance(layers, str):
                layers = layers.split(',')
            block['layers'] = layers
            start = int(layers[0])
            # 0 doubles as the "no second layer" marker below.
            end = int(layers[1]) if len(layers) > 1 else 0

            # Positive values are absolute layer indices; make them relative
            # to the current block.
            if start > 0:
                start -= index
            if end > 0:
                end -= index

            route = EmptyLayer()
            module.add_module("route{0}".format(index), route)

            if end < 0:
                # Two routed layers: their channel counts add up.
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]

        elif block_type == 'upsample':
            # Honour the factor from the cfg instead of a hard-coded 2.
            stride = int(block["stride"])
            upsample = nn.Upsample(scale_factor=stride, mode="nearest")
            module.add_module("upsample{0}".format(index), upsample)

        elif block_type == "yolo":
            mask = [int(m) for m in block["mask"].split(',')]
            values = [int(a) for a in block["anchors"].split(",")]
            # Group the flat list into consecutive pairs, then keep the masked ones.
            pairs = [(values[i], values[i + 1]) for i in range(0, len(values), 2)]
            anchors = [pairs[i] for i in mask]

            detection = DetectionLayer(anchors)
            module.add_module("detectionlayer{0}".format(index), detection)

        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)

    return (net_info, module_list)

In [5]:
# Build the layer modules from the parsed cfg. create_modules prints every
# route block it processes, which is the output recorded under this cell.
info, lis = create_modules(yolo_blocks)

{'type': 'route', 'layers': '-4'}
{'type': 'route', 'layers': '-1, 61'}
{'type': 'route', 'layers': '-4'}
{'type': 'route', 'layers': '-1, 36'}


In [6]:
# Inspect the module built for layer index 106.
lis[106]

Sequential(
  (detectionlayer106): DetectionLayer()
)

In [7]:
class Darknet(nn.Module):
    """Network assembled from a Darknet ``.cfg`` file.

    Args:
        cfg_file: Path of the configuration file; it is parsed with
            ``parse_cfg`` and turned into modules with ``create_modules``.

    Attributes set in ``__init__``:
        blocks: The parsed block dictionaries (index 0 is the network info).
        info: The network-info block returned by ``create_modules``.
        module_list: One module per layer block, aligned with ``blocks[1:]``.
    """

    def __init__(self, cfg_file):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfg_file)
        self.info, self.module_list = create_modules(self.blocks)

    def forward(self, x, CUDA):
        """Run ``x`` through every layer block and collect the detections.

        Args:
            x: Input tensor fed to the first layer.
            CUDA: Flag passed through unchanged to ``predict_transform``.

        Returns:
            The outputs of ``predict_transform`` for all ``yolo`` blocks,
            concatenated along dimension 1.

        Raises:
            ValueError: If the configuration contains no ``yolo`` block, so
                there is nothing to return.
        """
        detections = []
        outputs = {}  # output of every block, keyed by index, for route/shortcut

        for i, block in enumerate(self.blocks[1:]):
            block_type = block["type"]

            if block_type in ("convolutional", "upsample"):
                x = self.module_list[i](x)

            elif block_type == "route":
                layers = block["layers"]
                # create_modules normally stores the split list; tolerate the
                # raw cfg string as well.
                if isinstance(layers, str):
                    layers = layers.split(',')
                # Positive entries are absolute indices; make them relative.
                # Only the first two entries are used.
                offsets = []
                for entry in layers[:2]:
                    offset = int(entry)
                    if offset > 0:
                        offset -= i
                    offsets.append(offset)

                if len(offsets) == 1:
                    x = outputs[i + offsets[0]]
                else:
                    map1 = outputs[i + offsets[0]]
                    map2 = outputs[i + offsets[1]]
                    x = torch.cat((map1, map2), 1)

            elif block_type == "shortcut":
                layer = int(block["from"])
                x = outputs[i - 1] + outputs[i + layer]

            elif block_type == "yolo":
                anchors = self.module_list[i][0].anchors
                input_dim = int(self.info["height"])
                num_classes = int(block["classes"])

                # `.data` drops the autograd history, so no gradient flows
                # back through the detections.
                x = x.data
                print(" #in yolo, x dim : {0}".format(x.shape))

                x = predict_transform(x, input_dim, anchors, num_classes, CUDA)
                detections.append(x)

            outputs[i] = x

        if not detections:
            raise ValueError("configuration contains no 'yolo' block; nothing to detect")
        return torch.cat(detections, 1)

In [11]:
# Input resolution as declared in the cfg's network-info block; parse_cfg
# stores every value as a string, hence the int() conversion.
height = int(info['height'])
width = int(info['width'])

In [12]:
def get_test_input(height, width):
    """Load ``dog-cycle-car.png`` and turn it into a network input tensor.

    Args:
        height: Target image height in pixels.
        width: Target image width in pixels.

    Returns:
        A float tensor of shape ``(1, 3, height, width)`` with the channels
        reversed from OpenCV's order and the values divided by 255.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread("dog-cycle-car.png")
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError("could not read image 'dog-cycle-car.png'")
    # cv2.resize expects dsize as (width, height).
    img = cv2.resize(img, (width, height))
    img_ = img[:, :, ::-1].transpose((2, 0, 1))   # BGR -> RGB | H x W x C -> C x H x W
    img_ = img_[np.newaxis, :, :, :] / 255.0      # add batch axis at 0 | normalise
    # A plain tensor is sufficient: wrapping in Variable is a no-op since
    # tensors and Variables were merged in PyTorch 0.4.
    return torch.from_numpy(img_).float()

In [13]:
# Build the network from the cfg, run one forward pass on the sample image
# with CUDA=False, and print the concatenated detections.
model = Darknet("yolov3.cfg")
inp = get_test_input(height, width)
pred = model(inp, CUDA=False)
print (pred)

{'type': 'route', 'layers': '-4'}
{'type': 'route', 'layers': '-1, 61'}
{'type': 'route', 'layers': '-4'}
{'type': 'route', 'layers': '-1, 36'}
image dimension : torch.Size([1, 3, 608, 608])
 #in yolo, x dim : torch.Size([1, 255, 19, 19])
 #in yolo, x dim : torch.Size([1, 255, 38, 38])
 #in yolo, x dim : torch.Size([1, 255, 76, 76])
tensor([[[1.6454e+01, 1.6313e+01, 1.1573e+02,  ..., 3.9088e-01,
          4.5618e-01, 5.3708e-01],
         [1.3879e+01, 1.5626e+01, 1.5502e+02,  ..., 4.6558e-01,
          4.6177e-01, 4.5955e-01],
         [1.5942e+01, 1.6298e+01, 3.2926e+02,  ..., 5.1199e-01,
          4.9673e-01, 5.3915e-01],
         ...,
         [6.0400e+02, 6.0383e+02, 9.2609e+00,  ..., 5.0169e-01,
          5.4139e-01, 4.9346e-01],
         [6.0410e+02, 6.0449e+02, 1.7437e+01,  ..., 5.1422e-01,
          4.9967e-01, 5.8847e-01],
         [6.0427e+02, 6.0411e+02, 3.5767e+01,  ..., 4.5829e-01,
          5.4291e-01, 4.8189e-01]]])


