# Reference

*   [YOLO v3](https://deep-learning-study.tistory.com/411)
*   [Dataset](https://aihub.or.kr/aidata/27727)
*   [IoU](https://ballentain.tistory.com/12)
*   [Objectness(=Confidence threshold)](https://mickael-k.tistory.com/141)

# Data source

*   [cfg File](https://github.com/pjreddie/darknet)
*   [weights File](https://pjreddie.com/media/files/yolov3.weights)

# Issue

*   [Modify yolo_v3.cfg, width and height: 608 → 416](https://discuss.pytorch.org/t/shape-1-255-3025-is-invalid-for-input-of-size-689520/37603/11)
*   [링크 텍스트](https://)

# Function info

*   [squeeze, unsqueeze](https://sanghyu.tistory.com/86)


# Google Drive Mount

In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

# IPython magics: switch to the project folder on the mounted drive and list its contents.
%cd /content/gdrive/MyDrive/DeepLearning/Project/HawkEye
!ls -al

# Import Package

In [None]:
from __future__ import division

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import cv2

# Functions

*The network configuration file published by the authors of [the YOLOv3 paper](https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg)*

In [None]:
# Configuration file read
def parse_cfg(cfgFile) :
    """Parse a Darknet-style configuration file into a list of blocks.

    Every ``[section]`` header starts a new block. The block is a dict whose
    ``'type'`` entry is the section name and whose remaining entries are the
    ``key = value`` pairs that follow, stored as stripped strings.

    Args:
        cfgFile: Path of the configuration file to read.

    Returns:
        A list of dicts, one per section, in file order. An input without any
        section yields ``[{}]``.

    Raises:
        ValueError: If a non-header line does not contain ``=``.
    """
    blocks = []
    block = {}

    # The context manager closes the file even when parsing fails.
    with open(cfgFile, 'r') as f :
        for raw_line in f :
            # Strip before filtering so whitespace-only lines and indented
            # comments are skipped instead of breaking the parser.
            line = raw_line.strip()
            if not line or line.startswith('#') :
                continue

            if line.startswith('[') : # start new block
                if block : # flush the previous block first
                    blocks.append(block)
                    block = {}
                block['type'] = line[1 : -1].rstrip()
            else :
                # Split on the first '=' only so values may contain '='.
                key, value = line.split('=', 1)
                block[key.rstrip()] = value.lstrip()

    blocks.append(block)

    return blocks

# Build the PyTorch modules described by the blocks of a configuration file.
def create_modules(blocks) :
    """Build one ``nn.Sequential`` per configuration block.

    Args:
        blocks: List of block dicts as returned by ``parse_cfg``. The first
            entry holds the network-level settings; every following entry
            describes one layer. For ``route`` blocks the ``'layers'`` entry
            is replaced in place by a list of strings, which is the form
            ``Darknet.forward`` reads.

    Returns:
        A tuple ``(net_info, module_list)`` where ``net_info`` is
        ``blocks[0]`` and ``module_list`` is an ``nn.ModuleList`` holding one
        module per layer block, in order.
    """
    net_info = blocks[0] # input and pre-processing settings
    module_list = nn.ModuleList()

    prev_filters = 3 # the input image has 3 (RGB) channels
    # Defined up front so a leading non-convolutional block cannot hit an
    # unbound name when its channel count is recorded below.
    filters = prev_filters
    output_filters = [] # output channel count of every block so far

    for idx, x in enumerate(blocks[1:]) :
        module = nn.Sequential()
        block_type = x['type']

        if block_type == 'convolutional' :
            activation = x['activation']
            batch_normalize = int(x.get('batch_normalize', 0))
            # The convolution carries its own bias only when no batch norm
            # follows; this must track the flag's value, not merely whether
            # the key is present, or load_weights finds no bias to fill.
            bias = not batch_normalize

            filters = int(x['filters'])
            padding = int(x['pad'])
            kernel_size = int(x['size'])
            stride = int(x['stride'])

            pad = (kernel_size - 1) // 2 if padding else 0

            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=bias)
            module.add_module('conv_{0}'.format(idx), conv)

            if batch_normalize :
                bn = nn.BatchNorm2d(filters)
                module.add_module('batch_norm_{0}'.format(idx), bn)

            # 'linear' activation needs no extra layer.
            if activation == 'leaky' :
                activn = nn.LeakyReLU(0.1, inplace=True)
                module.add_module('leaky_{0}'.format(idx), activn)

        elif block_type == 'upsample' :
            # Honour the configured stride. Darknet's upsample layer repeats
            # pixels, which corresponds to nearest-neighbour interpolation.
            stride = int(x['stride'])
            upsample = nn.Upsample(scale_factor=stride, mode='nearest')
            module.add_module('upsample_{}'.format(idx), upsample)

        elif block_type == 'route' :
            layers = x['layers']
            if isinstance(layers, str) : # already a list on a repeated call
                layers = layers.split(',')
            x['layers'] = layers

            start = int(layers[0])
            end = int(layers[1]) if len(layers) > 1 else 0

            # Positive values are absolute layer indices; make them relative.
            if start > 0 : start = start - idx
            if end > 0 : end = end - idx

            route = EmptyLayer()
            module.add_module('route_{0}'.format(idx), route)

            # Two routed layers are concatenated, so their channels add up.
            if end < 0 : filters = output_filters[idx + start] + output_filters[idx + end]
            else : filters = output_filters[idx + start]

        elif block_type == 'shortcut' : # skip connection
            shortcut = EmptyLayer()
            module.add_module('shortcut_{}'.format(idx), shortcut)

        elif block_type == 'yolo' :
            mask = [int(m) for m in x['mask'].split(',')]

            values = [int(a) for a in x['anchors'].split(',')]
            pairs = [(values[i], values[i + 1]) for i in range(0, len(values), 2)]
            anchors = [pairs[i] for i in mask] # keep only this scale's anchors

            detection = DetectionLayer(anchors)
            module.add_module('Detection_{}'.format(idx), detection)

        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)

    return (net_info, module_list)

***Util***

In [None]:
# Convert a detection feature map into a 2-D tensor of bounding-box predictions
def prediction_transform(prediction, inp_dim, anchors, num_classes, CUDA=True) :
    """Turn a raw detection feature map into a table of box predictions.

    The map is reshaped to ``(batch, grid * grid * num_anchors, 5 + num_classes)``.
    Each row then holds centre x, centre y, width, height, objectness and one
    score per class, with the box expressed in input-image pixels.

    Args:
        prediction: Tensor of shape
            ``(batch, num_anchors * (5 + num_classes), grid, grid)``.
        inp_dim: Side length of the square network input.
        anchors: Sequence of ``(width, height)`` anchor sizes in input pixels.
        num_classes: Number of class scores per box.
        CUDA: Kept for backward compatibility and ignored. Helper tensors are
            created on ``prediction.device``, so they always match the
            prediction regardless of this flag.

    Returns:
        The transformed prediction tensor.
    """
    batch_size = prediction.size(0)
    stride = inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    device, dtype = prediction.device, prediction.dtype

    prediction = prediction.view(batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size, grid_size * grid_size * num_anchors, bbox_attrs)

    # Sigmoid the centre coordinates, then objectness together with the class scores.
    prediction[:, :, :2] = torch.sigmoid(prediction[:, :, :2])
    prediction[:, :, 4:5 + num_classes] = torch.sigmoid(prediction[:, :, 4:5 + num_classes])

    # Per-cell offsets in row-major order: x cycles fastest, y changes once per row.
    cells = torch.arange(grid_size, dtype=dtype, device=device)
    x_offset = cells.repeat(grid_size).view(-1, 1)
    y_offset = cells.view(-1, 1).repeat(1, grid_size).view(-1, 1)

    # Every cell's offset is repeated once per anchor to line up with the rows.
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
    prediction[:, :, :2] += x_y_offset

    # Anchors are given in input pixels; express them in grid cells first.
    scaled_anchors = torch.tensor(
        [(a[0] / stride, a[1] / stride) for a in anchors], dtype=dtype, device=device
    )
    scaled_anchors = scaled_anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)
    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * scaled_anchors

    # Scale the box from grid cells back to input-image pixels.
    prediction[:, :, :4] *= stride

    return prediction

def write_results(prediction, confidence, num_classes, mns_conf=0.4, nms_conf=None) :
    """Filter raw predictions by objectness and apply per-class NMS.

    Terms:
        bounding box: The box predicted around an object.
        objectness: The score (column 4) for a box containing an object;
            boxes scoring at or below ``confidence`` are discarded.
        NMS: Non-maximum suppression. Among boxes of one class, a box is
            dropped when its IoU with a higher-scoring kept box reaches the
            NMS threshold.

    Args:
        prediction: Tensor of shape ``(batch, boxes, 5 + num_classes)`` whose
            rows are centre x, centre y, width, height, objectness and the
            class scores. It is not modified.
        confidence: Objectness threshold.
        num_classes: Number of class score columns.
        mns_conf: NMS IoU threshold. The misspelled name is kept so existing
            callers keep working.
        nms_conf: Correctly spelled alias for ``mns_conf``; takes precedence
            when given.

    Returns:
        A tensor with one row per kept detection and 8 columns: batch index,
        x1, y1, x2, y2, objectness, best class score, best class index.
        Returns ``0`` when nothing in the batch was detected.
    """
    nms_threshold = mns_conf if nms_conf is None else nms_conf

    # Zero every box whose objectness is not above the threshold. The
    # multiplication produces a new tensor, so the input stays untouched.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Convert (centre, size) to (top-left, bottom-right) corners, which is
    # the form the IoU computation expects.
    centres = prediction[:, :, :2]
    half_sizes = prediction[:, :, 2:4] / 2
    corners = torch.cat((centres - half_sizes, centres + half_sizes), 2)
    prediction = torch.cat((corners, prediction[:, :, 4:]), 2)

    results = []
    for ind in range(prediction.size(0)) :
        image_pred = prediction[ind]

        # Keep only the best class per box: its score and its index.
        max_conf, max_conf_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
        seq = (image_pred[:, :5], max_conf.float().unsqueeze(1), max_conf_idx.float().unsqueeze(1))
        image_pred = torch.cat(seq, 1)

        # Drop the rows that were zeroed by the objectness threshold.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.size(0) == 0 : continue # no detection in this image

        for cls in torch.unique(image_pred_[:, -1]) :
            # Detections of this class, highest objectness first.
            image_pred_class = image_pred_[image_pred_[:, -1] == cls]
            order = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[order]

            # NMS: box i suppresses every later box that overlaps it too much.
            # The row count shrinks as boxes are removed, hence the while loop.
            i = 0
            while i + 1 < image_pred_class.size(0) :
                ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])
                keep = ious < nms_threshold
                image_pred_class = torch.cat((image_pred_class[:i + 1], image_pred_class[i + 1:][keep]))
                i += 1

            # Prefix every surviving detection with the index of its image.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            results.append(torch.cat((batch_ind, image_pred_class), 1))

    if not results : return 0

    return torch.cat(results)

# Get image class
def unique(tensor) :
    """Return the sorted distinct values of ``tensor``.

    The values are computed with NumPy on the CPU; the result is a new
    tensor with the dtype and device of the input.
    """
    distinct = np.unique(tensor.cpu().numpy())

    return torch.from_numpy(distinct).to(device=tensor.device, dtype=tensor.dtype)

# Compute IoU
def bbox_iou(box1, box2) :
    """Compute the intersection-over-union of corner-format boxes.

    Args:
        box1: Tensor whose first four columns are ``x1, y1, x2, y2``.
        box2: Tensor with the same column layout. Its rows are compared
            element-wise (with broadcasting) against the rows of ``box1``.

    Returns:
        A tensor of IoU scores, one per compared pair. Widths and heights are
        measured inclusively (``x2 - x1 + 1``).
    """
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # The intersection starts at the larger of the top-left corners and ends
    # at the smaller of the bottom-right corners.
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    # Clamp at zero so disjoint boxes contribute no (negative) area.
    inter_width = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0)
    inter_height = torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)
    inter_area = inter_width * inter_height

    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    # union = sum of both areas minus the part counted twice
    iou = inter_area / (b1_area + b2_area - inter_area)

    return iou

***Test***

In [None]:
def get_test_input(img_path='./Dataset/dog-cycle-car.png', inp_dim=416) :
    """Load an image from disk and prepare it as a network input.

    Args:
        img_path: Path of the image file to read.
        inp_dim: Side length the image is resized to (square input).

    Returns:
        A float tensor of shape ``(1, 3, inp_dim, inp_dim)`` in RGB channel
        order with values scaled to ``[0, 1]``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None :
        raise FileNotFoundError("Could not read image: {0}".format(img_path))

    img = cv2.resize(img, (inp_dim, inp_dim)) # resize to input size
    img_ = img[:, :, ::-1].transpose((2, 0, 1)) # BGR -> RGB, HxWxC -> CxHxW
    img_ = img_[np.newaxis, :, :, :] / 255.0 # add the batch axis and normalize
    img_ = torch.from_numpy(img_).float()

    return Variable(img_)

# Class

In [None]:
# Dummy layer: a placeholder nn.Module for route and shortcut blocks, whose actual work is done directly in the forward function
class EmptyLayer(nn.Module) :
    """Module without parameters or sub-modules, used purely as a placeholder."""

    def __init__(self) :
        super().__init__()

# Layer save anchors for detection about bounding box
class DetectionLayer(nn.Module) :
    """Module that only stores the anchors handed to it."""

    def __init__(self, anchors) :
        super().__init__()
        # Stored as given; read back through the ``anchors`` attribute.
        self.anchors = anchors

class Darknet(nn.Module) :
    """Network assembled from a Darknet configuration file.

    Attributes set by ``__init__``:
        blocks: Parsed configuration blocks (``parse_cfg``).
        net_info: The first block, holding network-level settings.
        module_list: One module per layer block (``create_modules``).
    """

    def __init__(self, cfgFile) :
        super(Darknet, self).__init__()

        self.blocks = parse_cfg(cfgFile)
        self.net_info, self.module_list = create_modules(self.blocks)

    def forward(self, x, CUDA) :
        """Run the network and collect the detections of every yolo layer.

        Each yolo feature map is converted by ``prediction_transform`` into a
        table of box predictions. The maps of different layers have different
        sizes, so only the converted tables can be concatenated.

        Args:
            x: Input batch fed to the first layer.
            CUDA: Flag forwarded to ``prediction_transform``.

        Returns:
            The predictions of all yolo layers concatenated along dimension 1.

        Raises:
            ValueError: If the configuration contains no yolo layer.
        """
        outputs = {} # output of every layer, keyed by layer index
        detections = []

        for i, module in enumerate(self.blocks[1:]) :
            module_type = module['type']

            if module_type in ('convolutional', 'upsample') :
                x = self.module_list[i](x) # plain forward pass

            elif module_type == 'route' :
                layers = module['layers']
                if isinstance(layers, str) :
                    layers = layers.split(',')
                # Positive entries are absolute layer indices; make them relative to i.
                offsets = [int(a) for a in layers]
                offsets = [a - i if a > 0 else a for a in offsets]

                maps = [outputs[i + a] for a in offsets]
                # Several routed feature maps are joined along the channel axis.
                x = maps[0] if len(maps) == 1 else torch.cat(maps, 1)

            elif module_type == 'shortcut' :
                from_ = int(module['from'])
                x = outputs[i - 1] + outputs[i + from_]

            elif module_type == 'yolo' :
                anchors = self.module_list[i][0].anchors

                inp_dim = int(self.net_info['height']) # input dimension
                num_classes = int(module['classes']) # class count

                x = prediction_transform(x.data, inp_dim, anchors, num_classes, CUDA)
                detections.append(x)

            outputs[i] = x

        if not detections :
            raise ValueError("The network configuration contains no yolo layer.")

        return torch.cat(detections, 1)

    def load_weights(self, weightFile) :
        """Load weights from a binary Darknet weight file into the modules.

        The file starts with a header of five int32 values (20 bytes):
            1. Major version number
            2. Minor version number
            3. Subversion number
            4, 5. Number of images seen by the network during training
        The remaining bytes are float32 weights stored in layer order. Only
        convolutional blocks read weights: either the batch-norm bias, weight,
        running mean and running variance, or the convolution bias, followed
        in both cases by the convolution weights.

        Args:
            weightFile: Path of the weight file.
        """
        # The context manager releases the file handle once everything is read.
        with open(weightFile, 'rb') as fp :
            header = np.fromfile(fp, dtype=np.int32, count=5)
            weights = np.fromfile(fp, dtype=np.float32)

        self.header = torch.from_numpy(header)
        self.seen = self.header[3]

        ptr = 0 # read position inside the flat weights array
        for i in range(len(self.module_list)) :
            block = self.blocks[i + 1]
            if block['type'] != 'convolutional' :
                continue

            model = self.module_list[i]
            conv = model[0]

            try : batch_normalize = int(block['batch_normalize'])
            except KeyError : batch_normalize = 0

            if batch_normalize :
                bn = model[1]
                targets = [bn.bias.data, bn.weight.data, bn.running_mean, bn.running_var]
            else :
                targets = [conv.bias.data]
            targets.append(conv.weight.data)

            # Fill every target, in file order, with the next numel() values,
            # reshaped to the target's own shape.
            for target in targets :
                count = target.numel()
                chunk = torch.from_numpy(weights[ptr : ptr + count])
                ptr += count
                target.copy_(chunk.view_as(target))

# Main

In [None]:
# Build the network from the configuration file and load the pretrained weights.
model = Darknet("./Dataset/yolo_v3.cfg")
model.load_weights('./Dataset/yolo_v3.weights')
# Inference mode: batch norm must use the loaded running statistics
# instead of the statistics of the current batch.
model.eval()

# Keep the model, the input and the CUDA flag on the same device.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model = model.to(device)
inp = get_test_input().to(device)

print("CUDA is available? ", use_cuda)
# No gradients are needed for a forward-only test run.
with torch.no_grad() :
    pred = model(inp, use_cuda)

print(pred)