<a href="https://colab.research.google.com/github/Mahikarimib/object-detection/blob/main/Object_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Object Detection - YOLO

Tiny YOLO

In [None]:
def non_max_suppress(conf, xy_min, xy_max, threshold=.4):
    """Suppress overlapping boxes class by class (non-maximum suppression).

    The inputs are flattened to one entry per box.  For every class the boxes
    are ranked by their score for that class; any lower-ranked box whose IoU
    with a still-active higher-ranked box reaches ``threshold`` gets its score
    for that class set to 0.

    Args:
        conf: 3-D array of per-class scores; the last axis is the class axis.
        xy_min: array reshapeable to ``(-1, 2)`` with each box's minimum corner.
        xy_max: array reshapeable to ``(-1, 2)`` with each box's maximum corner.
        threshold: IoU at or above which the lower-scored box is suppressed.

    Returns:
        A list of ``(scores, xy_min, xy_max)`` tuples, one per box, in the
        order left by the sort for the last class processed.

    NOTE: scores are zeroed in place on the rows produced by
    ``conf.reshape``; when that reshape is a view, the caller's ``conf`` is
    modified as well.
    """
    _, _, classes = conf.shape
    boxes = list(zip(conf.reshape(-1, classes),
                     xy_min.reshape(-1, 2),
                     xy_max.reshape(-1, 2)))

    for c in range(classes):
        # Rank boxes by their score for class c, best first.
        boxes.sort(key=lambda box: box[0][c], reverse=True)
        for i, (scores_i, min_i, max_i) in enumerate(boxes[:-1]):
            # A box that was already suppressed (or never scored) for this
            # class must not suppress others.
            if scores_i[c] == 0:
                continue
            # iou() takes two boxes of the form [x1, y1, x2, y2], so join the
            # min and max corners into a single 4-number box.
            rect_i = tuple(min_i) + tuple(max_i)
            for scores_j, min_j, max_j in boxes[i + 1:]:
                rect_j = tuple(min_j) + tuple(max_j)
                if iou(rect_i, rect_j) >= threshold:
                    scores_j[c] = 0
    return boxes

Intersection over Union (IoU)

In [None]:
def iou(b1, b2):
    """Return the intersection over union (IoU) of two axis-aligned boxes.

    Each box is a sequence of four numbers ``[x1, y1, x2, y2]``.  Widths and
    heights are computed as ``max - min + 1``, i.e. the corner coordinates are
    treated as inclusive.

    Args:
        b1: first box as ``[x1, y1, x2, y2]``.
        b2: second box as ``[x1, y1, x2, y2]``.

    Returns:
        The ratio of the intersection area to the union area; 0 when the
        boxes do not overlap.
    """
    # Corners of the intersection rectangle.
    left = max(b1[0], b2[0])
    top = max(b1[1], b2[1])
    right = min(b1[2], b2[2])
    bottom = min(b1[3], b2[3])

    # Clamp at 0: for disjoint boxes the raw extents are negative, and the
    # product of two negative extents would otherwise look like a real overlap.
    inter_w = max(0, right - left + 1)
    inter_h = max(0, bottom - top + 1)
    area_intersect = inter_w * inter_h

    # Areas of the two boxes.
    area_b1 = (b1[2] - b1[0] + 1) * (b1[3] - b1[1] + 1)
    area_b2 = (b2[2] - b2[0] + 1) * (b2[3] - b2[1] + 1)

    area_union = area_b1 + area_b2 - area_intersect
    # Degenerate boxes can produce an empty union; avoid dividing by zero.
    if area_union <= 0:
        return 0.0

    return area_intersect / float(area_union)

Library

In [None]:
import os
import time
import warnings
from glob import glob
from PIL import Image
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F

from yolo_loss import YoloLoss
from utils import *

Tiny YOLO implementation

In [None]:
# Create a random tensor of size 1 x 1 x 3 x 3 to try the padding and pooling steps on.
x = torch.rand(1, 1, 3, 3)
print(x.size(), x)

In [None]:
# Add one column on the right and one row at the bottom by replicating the edge values.
x = F.pad(x, (0, 1, 0, 1), mode='replicate')
print(x.size(), x)

In [None]:
# 2x2 max pooling with stride 1: each spatial dimension shrinks by one.
x = F.max_pool2d(x, 2, stride=1)
print(x.size(), x)

In [None]:
class MaxPoolStride1(nn.Module):
    """2x2 max pooling with stride 1 that keeps the input height and width."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Replicate the last column and the last row once so that a 2x2
        # window fits at every original spatial position.
        padded = F.pad(x, (0, 1, 0, 1), mode='replicate')
        return F.max_pool2d(padded, 2, stride=1)


class TinyYoloNet(nn.Module):
    """Tiny YOLO convolutional network configured for the 20 PASCAL VOC classes.

    The network is eight 3x3 conv / batch-norm / leaky-ReLU stages followed by
    a 1x1 output convolution that emits ``(5 + num_classes) * num_anchors``
    channels per spatial cell.
    """

    def __init__(self):
        super(TinyYoloNet, self).__init__()

        self.num_classes = 20  # VOC PASCAL
        # Flat list read as pairs (two values per anchor).
        # presumably (width, height) per anchor - confirm against YoloLoss
        self.anchors = [1.08,1.19,  3.42,4.41,  6.63,11.38,  9.42,5.11,  16.62,10.52]
        # Integer division: the anchor count must be an int (plain `/` yields
        # a float under Python 3).
        self.num_anchors = len(self.anchors) // 2
        # 125 = 5 * (4 + 1 + 20)
        num_output = (5 + self.num_classes) * self.num_anchors

        # 224 x 224 input -> 7 x 7 feature map after the five stride-2 poolings
        self.width = 224
        self.height = 224

        # loss function
        self.loss = YoloLoss(self.num_classes, self.anchors, self.num_anchors)

        # Convolutional Neural Network
        self.cnn = nn.Sequential(OrderedDict([
            # conv1
            ('conv1', nn.Conv2d(3, 16, 3, 1, 1, bias=False)),
            ('bn1', nn.BatchNorm2d(16)),
            ('leaky1', nn.LeakyReLU(0.1, inplace=True)),
            ('pool1', nn.MaxPool2d(2, 2)),

            # conv2
            ('conv2', nn.Conv2d(16, 32, 3, 1, 1, bias=False)),
            ('bn2', nn.BatchNorm2d(32)),
            ('leaky2', nn.LeakyReLU(0.1, inplace=True)),
            ('pool2', nn.MaxPool2d(2, 2)),

            # conv3
            ('conv3', nn.Conv2d(32, 64, 3, 1, 1, bias=False)),
            ('bn3', nn.BatchNorm2d(64)),
            ('leaky3', nn.LeakyReLU(0.1, inplace=True)),
            ('pool3', nn.MaxPool2d(2, 2)),

            # conv4
            ('conv4', nn.Conv2d(64, 128, 3, 1, 1, bias=False)),
            ('bn4', nn.BatchNorm2d(128)),
            ('leaky4', nn.LeakyReLU(0.1, inplace=True)),
            ('pool4', nn.MaxPool2d(2, 2)),

            # conv5
            ('conv5', nn.Conv2d(128, 256, 3, 1, 1, bias=False)),
            ('bn5', nn.BatchNorm2d(256)),
            ('leaky5', nn.LeakyReLU(0.1, inplace=True)),
            ('pool5', nn.MaxPool2d(2, 2)),

            # conv6
            ('conv6', nn.Conv2d(256, 512, 3, 1, 1, bias=False)),
            ('bn6', nn.BatchNorm2d(512)),
            ('leaky6', nn.LeakyReLU(0.1, inplace=True)),
            ('pool6', MaxPoolStride1()),   # does not change tensor height and width

            # conv7
            ('conv7', nn.Conv2d(512, 1024, 3, 1, 1, bias=False)),
            ('bn7', nn.BatchNorm2d(1024)),
            ('leaky7', nn.LeakyReLU(0.1, inplace=True)),

            # conv8
            ('conv8', nn.Conv2d(1024, 1024, 3, 1, 1, bias=False)),
            ('bn8', nn.BatchNorm2d(1024)),
            ('leaky8', nn.LeakyReLU(0.1, inplace=True)),

            # output: one 1x1 convolution producing num_output channels
            ('output', nn.Conv2d(1024, num_output, 1, 1, 0))
        ]))

    def forward(self, x):
        """Run the convolutional stack on ``x`` and return its raw output."""
        return self.cnn(x)

In [None]:
def load_model(weights):
    """Build a TinyYoloNet and load its parameters from a saved state dict.

    Args:
        weights: path (or file-like object) handed to ``torch.load``; it must
            contain a state dict matching ``TinyYoloNet``.

    Returns:
        The ``TinyYoloNet`` instance with the loaded parameters.
    """
    model = TinyYoloNet()
    model.load_state_dict(torch.load(weights))
    # Return the network so callers can bind and use it; without this the
    # function evaluates to None.
    return model

In [None]:
# Bind the result of load_model for the Tiny YOLO VOC weights file to `model`.
model = load_model(weights='weights/tiny-yolo-voc.pth')

In [None]:
# Read the class names from data/voc.names and print them on a single line.
# load_class_names is not defined in this notebook; presumably it comes from `utils` - confirm.
class_names = load_class_names('data/voc.names')
print('  '.join(class_names))

In [None]:
def predict(model, img_path, img_size=416, save_to=None, conf_thresh=0.4, nms_thresh=0.5):
    """Detect objects in one image file and return the annotated image.

    Args:
        model: detection model handed to ``detect``.
        img_path: path of the image to open.
        img_size: side length of the square the image is resized to before
            detection.
        save_to: forwarded unchanged to ``plot_boxes``.
        conf_thresh: confidence threshold forwarded to ``detect``.
        nms_thresh: NMS threshold forwarded to ``detect``.

    Returns:
        Whatever ``plot_boxes`` returns for the original (un-resized) image.

    Note:
        Reads the module-level ``class_names`` when plotting.
    """
    image = Image.open(img_path).convert('RGB')

    # The resize is deliberately inside the timed region, so the reported
    # time covers resizing plus detection.
    started = time.time()
    detections = detect(model, image.resize((img_size, img_size)), conf_thresh, nms_thresh)
    elapsed_ms = (time.time() - started) * 1000

    print("Prediction time: [{:.5f} ms.]".format(elapsed_ms))

    # Boxes are drawn on the original image, not on the resized copy.
    return plot_boxes(image, detections, save_to, class_names)

In [None]:
# Run detection on the eagle sample image with the default size and thresholds.
predict(model, 'data/eagle.jpg')

In [None]:
# Run detection on the giraffe sample image with the default size and thresholds.
predict(model, 'data/giraffe.jpg')

YOLO 2

In [None]:
from yolo_v2 import YoloNet

In [None]:
def load_yolo2(num_classes, anchors, weights):
    """Build a YoloNet and load its parameters from a saved state dict.

    Args:
        num_classes: number of classes passed to ``YoloNet``.
        anchors: anchor values passed to ``YoloNet``.
        weights: path (or file-like object) handed to ``torch.load``; it must
            contain a state dict matching the constructed network.

    Returns:
        The ``YoloNet`` instance with the loaded parameters.
    """
    yolo = YoloNet(num_classes, anchors)
    yolo.load_state_dict(torch.load(weights))
    # Return the network so callers can bind and use it; without this the
    # function evaluates to None.
    return yolo

In [None]:
weight_file = 'weights/yolo.pth'

# anchor and classes information
# Read one class name per line.  The `with` block closes the file, and
# splitlines() does not produce a trailing empty name when the file ends with
# a newline, so num_classes is not inflated by one.
with open('data/coco.names') as names_file:
    class_names = names_file.read().splitlines()
num_classes = len(class_names)

# Flat list read as pairs of values per anchor (five anchors here).
anchors = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]

# NOTE(review): image_size is not passed to predict() in the cells visible
# here (predict defaults to img_size=416) - confirm whether
# img_size=image_size was intended.
image_size = 608

In [None]:
# Rebind `model` to the result of load_yolo2 for the COCO class count, anchors and weights file.
model = load_yolo2(num_classes, anchors, weight_file)

In [None]:
# Run detection on the dog sample image with a lower confidence threshold (0.3 instead of the default 0.4).
predict(model, 'data/dog.jpg', conf_thresh=0.3)

In [None]:
# Run detection on the person sample image with the default size and thresholds.
predict(model, 'data/person.jpg')