In [1]:
import import_ipynb
import dark_net

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import os
import cv2
import numpy as np
import xml.etree.ElementTree as ET
import math
import warnings

importing Jupyter notebook from dark_net.ipynb


In [2]:
def load_classes(namesfile):
    """Read class names from a text file that holds one name per line.

    Args:
        namesfile: Path of the names file to read.

    Returns:
        list[str]: The lines of the file in order, without their trailing
        newline. An empty file yields an empty list.
    """
    # The context manager closes the handle even if reading fails. Stripping
    # the newline per line (instead of dropping the last split element) keeps
    # the final name when the file does not end with a newline.
    with open(namesfile, "r") as fp:
        return [line.rstrip("\n") for line in fp]

# Class names read from 'coco.txt' in the current working directory; the
# drawing code looks labels up in this list by detected class index.
classes = load_classes(os.path.join(os.getcwd(), 'coco.txt'))

In [3]:
# Construct the Darknet model from 'yolov3.txt' in the current working
# directory and move it to the GPU (requires a CUDA device).
net = dark_net.Darknet(os.path.join(os.getcwd(), 'yolov3.txt')).cuda()
# Weights path is relative to the current working directory.
net.load_weights('yolov3.weights')
# NOTE(review): presumably nn.Module.eval() (inference mode for dropout /
# batch-norm) — confirm that Darknet subclasses nn.Module.
net.eval()
# Uninitialised 1x3x608x608 float tensor on the GPU; it is not referenced by
# any other cell visible in this notebook.
test_batch = torch.FloatTensor(1, 3, 608, 608).cuda()

In [4]:
def unique(tensor):
    """Return the sorted distinct values of ``tensor``.

    The values are de-duplicated with NumPy on the CPU and then copied into a
    freshly allocated tensor that has the same dtype and device as the input.
    """
    distinct_values = torch.from_numpy(np.unique(tensor.cpu().numpy()))

    # Allocate the result from the input so dtype and device carry over.
    result = tensor.new(distinct_values.shape)
    return result.copy_(distinct_values)

In [5]:
def bbox_iou(box1, box2):
    """
    Compute the intersection-over-union of boxes given in corner format.

    Both arguments are 2-D tensors whose first four columns are read as
    (x1, y1, x2, y2); any further columns are ignored. The columns are
    combined element-wise with broadcasting, so a single-row ``box1`` is
    compared against every row of ``box2``. Side lengths are measured as
    ``x2 - x1 + 1`` and ``y2 - y1 + 1``.

    Returns a tensor holding one IoU value per compared pair.
    """
    left1, top1, right1, bottom1 = (box1[:, k] for k in range(4))
    left2, top2, right2, bottom2 = (box2[:, k] for k in range(4))

    # Extent of the overlap along each axis; a negative extent means the
    # boxes do not overlap on that axis, so it is clamped to zero.
    overlap_w = torch.clamp(torch.min(right1, right2) - torch.max(left1, left2) + 1, min=0)
    overlap_h = torch.clamp(torch.min(bottom1, bottom2) - torch.max(top1, top2) + 1, min=0)
    inter_area = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice.
    area1 = (right1 - left1 + 1) * (bottom1 - top1 + 1)
    area2 = (right2 - left2 + 1) * (bottom2 - top2 + 1)
    union_area = area1 + area2 - inter_area

    return inter_area / union_area

In [6]:
def post_process(prediction, confidence=0.5, num_classes=80, nms_conf=0.4):
    """Threshold raw detections and apply per-class non-maximum suppression.

    Args:
        prediction: 3-D tensor indexed as [image, candidate, attribute].
            Attributes 0-3 are read as (centre x, centre y, width, height),
            attribute 4 as the objectness score and attributes
            5 .. 5 + num_classes as the per-class scores.
        confidence: Candidates whose objectness is not strictly greater than
            this value are discarded.
        num_classes: Number of per-class score columns.
        nms_conf: IoU threshold; within one class, a box is dropped when its
            IoU with a higher-objectness kept box is not below this value.

    Returns:
        A 2-D tensor with one row per surviving detection and the columns
        (image index, x1, y1, x2, y2, objectness, class score, class index),
        or the integer 0 when no candidate in the batch passes the objectness
        threshold (callers distinguish the two with ``torch.is_tensor``).
    """
    # Zero out every candidate that fails the objectness threshold. This
    # creates a new tensor, so the caller's ``prediction`` is left untouched.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Convert (centre, size) boxes to (x1, y1, x2, y2) corners. All four
    # corner columns are computed before anything is written back, because
    # ``centre`` is a view into the tensor being overwritten.
    centre = prediction[:, :, 0:2]
    half_size = prediction[:, :, 2:4] / 2
    corners = torch.cat((centre - half_size, centre + half_size), 2)
    prediction[:, :, :4] = corners

    detections = []

    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Collapse the per-class scores to (best score, index of best class).
        class_score, class_index = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (
                image_pred[:, :5],
                class_score.float().unsqueeze(1),
                class_index.float().unsqueeze(1),
            ),
            1,
        )

        # Keep only the rows that survived the objectness threshold; an image
        # without any such row contributes nothing to the output.
        kept_rows = torch.nonzero(image_pred[:, 4]).squeeze(1)
        if kept_rows.size(0) == 0:
            continue
        image_pred_ = image_pred[kept_rows]

        # Rows are now (x1, y1, x2, y2, objectness, class score, class index).
        for cls in unique(image_pred_[:, -1]):
            # Detections of this class that also have a non-zero class score.
            class_rows = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[class_rows]

            # Highest objectness first, so each kept box can suppress the
            # weaker boxes that follow it.
            order = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[order]

            # Greedy NMS. The tensor shrinks as boxes are suppressed, so the
            # bound is re-evaluated on every pass instead of relying on an
            # out-of-range index error to stop the loop.
            anchor = 0
            while anchor < image_pred_class.size(0) - 1:
                rest = image_pred_class[anchor + 1:]
                ious = bbox_iou(image_pred_class[anchor].unsqueeze(0), rest)
                image_pred_class = torch.cat(
                    (image_pred_class[:anchor + 1], rest[ious < nms_conf])
                )
                anchor += 1

            # Prefix every surviving detection with the index of its image.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)


In [43]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def preprocess_img(img, inp_dim):
    """Resize an image with unchanged aspect ratio, pad it, and make a tensor.

    Args:
        img: Image array indexed as [row, column, channel] with 3 channels.
        inp_dim: Target size as (width, height).

    Returns:
        tuple: ``(tensor, canvas)`` where

        * ``tensor`` is a float tensor of shape (1, 3, height, width) with the
          channel order reversed and the values divided by 255;
        * ``canvas`` is the padded (height, width, 3) uint8 image: the resized
          input centred on a background filled with the value 128.
    """
    img_w, img_h = img.shape[1], img.shape[0]
    w, h = inp_dim

    # One scale factor for both axes keeps the aspect ratio.
    scale = min(w / img_w, h / img_h)
    new_w = int(img_w * scale)
    new_h = int(img_h * scale)
    resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # uint8 keeps the canvas a regular 8-bit image (np.full would otherwise
    # default to a platform integer, 8x larger and not displayable by OpenCV).
    canvas = np.full((h, w, 3), 128, dtype=np.uint8)

    # Centre the resized image on the canvas.
    top = (h - new_h) // 2
    left = (w - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized_image

    # Reverse the channel axis, move channels first, and copy so the array is
    # contiguous (torch.from_numpy cannot wrap negative strides).
    img = canvas[:, :, ::-1].transpose((2, 0, 1)).copy()
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)

    return img, canvas

In [36]:
def draw_grid(image, prediction):
    """Draw a labelled green rectangle on ``image`` for every detection.

    Args:
        image: Image array to draw on; passed to the OpenCV drawing calls.
        prediction: Iterable of detection rows. Elements 1-4 of each row are
            read as (x_min, y_min, x_max, y_max) and element 7 as the class
            index used to look the label up in the module-level ``classes``.

    Returns:
        The image after all detections have been drawn.
    """
    for obj in prediction:
        x_min = int(obj[1].item())
        y_min = int(obj[2].item())

        x_max = int(obj[3].item())
        y_max = int(obj[4].item())

        cls = int(obj[7].item())

        # Shift the label 15 px up and left only when the shifted origin stays
        # inside the image on both axes; otherwise anchor it at the box corner.
        label_fits = x_min - 15 > 0 and y_min - 15 > 0
        pos = (x_min - 15, y_min - 15) if label_fits else (x_min, y_min)

        # Best effort: a detection that cannot be drawn (e.g. class index
        # outside ``classes``) is reported and skipped. ``Exception`` rather
        # than a bare except, so KeyboardInterrupt can still stop the caller.
        try:
            image = cv2.putText(image, classes[cls], pos, cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,255,0),2)
            image = cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 1)
        except Exception:
            print(cls)

    return image
            

In [37]:
def predict_image(img):
    """Run the detector on one image and return a copy with detections drawn.

    The image is letterboxed to 608x608, passed through the module-level
    ``net`` on the GPU, and the resulting boxes are mapped back from the
    padded network input to the coordinates of the original image.
    """
    h, w, _ = img.shape
    scaling_factor = min(608 / w,  608 / h)

    annotated = img.copy()
    net_input, _ = preprocess_img(img, (608, 608))
    net_input = net_input.cuda()

    with torch.no_grad():
        predictions = net(net_input, True)

    results = post_process(predictions)

    # post_process returns a non-tensor when nothing was detected.
    if not torch.is_tensor(results):
        return annotated

    # Remove the letterbox padding, then undo the resize.
    pad_x = (608 - scaling_factor*w)/2
    pad_y = (608 - scaling_factor*h)/2
    results[:, [1, 3]] = results[:, [1, 3]] - pad_x
    results[:, [2, 4]] = results[:, [2, 4]] - pad_y
    results[:, 1:5] = results[:, 1:5] / scaling_factor

    # Keep every corner inside the original image.
    results[:, [1, 3]] = torch.clamp(results[:, [1, 3]], 0.0, w)
    results[:, [2, 4]] = torch.clamp(results[:, [2, 4]], 0.0, h)

    return draw_grid(annotated, results)


In [40]:
# Create a VideoCapture object and read from the input file
cap = cv2.VideoCapture('jojo.mp4')

# Check that the video source opened successfully
if not cap.isOpened():
    print("Error opening video  file")

try:
    # Read until the video is completed
    while cap.isOpened():
        # Capture frame-by-frame; ``ret`` is False once no frame could be
        # read (end of the video), in which case ``frame`` is not an image.
        ret, frame = cap.read()
        if not ret:
            break

        # Display the annotated frame
        cv2.imshow('Frame', predict_image(frame))

        # waitKey lets the window refresh; pressing 'q' stops playback early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and close the windows, including when the
    # loop is stopped by an error or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 