Standard Imports

In [47]:
import numpy as np
import pandas as pd
import cv2
import torch
import torchvision
import tensorflow as tf
from torchvision.models.detection import fasterrcnn_resnet50_fpn as fasterrcnn
from torchvision.ops.boxes import nms as torch_nms
from deepsort.deepsort import *



In [48]:
from IOU import IOU

Non-maximum suppression (NMS): removes the duplicate, overlapping boxes predicted around the same object (here, a car), keeping only the highest-scoring box of each overlapping group.

In [49]:
def nms(boxes, conf_threshold=0.7, iou_threshold=0.4):
    """Greedy, per-class non-maximum suppression.

    Each box is an indexable sequence whose first four entries are the
    coordinates handed to ``IOU``, whose entry 4 is the class label and whose
    entry 5 is the confidence score.

    Parameters
    ----------
    boxes : iterable
        Candidate boxes in the layout described above.
    conf_threshold : float
        Boxes whose score is not strictly greater than this are discarded.
    iou_threshold : float
        A box is suppressed when it has the same class label as an already
        kept box and their IOU is strictly greater than this value.

    Returns
    -------
    list
        The surviving boxes, ordered by descending score.
    """
    # Confident detections only, best score first (sorted() is stable, so
    # ties keep their input order).
    candidates = sorted(
        (box for box in boxes if box[5] > conf_threshold),
        key=lambda box: box[5],
        reverse=True,
    )

    kept = []
    while candidates:
        best, remaining = candidates[0], candidates[1:]
        kept.append(best)
        # Build a fresh list instead of calling .remove() while iterating:
        # removing during iteration makes the loop skip the element that
        # follows each removed box, letting overlapping duplicates survive.
        candidates = [
            box
            for box in remaining
            if not (best[4] == box[4] and IOU(best[:4], box[:4]) > iou_threshold)
        ]

    return kept

In [50]:
def draw_box(frame, boundingbox, color=(0,255,0)):
    """Outline ``boundingbox`` on ``frame`` and hand the frame back.

    ``boundingbox`` must unpack into exactly four values. The first two are
    used as one corner (``pt1``) and the last two as the opposite corner
    (``pt2``) of a rectangle drawn via ``cv2.rectangle`` with thickness 2 in
    the given ``color``.
    """
    left, top, right, bottom = boundingbox
    first_corner = (left, top)
    opposite_corner = (right, bottom)
    cv2.rectangle(
        frame,
        pt1=first_corner,
        pt2=opposite_corner,
        color=color,
        thickness=2,
    )
    return frame

In [51]:
def car_tracker(path_for_input_video,path_for_output_video):
    """Detect and track cars in a video and write an annotated copy of it.

    Every frame is run through a pretrained torchvision Faster R-CNN.
    Detections with label 3 are de-duplicated with ``nms`` and passed to a
    ``Sort`` tracker. The boxes the tracker returns are drawn on the frame
    together with a ``Car Counter`` overlay showing how many boxes the
    tracker returned for that frame. The annotated frame is then appended to
    the output video.

    Parameters
    ----------
    path_for_input_video : str
        Video to read with ``cv2.VideoCapture``.
    path_for_output_video : str
        Destination of the annotated video (``mp4v`` fourcc, same frame size
        and frame rate as reported by the input).

    Raises
    ------
    IOError
        If the input video cannot be opened.

    Returns
    -------
    None
    """
    car_label = 3  # 'car' in the COCO label map of torchvision's detection models

    video_capture = cv2.VideoCapture(path_for_input_video)
    writer = None
    try:
        # Fail loudly instead of silently producing an empty output file.
        if not video_capture.isOpened():
            raise IOError(f"Could not open input video: {path_for_input_video}")

        fasterrcnn_model = fasterrcnn(pretrained=True, progress=True, num_classes=91, pretrained_backbone=True)
        fasterrcnn_model.eval()

        width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes
        # the written video drift from the source timing.
        fps = video_capture.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(path_for_output_video, fourcc, fps, (width, height))
        sort_tracker = Sort()

        while True:
            flag, frame = video_capture.read()
            if not flag:
                break

            # OpenCV decodes frames as BGR, while the pretrained detector
            # expects RGB input scaled to [0, 1] in (batch, C, H, W) layout.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            model_input = (
                torch.from_numpy(rgb_frame)
                .permute(2, 0, 1)
                .float()
                .div(255)
                .unsqueeze(0)
            )

            with torch.no_grad():
                predict = fasterrcnn_model(model_input)

            detection = predict[0]
            boxes = detection['boxes'].numpy()
            labels = detection['labels'].numpy()
            scores = detection['scores'].numpy()

            # Rows are [x1, y1, x2, y2, label, score], the layout nms() expects.
            car_boxes = [
                list(box) + [label] + [score]
                for box, label, score in zip(boxes, labels, scores)
                if label == car_label
            ]
            kept_boxes = nms(car_boxes, 0.2, 0.5)

            if len(kept_boxes) > 0:
                # Drop the label column so the tracker gets [x1, y1, x2, y2, score].
                detections = np.delete(np.array(kept_boxes), 4, axis=1)
            else:
                # np.array([]) is 1-D, so deleting a column from it raises;
                # pass an explicitly shaped empty array instead.
                # NOTE(review): assumes Sort.update accepts a (0, 5) array — confirm.
                detections = np.empty((0, 5))

            track_boxes_id = sort_tracker.update(detections)
            for track in track_boxes_id:
                frame = draw_box(frame, (int(track[0]), int(track[1]), int(track[2]), int(track[3])))

            image_frame = cv2.putText(frame, f'Car Counter: {len(track_boxes_id)}', (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)

            writer.write(image_frame)
    finally:
        # Always release the handles so the output container is finalized and
        # the input file is closed, even if a frame fails mid-way.
        video_capture.release()
        if writer is not None:
            writer.release()
        


In [52]:
# Run the tracker on video1.mp4 and write the annotated result to output.mp4.
# car_tracker has no return statement, so `pred` ends up bound to None.
pred = car_tracker(
    path_for_input_video="video1.mp4",
    path_for_output_video="output.mp4",
)