### Imports

In [1]:
import cv2
import numpy as np
import sys
import time
import os
import yaml

# Get main path
MAIN_PATH = os.getcwd()

sys.path.append(f'{MAIN_PATH}\\DeepSORT_YOLOv5_Pytorch\\')
sys.path.append(f'{MAIN_PATH}\\DeepSORT_YOLOv5_Pytorch\\deep_sort\\deep\\checkpoint\\')

from yolov5.utils.general import non_max_suppression, scale_coords, xyxy2xywh
from yolov5.utils.torch_utils import select_device
from yolov5.utils.datasets import letterbox

sys.path.append(f'{MAIN_PATH}\\DeepSORT_YOLOv5_Pytorch\\yolov5\\models\\')
from experimental import attempt_load

from utils_ds.parser import get_config
from deep_sort import build_tracker

from utils_ds.parser import get_config

import torch
import torch.backends.cudnn as cudnn
import mediapipe as mp
cudnn.benchmark = True

from IPython.display import clear_output

  from .autonotebook import tqdm as notebook_tqdm


### Config File Deepsort

In [2]:
def create_file_config_deepsort(save_path):
    """
        Write the Deep Sort tracker configuration to a YAML file.

        Args:
            save_path: destination of the YAML file (opened in 'w' mode, so an
                existing file is overwritten).

        Returns:
            The same ``save_path`` that was passed in.
    """

    # Re-identification checkpoint located under MAIN_PATH. Built with
    # os.path.join so the separators are valid on any OS, not only on Windows.
    reid_ckpt_path = os.path.join(MAIN_PATH, 'DeepSORT_YOLOv5_Pytorch', 'deep_sort',
                                  'deep', 'checkpoint', 'ckpt.t7')

    config = {
        'DEEPSORT': {
            'REID_CKPT': reid_ckpt_path,
            'MAX_DIST': 0.2,
            'MIN_CONFIDENCE': 0.3,
            'NMS_MAX_OVERLAP': 0.5,
            'MAX_IOU_DISTANCE': 0.7,
            'MAX_AGE': 70,
            'N_INIT': 3,
            'NN_BUDGET': 100
        }
    }
    with open(save_path, 'w') as file:
        yaml.dump(config, file)

    return save_path

### Apply Tracking on Image

In [3]:
def image_track(im0, 
                detector,
                deepsort,
                img_size,
                device,
                conf_thres,
                iou_thres,
                classes):
    """
        Deep Sort Tracking for YOLOv5 Inference method.

        Letterboxes one frame, runs the detector on it, applies NMS and feeds the
        surviving detections to the Deep Sort tracker.

        Args:
            im0: original frame as an HxWxC array; its channel order is reversed
                before inference (BGR input is assumed).
            detector: callable model; ``detector(img)[0]`` is handed to NMS.
            deepsort: tracker exposing ``update(bbox_xywh, confs, im0, class_det)``.
            img_size: target shape given to ``letterbox``.
            device: torch device the input tensor is moved to.
            conf_thres: confidence threshold passed to NMS.
            iou_thres: IoU threshold passed to NMS.
            classes: class filter passed to NMS.

        Returns:
            ``(outputs, confs)``: the tracker output and the (N, 1) detection
            confidences on the CPU. When nothing is detected, empty ``(0, 5)`` and
            ``(0, 1)`` tensors are returned and the tracker is not updated.
    """

    # preprocess ************************************************************
    # Padded resize
    img = letterbox(im0, new_shape=img_size)[0]

    # Convert:
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)  # the reversed view has negative strides

    # numpy to tensor
    img = torch.from_numpy(img).to(device)
    img = img.float()  # uint8 to fp32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    # Detection time *********************************************************
    # Inference
    with torch.no_grad():
        pred = detector(img)[0]

    # Apply NMS and keep only the requested classes
    pred = non_max_suppression(pred, conf_thres, iou_thres,
                               classes=classes)

    # get all obj ************************************************************
    det = pred[0]  # a single frame is processed, so the batch has one entry
    if det is not None and len(det):  # det: (#obj, 6)  x1 y1 x2 y2 conf cls

        # Rescale boxes from img_size to original im0 size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        bbox_xywh = xyxy2xywh(det[:, :4]).cpu()
        confs = det[:, 4:5].cpu()
        class_det = det[:, -1].cpu()
        # ****************************** deepsort ****************************
        # NOTE(review): the caller reads the last two output columns as
        # (track id, class) - confirm the exact layout against deepsort.update.
        outputs = deepsort.update(bbox_xywh, confs, im0, class_det)
    else:
        outputs = torch.zeros((0, 5))
        confs = torch.zeros((0, 1))

    return outputs, confs

###########################################################################################################################

### Draw Detections

In [4]:
def compute_color_for_labels(label):
    """
        Map a label to a fixed colour tuple.

        Every entry of the module-level ``palette`` is multiplied by
        ``label ** 2 - label + 1`` and reduced modulo 255, so the same label
        always yields the same colour.
    """

    multiplier = label ** 2 - label + 1
    return tuple(int((channel * multiplier) % 255) for channel in palette)

In [5]:
def draw_bounding_box(image,  outputs, confs, class_names):
    """
        Draw bounding box and class name with confidence.

        Args:
            image: frame to draw on (modified in place).
            outputs: 2-D array with one row per box; the first four columns are
                read as x1, y1, x2, y2, the second-to-last as the track id and
                the last as the class index.
            confs: one row per box; element ``[0]`` of each row is the
                confidence (assumed to be in [0, 1]).
            class_names: sequence mapping a class index to its display name.

        Returns:
            The same ``image`` object, after drawing.
    """

    # One font setup shared by getTextSize and putText, so the filled label
    # background is measured for the text that is actually rendered.
    font = cv2.FONT_HERSHEY_PLAIN
    font_scale = 1
    font_thickness = 2

    # Get the outputs
    bbox_xyxy  = outputs[:, :4]
    identities = outputs[:, -2]
    class_det  = outputs[:, -1]

    # NOTE(review): rows of `confs` are paired with rows of `outputs` purely by
    # position - confirm the caller provides them in the same order and count.
    for bbox, identity, conf, classe in zip(bbox_xyxy, identities, confs, class_det):
        # Plain Python ints for the drawing calls
        x1, y1, x2, y2 = (int(v) for v in bbox)
        track_id = int(identity)
        name = class_names[int(classe)]

        # Rounded percentage. Slicing the float's string representation
        # rendered e.g. 0.7 as "7%".
        confidence = round(float(conf[0]) * 100)

        color = compute_color_for_labels(track_id)

        text = f'id:{track_id} conf : {confidence}% class: {name}'
        t_size = cv2.getTextSize(text, font, font_scale, font_thickness)[0]

        # Draw Detections in the image: box outline, filled label background, label text
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 3)
        cv2.rectangle(image, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
        cv2.putText(image, text, (x1, y1 + t_size[1] + 4), font, font_scale,
                    [255, 255, 255], font_thickness)

    return image

### Main

In [6]:
def main(video_path):
    """
        Main function to run Deep Sort Tracking with YOLOv5 Inference.

        Frames are read from ``video_path``, tracked, annotated and shown in an
        OpenCV window. The video restarts when it reaches the end; press 'q' or
        'k' to stop.

        Relies on the module-level ``detector``, ``deepsort``, ``device``,
        ``conf_thres``, ``iou_thres``, ``classes`` and ``names`` being defined
        before it is called.

        Args:
            video_path: source handed to ``cv2.VideoCapture``.

        Raises:
            IOError: if the video cannot be opened.
    """

    # Size handed to image_track for the letterbox resize:
    img_size = (640, 480)

    # Set which frequency detection models will process frames:
    frame_interval = 2

    # Auxiliaries:
    idx_frame  = 0
    last_out   = None
    last_confs = None

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f'Could not open video: {video_path}')

    try:
        # Tracking
        while cap.isOpened():

            success, frame = cap.read()

            # Check if video ends:
            if not success:

                # Repeat video: release the exhausted capture before reopening it.
                cap.release()
                cap = cv2.VideoCapture(video_path)
                success, frame = cap.read()

                # Nothing can be read even after reopening: stop instead of
                # crashing on a missing frame.
                if not success:
                    break

            # Apply Deep Sort Track on one frame out of every `frame_interval`:
            if idx_frame % frame_interval == 0:
                last_out, last_confs = image_track(frame,
                                                   detector,
                                                   deepsort,
                                                   img_size,
                                                   device,
                                                   conf_thres,
                                                   iou_thres,
                                                   classes)

            # Skipped frames reuse the prediction of the last processed frame:
            outputs, confs = last_out, last_confs

            # For each YOLOv5 Detection after Deep Sort, draw bbox:
            if len(outputs) > 0:
                frame = draw_bounding_box(frame, outputs, confs, names)  # BGR

            # Show output:
            cv2.imshow('Detection:', frame)

            # Keyboard Controls (mask to the low byte before comparing key codes):
            key = cv2.waitKey(1) & 0xFF
            if key in (ord('k'), ord('q')):
                break

            # Update frame counter (wraps every 3 * frame_interval frames):
            idx_frame = (idx_frame + 1) % (3 * frame_interval)

            # Clear output:
            clear_output(wait=False)

    finally:
        # Release Video even if the loop is interrupted or raises:
        cap.release()
        cv2.destroyAllWindows()

### Defined Params

In [7]:
# Load YOLOv5 Detector:
# Request CUDA device 0 only when CUDA is actually available; otherwise run on the CPU.
device = select_device('0' if torch.cuda.is_available() else 'cpu')

# NOTE: torch.load unpickles arbitrary Python objects - only load weights from a trusted source.
weights_path = os.path.join(MAIN_PATH, 'DeepSORT_YOLOv5_Pytorch', 'yolov5', 'weights', 'yolov5s.pt')
model_weights = torch.load(weights_path, map_location=device)
model_weights['model'] = model_weights['model'].float()
detector = model_weights['model'].to(device).eval()
names = detector.module.names if hasattr(detector, 'module') else detector.names

# Config Deepsort:
save_path = os.path.join(MAIN_PATH, 'DeepSORT_YOLOv5_Pytorch', 'configs', 'deep_sort.yaml')
deepsort_config = create_file_config_deepsort(save_path)

# Configurations for car detection:
conf_thres = 0.65
iou_thres  = 0.10
classes    = [2] # class index used for car detection

palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) # Palette for colors of bounding boxes
video_path = 'media/cars.mp4'

# Load Deep Sort:
cfg = get_config()
cfg.merge_from_file(deepsort_config)
deepsort = build_tracker(cfg, use_cuda=torch.cuda.is_available())

if __name__ == '__main__':
    main(video_path)