# Input setup

In [33]:
import os

import cv2
import numpy as np
import torch

# Paths of the input clip and of the annotated video this notebook writes.
INPUT_VIDEO_PATH = './testVideos/vid2.mp4'
OUTPUT_VIDEO_PATH = './outputs/vid2YOLO_Deepsort.mp4'
# Frame rate used only when the container reports none (arbitrary fallback).
FALLBACK_FPS = 30.0

cap = cv2.VideoCapture(INPUT_VIDEO_PATH)
if not cap.isOpened():
    # Fail loudly here instead of silently processing zero frames later.
    raise IOError(f"Could not open input video: {INPUT_VIDEO_PATH}")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: int() would truncate e.g. 29.97 to 29 and
# make the written video play at the wrong speed.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:  # written this way so NaN is rejected as well as 0
    fps = FALLBACK_FPS

# Create a video writer object to save the output video in MP4 format.
# The target directory is created first because VideoWriter does not create
# it and reports the failure only through isOpened().
os.makedirs(os.path.dirname(OUTPUT_VIDEO_PATH), exist_ok=True)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(OUTPUT_VIDEO_PATH, fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    raise IOError(f"Could not open output video for writing: {OUTPUT_VIDEO_PATH}")

# YOLO

In [34]:
# Detector: the small YOLOv5 checkpoint with pretrained weights, fetched
# (or read from the local hub cache) through torch.hub.
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5s',
    pretrained=True,
)

Using cache found in /Users/vatsal007/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2023-3-10 Python-3.8.16 torch-2.1.0.dev20230302 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


# DeepSort

In [35]:
from deep_sort_realtime.deepsort_tracker import DeepSort
from deep_sort.deep_sort import nn_matching
from deep_sort.deep_sort.detection import Detection
from deep_sort.deep_sort.tracker import Tracker
from deep_sort.tools import generate_detections
from deep_sort.application_util import preprocessing

# --- DeepSORT tracker configuration -------------------------------------
# Appearance-feature extractor: frozen graph loaded from disk, run one
# crop per batch.
model_filename = './deep_sort/model_data/mars-small128.pb'
encoder = generate_detections.create_box_encoder(
    model_filename,
    batch_size=1,
)

# Association settings handed to the nearest-neighbour appearance metric.
nn_budget = None
max_cosine_distance = 0.1
# Overlap threshold kept for the (currently disabled) NMS step.
nms_max_overlap = 1.0

metric = nn_matching.NearestNeighborDistanceMetric(
    "cosine",
    max_cosine_distance,
    nn_budget,
)
tracker = Tracker(metric)

2023-03-22 23:18:07.416847: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-22 23:18:07.416880: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


# Detection and Display

In [36]:
import time

# Detector class names that are forwarded to the tracker.
VEHICLE_CLASSES = {'car', 'truck', 'motorcycle', 'bicycle'}

start_time = time.time()
num_frames = 0

# `ids` keeps one entry per drawn track per frame (unchanged semantics, in
# case later cells read it); `unique_ids` backs the on-screen counter so the
# set is not rebuilt from the whole list on every frame.
ids = []
unique_ids = set()

try:
    while cap.isOpened():
        # Capture frame-by-frame; stop at end of stream or on a read error.
        ret, frame = cap.read()
        if not ret:
            break
        num_frames += 1

        # --- Detection ---------------------------------------------------
        # OpenCV decodes frames as BGR, while YOLOv5's AutoShape wrapper
        # treats numpy input as RGB, so convert before inference. The frame
        # is still free of overlays here, so drawn text cannot leak into
        # the detector or the appearance encoder.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = model([rgb_frame]).pandas().xyxy[0]
        vehicles = result[result['name'].isin(VEHICLE_CLASSES)]

        # --- Convert detector output to DeepSORT input --------------------
        # Boxes are passed as [x, y, w, h] with integer pixel coordinates.
        boxes = []
        for xmin, ymin, xmax, ymax in zip(vehicles['xmin'], vehicles['ymin'],
                                          vehicles['xmax'], vehicles['ymax']):
            xleft, yleft, xright, yright = int(xmin), int(ymin), int(xmax), int(ymax)
            boxes.append([xleft, yleft, xright - xleft, yright - yleft])
        boxes = np.array(boxes)
        scores = np.array(vehicles['confidence'])

        if len(boxes):
            # The encoder receives the original BGR frame, as before.
            features = np.array(encoder(frame, boxes))
            detections = [
                Detection(bbox, score, feature)
                for bbox, score, feature in zip(boxes, scores, features)
            ]
        else:
            # Nothing to encode; the tracker still has to age its tracks.
            detections = []

        # --- Tracking ----------------------------------------------------
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            ids.append(track.track_id)
            unique_ids.add(track.track_id)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,0,0), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)

        # --- Overlays (drawn only after inference) ------------------------
        # Running average of processed frames per second since the loop began.
        elapsed_time = time.time() - start_time
        fps = num_frames / elapsed_time
        cv2.putText(frame, f"FPS: {round(fps, 2)}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Total Cars: "+str(len(unique_ids)), (10,10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)

        cv2.imshow('Frame', frame)
        # write frame to output video
        out.write(frame)

        # Press Q on keyboard to exit
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and writer (so the output file is closed
    # even on an exception or interrupt) and tear down the preview window.
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    # NOTE(review): one extra event-loop tick is commonly needed for the
    # window to actually close when running inside a notebook - confirm on
    # the target platform.
    cv2.waitKey(1)

2023-03-22 23:18:07.742900: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
1   HIToolbox                           0x00000001a3f3390c _ZN15MenuBarInstance22RemoveAutoShowObserverEv + 44
2   HIToolbox                           0x00000001a3f72478 _ZL17BroadcastInternaljPvh + 184
3   SkyLight                            0x000000019f485214 _ZN12_GLOBAL__N_123notify_datagram_handlerEj15CGSDatagramTypePvmS1_ + 896
4   SkyLight                            0x000000019f483e10 CGSSnarfAndDispatchDatagrams + 808
5   SkyLight                            0x000000019f7aba9c SLSGetNextEventRecordInternal + 344
6   SkyLight                            0x000000019f5acfb0 SLEventCreateNextEvent + 16
7   HIToolbox                           0x00000001a3f40b58 _ZL38PullEventsFromWindowServerOnConnectionjhP17__CFMachPortBoost + 60
8   HIToolbox                           0x00000001a3f40ae0 _ZL14MessageHandlerP12__CFMachPortPvlS1_ + 6