OPTIMIZED TRACKING

In [5]:
import cv2
import torch
import cupy as cp  # import CuPy for GPU acceleration
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import numpy as np

# choose the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# load the trained YOLOv8 weights and move the model onto the chosen device
model = YOLO(
    r'C:\Users\hp\Desktop\Master_Thesis_Project\Thesis Project\Final Thesis Project\New_Run\New_Run\Scenario 1\train\train\weights\best.pt'
).to(device)

# configure the Deep SORT tracker (parameter meanings per the
# deep_sort_realtime documentation)
tracker = DeepSort(
    max_iou_distance=0.30,  # IoU gating threshold used when matching detections to tracks
    max_age=150,            # number of consecutive misses tolerated before a track is dropped
    n_init=5,               # frames a track stays tentative before it is confirmed
    nn_budget=100,          # cap on the stored appearance embeddings
)

# input video to be processed
video_path = r'C:\Users\hp\Desktop\Master_Thesis_Project\Dataset\Filtered_Video Dataset\filtered_video_4.mp4'
cap = cv2.VideoCapture(video_path)

# output video settings; the writer itself is created lazily once the first
# frame (and therefore the frame size) is known, so it starts out as None
output_video_path = r'C:\Users\hp\Desktop\Master_Thesis_Project\Thesis Project\Final Thesis Project\Video & Image Output\output4.mp4'
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
out = None

# Process the video frame by frame: detect with YOLOv8, track with Deep SORT,
# draw the confirmed tracks and write the annotated frame to the output video.
#
# Relies on `model`, `tracker`, `cap`, `video_path`, `output_video_path`,
# `fourcc` and `out` (initially None) defined earlier in this script.

# fail early with a clear message instead of silently producing no output
if not cap.isOpened():
    raise IOError(f"Could not open input video: {video_path}")

# reuse the source frame rate so the output plays at the original speed;
# some containers report 0, in which case fall back to 30 fps
output_fps = cap.get(cv2.CAP_PROP_FPS) or 30

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Pass the frame straight to YOLO: the model was already moved to
        # `device`, and Ultralytics handles preprocessing and device transfer
        # itself. (Copying the frame to CuPy and straight back only added two
        # host<->device transfers per frame without accelerating anything.)
        results = model(frame)

        # convert the detections to Deep SORT's expected format:
        # ([left, top, width, height], confidence, class_id)
        detections = []
        for r in results:
            boxes = r.boxes
            # one GPU -> CPU transfer per attribute per frame instead of
            # several per box
            corners = boxes.xyxy.cpu().tolist()
            confidences = boxes.conf.cpu().tolist()
            class_ids = boxes.cls.cpu().tolist()

            for (bx1, by1, bx2, by2), confidence, class_id in zip(
                corners, confidences, class_ids
            ):
                left, top, right, bottom = int(bx1), int(by1), int(bx2), int(by2)
                detections.append(
                    ([left, top, right - left, bottom - top], confidence, int(class_id))
                )

        # update the tracker; the raw (not yet annotated) frame is used for
        # the appearance embeddings, so drawing must happen after this call
        tracks = tracker.update_tracks(detections, frame=frame)

        # draw the confirmed tracks
        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            x1, y1, x2, y2 = map(int, track.to_ltrb())  # left, top, right, bottom

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"ID: {track_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # create the writer lazily, once the frame size is known
        if out is None:
            height, width = frame.shape[:2]
            out = cv2.VideoWriter(output_video_path, fourcc, output_fps, (width, height))
            if not out.isOpened():
                raise IOError(f"Could not open output video for writing: {output_video_path}")

        out.write(frame)
finally:
    # always release the source and finalise the output file, even if
    # inference or tracking raised part-way through the video
    cap.release()
    if out is not None:  # stays None when no frame was ever read
        out.release()



0: 288x512 20 persons, 62.0ms
Speed: 2.0ms preprocess, 62.0ms inference, 1.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 41.5ms
Speed: 2.0ms preprocess, 41.5ms inference, 2.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 41.4ms
Speed: 2.0ms preprocess, 41.4ms inference, 2.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 41.0ms
Speed: 1.4ms preprocess, 41.0ms inference, 2.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 41.9ms
Speed: 2.0ms preprocess, 41.9ms inference, 3.0ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 42.1ms
Speed: 1.0ms preprocess, 42.1ms inference, 1.5ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 42.5ms
Speed: 1.0ms preprocess, 42.5ms inference, 4.3ms postprocess per image at shape (1, 3, 288, 512)

0: 288x512 20 persons, 42.2ms
Speed: 1.0ms preprocess, 42.2ms inference, 2.0ms postprocess per image at