In [None]:
!pip install ultralytics
!pip install deep-sort-realtime
!pip install opencv-python-headless

Collecting ultralytics
  Downloading ultralytics-8.3.149-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [5]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch
from google.colab import files

In [6]:
model = YOLO('yolov8n.pt')

tracker = DeepSort(max_age=30, n_init=3, nn_budget=100)

video_path = 'Test Video.mp4'
cap = cv2.VideoCapture(video_path)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

# Step 6: Process video frames
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    results = model(frame, conf=0.5)  # Confidence threshold

    detections = []
    for result in results:
        boxes = result.boxes
        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0].item()
            cls = int(box.cls[0].item())
            w, h = x2 - x1, y2 - y1
            detections.append(([x1, y1, w, h], conf, cls))

    tracks = tracker.update_tracks(detections, frame=frame)


    for track in tracks:
        if not track.is_confirmed():
            continue
        track_id = track.track_id
        ltrb = track.to_ltrb()
        x1, y1, x2, y2 = map(int, ltrb)
        label = f'ID {track_id} {model.names[track.det_class]}'

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    out.write(frame)


cap.release()
out.release()
cv2.destroyAllWindows()


print("Processing complete. Downloading 'output.mp4'...")
files.download('output.mp4')


0: 384x640 2 cars, 99.1ms
Speed: 3.4ms preprocess, 99.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 103.5ms
Speed: 2.2ms preprocess, 103.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 96.7ms
Speed: 1.9ms preprocess, 96.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 96.7ms
Speed: 1.6ms preprocess, 96.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 97.4ms
Speed: 1.8ms preprocess, 97.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 100.0ms
Speed: 1.6ms preprocess, 100.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 100.5ms
Speed: 1.7ms preprocess, 100.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 92.6ms
Speed: 1.7ms preprocess, 92.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 3

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>