In [1]:
!pip install ultralytics opencv-python opencv-python-headless numpy matplotlib flask


Collecting ultralytics
  Downloading ultralytics-8.3.52-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.52-py3-none-any.whl (901 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m901.7/901.7 kB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.52 ultralytics-thop-2.0.13


In [2]:
from ultralytics import YOLO
model = YOLO("yolov8n.pt")  # Replace with a larger model if resources allow


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 242MB/s]


In [3]:
import cv2
import numpy as np
from ultralytics import YOLO
from collections import defaultdict
import os

# Initialize YOLO model
model = YOLO("/content/yolov8n.pt")

# Input and output paths
input_video_path = "/content/macv-obj-tracking-video.mp4"
output_video_path = "output_video.mp4"

# Initialize video capture
cap = cv2.VideoCapture(input_video_path)

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Initialize video writer
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Object tracking data
object_data = defaultdict(lambda: {"frames": [], "centroids": []})
next_object_id = 1
object_id_mapping = {}

# Function to calculate centroid of a bounding box
def calculate_centroid(box):
    x_min, y_min, x_max, y_max = box
    return int((x_min + x_max) / 2), int((y_min + y_max) / 2)

# Process each frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLO inference
    results = model(frame)

    # Retrieve bounding boxes and class IDs
    detections = results[0].boxes.xyxy.cpu().numpy()  # Bounding boxes
    class_ids = results[0].boxes.cls.cpu().numpy().astype(int)  # Class IDs
    class_names = [model.names[class_id] for class_id in class_ids]  # Map to names

    frame_data = []

    for det, class_name in zip(detections, class_names):
        x_min, y_min, x_max, y_max = map(int, det)
        centroid = calculate_centroid((x_min, y_min, x_max, y_max))

        # Match with existing objects
        matched = False
        for obj_id, data in object_data.items():
            if data["frames"] and np.linalg.norm(np.array(data["centroids"][-1]) - np.array(centroid)) < 50:  # Match threshold
                object_data[obj_id]["frames"].append(len(object_data[obj_id]["frames"]) + 1)
                object_data[obj_id]["centroids"].append(centroid)
                frame_data.append((obj_id, (x_min, y_min, x_max, y_max), centroid, class_name))
                matched = True
                break

        if not matched:
            # Assign new object ID
            global next_object_id
            object_data[next_object_id]["frames"].append(1)
            object_data[next_object_id]["centroids"].append(centroid)
            frame_data.append((next_object_id, (x_min, y_min, x_max, y_max), centroid, class_name))
            next_object_id += 1

    # Draw bounding boxes, centroids, trails, and labels
    for obj_id, box, centroid, class_name in frame_data:
        x_min, y_min, x_max, y_max = box
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        cv2.circle(frame, centroid, 5, (0, 0, 255), -1)
        cv2.putText(frame, class_name, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        # Draw trail
        for i in range(1, len(object_data[obj_id]["centroids"])):
            cv2.line(frame, object_data[obj_id]["centroids"][i - 1], object_data[obj_id]["centroids"][i], (255, 0, 0), 2)

    # Write frame to output video
    out.write(frame)

# Release resources
cap.release()
out.release()

# Export metrics
metrics = {
    "unique_object_ids": len(object_data),
    "time_spent_per_object": {
        obj_id: len(data["frames"]) / fps for obj_id, data in object_data.items()
    },
}

# Print metrics
print("Metrics:")
print(f"Total Unique Objects Detected: {metrics['unique_object_ids']}")
for obj_id, time_spent in metrics["time_spent_per_object"].items():
    print(f"Object {obj_id}: {time_spent:.2f} seconds")





0: 384x640 8 persons, 1 chair, 112.4ms
Speed: 20.3ms preprocess, 112.4ms inference, 1163.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 1 chair, 14.7ms
Speed: 3.6ms preprocess, 14.7ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 1 chair, 11.0ms
Speed: 3.1ms preprocess, 11.0ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 1 handbag, 1 chair, 10.5ms
Speed: 2.9ms preprocess, 10.5ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 handbag, 1 chair, 10.7ms
Speed: 2.5ms preprocess, 10.7ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 1 chair, 9.8ms
Speed: 2.4ms preprocess, 9.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 2 handbags, 1 chair, 10.3ms
Speed: 2.4ms preprocess, 10.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x64

In [None]:
!ffmpeg -i osutput_video.mp4 -vcodec libx264 -acodec aac compatible_video.mp4


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab