In [None]:
!pip install ultralytics opencv-python

In [12]:
import cv2
import numpy as np
from ultralytics import YOLO
from collections import defaultdict
import os
from google.colab import files
from IPython.display import display, HTML, clear_output
import matplotlib.pyplot as plt
from pathlib import Path
import time
from base64 import b64encode

In [8]:
# Upload a video file
print("Please upload your video file:")
uploaded = files.upload()

if not uploaded:
    print("No file was uploaded.")
else:
    # Get uploaded filename
    video_file = list(uploaded.keys())[0]
    print(f"Processing video: {video_file}")

Please upload your video file:


Saving input_video.mp4 to input_video (1).mp4
Processing video: input_video (1).mp4


In [9]:
# Create output directory
os.makedirs("output", exist_ok=True)

# Load YOLOv8 model
model = YOLO("yolov8n.pt")

In [10]:
# Single Person Tracking
output_path = "output/single_person_tracking.mp4"
print(f"\nStarting single person tracking...")

# Open video
cap = cv2.VideoCapture(video_file)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

# Tracking variables
paths = defaultdict(list)
selected_id = None
frame_count = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    if frame_count % 50 == 0:
        print(f"\rProgress: {frame_count}/{total_frames} frames", end="")

    # Tracking
    results = model.track(frame, persist=True, classes=0, tracker="botsort.yaml")

    if results[0].boxes.id is not None:
        boxes = results[0].boxes.xyxy.cpu().numpy()
        track_ids = results[0].boxes.id.int().cpu().numpy()

        for box, track_id in zip(boxes, track_ids):
            box = box.astype(int)
            center = ((box[0] + box[2]) // 2, (box[1] + box[3]) // 2)

            # Select first person if none selected
            if selected_id is None:
                selected_id = track_id

            # Only track selected person
            if track_id == selected_id:
                paths[track_id].append(center)
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)

                # Draw path
                if len(paths[track_id]) > 1:
                    for i in range(1, len(paths[track_id])):
                        alpha = i / len(paths[track_id])
                        thickness = int(5 * (1 - alpha)) + 1
                        cv2.line(frame, paths[track_id][i-1], paths[track_id][i],
                                (0, 0, 255), thickness)

    out.write(frame)

cap.release()
out.release()
print("\nSingle person tracking complete!")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 384x640 3 persons, 7.0ms
Speed: 3.2ms preprocess, 7.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 7.0ms
Speed: 3.2ms preprocess, 7.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 8.0ms
Speed: 3.2ms preprocess, 8.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.9ms
Speed: 2.9ms preprocess, 6.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.6ms
Speed: 2.8ms preprocess, 6.6ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 7.0ms
Speed: 3.3ms preprocess, 7.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 7.0ms
Speed: 3.0ms preprocess, 7.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 7.3ms
Speed: 3.0ms preprocess, 7.3ms 

In [14]:
# Multiple Person Tracking
output_path = "output/multiple_person_tracking.mp4"
print(f"\nStarting multiple person tracking...")

# Open video
cap = cv2.VideoCapture(video_file)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

# Tracking variables
paths = defaultdict(list)
colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]  # Red, Green, Blue
frame_count = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    if frame_count % 50 == 0:
        print(f"\rProgress: {frame_count} frames", end="")

    # Tracking
    results = model.track(frame, persist=True, classes=0, tracker="botsort.yaml")

    if results[0].boxes.id is not None:
        boxes = results[0].boxes.xyxy.cpu().numpy()
        track_ids = results[0].boxes.id.int().cpu().numpy()

        for box, track_id in zip(boxes, track_ids):
            box = box.astype(int)
            center = ((box[0] + box[2]) // 2, (box[1] + box[3]) // 2)
            paths[track_id].append(center)

            # Draw bounding box with unique color
            color = colors[track_id % len(colors)]
            cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), color, 2)

            # Draw path with fading effect
            if len(paths[track_id]) > 1:
                for i in range(1, len(paths[track_id])):
                    alpha = i / len(paths[track_id])
                    thickness = int(4 * (1 - alpha)) + 1
                    cv2.line(frame, paths[track_id][i-1], paths[track_id][i],
                            color, thickness)

    out.write(frame)

cap.release()
out.release()
print("\nMultiple person tracking complete!")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 384x640 3 persons, 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.5ms
Speed: 2.7ms preprocess, 6.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.6ms
Speed: 2.8ms preprocess, 6.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.8ms
Speed: 2.7ms preprocess, 6.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.6ms
Speed: 2.0ms preprocess, 6.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.8ms
Speed: 1.8ms preprocess, 6.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.7ms
Speed: 2.1ms preprocess, 6.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 6.6ms
Speed: 3.1ms preprocess, 6.6ms 

In [16]:
# Download Results
from google.colab import files

for task in ["single", "multiple"]:
    path = f"output/{task}_person_tracking.mp4"
    if os.path.exists(path):
        files.download(path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>