In [1]:
from collections import defaultdict
import cv2
import numpy as np
from ultralytics import YOLO
import imageio.v3 as iio

In [2]:
# Load the YOLO11 model weights from the local model directory
model = YOLO("./model/yolo11l.pt")

# Open the input video file
video_path = "./data/video/vietnam.mp4"
cap = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise on a bad path or unsupported codec; it just
# returns a closed capture. Fail here with a clear message instead of letting
# the processing cell silently produce an empty output video.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

In [3]:
# Read basic properties of the input video
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Round rather than truncate so fractional rates such as 29.97 map to 30, not 29
fps = round(cap.get(cv2.CAP_PROP_FPS))
if fps <= 0:
    fps = 30  # fall back to a default FPS when the video does not report one

size = (width, height)
output_path = "./data/output/vietnam_tracking.mp4"

# Maps track id -> list of (x, y) box centers; unseen ids start with an empty list
track_history = defaultdict(list)

try:
    # Use imageio (pyav plugin) to write the output video
    with iio.imopen(output_path, "w", plugin="pyav") as writer:
        writer.init_video_stream("libx264", fps=fps)

        # Loop through the video frames
        while cap.isOpened():
            # Read a frame from the video
            success, frame = cap.read()
            if not success:
                # Stop when the end of the video is reached (or a read fails)
                break

            # Run YOLO11 tracking on the frame, persisting tracks between frames
            results = model.track(frame, persist=True, show=False)
            result = results[0]

            # Visualize the detections on the frame
            annotated_frame = result.plot()

            # `boxes.id` is None on frames where the tracker has no active
            # tracks; skip trail drawing for those frames instead of crashing.
            if result.boxes is not None and result.boxes.id is not None:
                # Get the boxes (center-x, center-y, width, height) and track IDs
                boxes = result.boxes.xywh.cpu()
                track_ids = result.boxes.id.int().cpu().tolist()

                # Plot the tracks
                for box, track_id in zip(boxes, track_ids):
                    x, y, w, h = box
                    track = track_history[track_id]
                    track.append((float(x), float(y)))  # x, y center point
                    if len(track) > 120:  # keep only the 120 most recent points per track
                        track.pop(0)

                    # Draw the tracking line through the stored center points
                    points = np.array(track).astype(np.int32).reshape((-1, 1, 2))
                    cv2.polylines(
                        annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10
                    )

            # The annotated frame is in OpenCV's BGR channel order, while the
            # writer expects RGB by default; convert so colors are not swapped.
            writer.write_frame(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
finally:
    # Always release the capture, even if tracking or writing fails part-way
    cap.release()

print(f"The video was successfully saved to {output_path}")


0: 384x640 19 persons, 6 cars, 26 motorcycles, 2 buss, 2 trucks, 373.3ms
Speed: 4.0ms preprocess, 373.3ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 6 cars, 26 motorcycles, 1 bus, 2 trucks, 261.0ms
Speed: 7.6ms preprocess, 261.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 5 cars, 25 motorcycles, 1 bus, 3 trucks, 260.2ms
Speed: 3.0ms preprocess, 260.2ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 5 cars, 25 motorcycles, 1 bus, 4 trucks, 258.2ms
Speed: 3.0ms preprocess, 258.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 5 cars, 25 motorcycles, 1 bus, 4 trucks, 1 backpack, 278.8ms
Speed: 2.0ms preprocess, 278.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 5 cars, 25 motorcycles, 1 bus, 4 trucks, 1 backpack, 278.4ms
Speed: 1.0ms preprocess, 278.4ms inference, 1.0