### 1. Load the YOLO model and open the input video

In [1]:
from ultralytics import YOLO
import cv2

# Load the YOLO11-large weights by name (the captured output of this cell
# shows them being downloaded to 'yolo11l.pt' on first use).
model = YOLO('yolo11l.pt')

# Input video to run tracking on.
video_path = '08fd33_4.mp4'
video_capture = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise when the file is missing or unreadable; it
# just returns an unopened capture. Fail fast here instead of silently
# producing a 0x0 writer and an empty processing loop in later cells.
if not video_capture.isOpened():
    raise OSError(f"Could not open video: {video_path!r}")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt to 'yolo11l.pt'...


100%|██████████| 49.0M/49.0M [00:05<00:00, 8.76MB/s]


### 2. Configure the video writer

In [2]:
import os

# Frame rate used only when the source reports no usable FPS.
DEFAULT_FPS = 30.0

# Mirror the source video's geometry so annotated frames can be written as-is.
width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Keep the frame rate as a float: truncating with int() turns e.g. 29.97 into
# 29 and makes the output play at the wrong speed. VideoWriter accepts a
# floating-point fps.
fps = float(video_capture.get(cv2.CAP_PROP_FPS))
if fps <= 0:
    # The capture reported no frame rate; a writer created with fps <= 0 is
    # not usable, so fall back to a sane default.
    fps = DEFAULT_FPS

output_name = 'output.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video_writer = cv2.VideoWriter(output_name, fourcc, fps, (width, height))

# Like VideoCapture, VideoWriter fails silently; surface the problem now
# rather than ending up with a missing or empty output file.
if not video_writer.isOpened():
    raise OSError(
        f"Could not open video writer for {output_name!r} "
        f"({width}x{height} @ {fps} fps)"
    )

### 3. Track objects, store the track histories, and write the annotated video

In [11]:
from collections import defaultdict
import numpy as np

# Maximum number of recent centre points kept (and drawn) per track.
MAX_TRACK_LENGTH = 30

# Maps track id -> list of the most recent (x, y) box centres for that track.
track_history = defaultdict(list)

# NOTE: this cell consumes and then releases `video_capture` and
# `video_writer`; to run it again, first re-run the cells that create them.
try:
    while video_capture.isOpened():
        success, frame = video_capture.read()
        if not success:
            # No more frames (or a read failure): stop processing.
            break

        # persist=True keeps the tracker state between successive calls so
        # that ids stay stable from frame to frame.
        results = model.track(frame, persist=True, show=False)

        try:
            boxes = results[0].boxes.xywh.cpu()
            ids = results[0].boxes.id
            # `id` is None when nothing is being tracked in this frame.
            ids = [] if ids is None else ids.int().tolist()
        except AttributeError:
            boxes = []
            ids = []

        # plot detection results
        annotated_frame = results[0].plot(conf=False)

        # plot tracking results (the loop is a no-op when `ids` is empty)
        for box, track_id in zip(boxes, ids):
            x, y, _w, _h = box
            track = track_history[track_id]
            track.append((float(x), float(y)))

            # keep only the most recent points of each trail
            if len(track) > MAX_TRACK_LENGTH:
                track.pop(0)

            # draw tracking lines
            points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(
                annotated_frame,
                [points],
                isClosed=False,
                color=(230, 230, 230),
                thickness=4,
            )

        # Write EVERY frame to the output video, including frames without any
        # tracked object; skipping them would shorten the output and break
        # its timing relative to the source.
        video_writer.write(annotated_frame)
finally:
    # Always release the handles, even if tracking raises or the kernel is
    # interrupted, so the output file is finalised and the input is closed.
    video_capture.release()
    video_writer.release()


0: 384x640 22 persons, 121.0ms
Speed: 3.0ms preprocess, 121.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 115.0ms
Speed: 3.0ms preprocess, 115.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 108.0ms
Speed: 3.0ms preprocess, 108.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 110.0ms
Speed: 4.0ms preprocess, 110.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 109.0ms
Speed: 3.0ms preprocess, 109.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 23 persons, 108.0ms
Speed: 4.0ms preprocess, 108.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 23 persons, 111.0ms
Speed: 2.0ms preprocess, 111.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 111.0ms
Speed: 3.0ms preprocess, 111.0ms inference, 2.0ms postproc