In [5]:
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Выбрано устройство: {device}")

Выбрано устройство: cuda


# Import libraries

In [6]:
from pathlib import Path
import cv2
import numpy as np
from ultralytics import YOLO
from tqdm import tqdm
import mimetypes
from project_utils import *

In [7]:
notebook_dir = Path().resolve()
project_root = notebook_dir.parent
input_video_path = project_root / "data" / "crowd.mp4"
output_video_path_dir = project_root / "results"
output_video_path_dir.mkdir(exist_ok=True)

# Yolo_pretrained_inference

In [3]:
def run_yolo_inference_on_video(
    model_name: str,
    input_video_path: Path,
    output_dir: Path
) -> Path:
    """
    Запускает инференс YOLOv8 на видео и сохраняет результат.
    
    Аргументы:
        model_name (str): Название предобученной модели YOLOv8 (например, 'yolov8n', 'yolov8s').
        input_video_path (Path): Путь к входному видеофайлу.
        output_dir (Path): Директория для сохранения результата.
    
    Возвращает:
        Path: Путь к сохранённому видеофайлу.
    """
    # Валидация и открытие видео
    validate_video_file(input_video_path)
    cap, meta = open_video_capture(input_video_path)

    # Загрузка модели
    model = YOLO(f"{model_name}.pt")

    # Путь к выходному файлу
    output_video_path = output_dir / f"{model_name}_output.mp4"
    output_dir.mkdir(exist_ok=True)

    # Инициализация записи
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_video_path), fourcc, meta["fps"], (meta["width"], meta["height"]))

    print(f"Обработка видео: {meta['total_frames']} кадров, {meta['fps']} FPS (модель: {model_name})")

    # Обработка кадров
    for _ in tqdm(range(meta["total_frames"]), desc=f"{model_name} inference"):
        ret, frame = cap.read()
        if not ret:
            break

        # Инференс
        results = model(frame, verbose=False)
        boxes, confidences, class_ids = [], [], []

        for result in results:
            for box in result.boxes:
                boxes.append(box.xyxy[0].cpu().numpy())
                confidences.append(float(box.conf.item()))
                class_ids.append(int(box.cls.item()))

        # Отрисовка
        frame = draw_detections(frame, boxes, confidences, class_ids)

        # Запись
        out.write(frame)

    # Очистка ресурсов
    cap.release()
    out.release()

    print(f"Результат сохранён: {output_video_path.resolve()}")
    return output_video_path

In [4]:
YOLO_MODELS = [
    # YOLOv8
    "yolov8n", "yolov8s", "yolov8m", "yolov8l", "yolov8x",
    # YOLOv9
    "yolov9c", "yolov9e",
    # YOLOv10
    "yolov10n", "yolov10s", "yolov10m", "yolov10b", "yolov10l", "yolov10x"
    # YOLO11
    "yolo11n", "yolo11s", "yolo11m", "yolo11l", "yolo11x"
]

for model_name in YOLO_MODELS:
    print(f"\n[+] Запуск инференса для модели: {model_name}")
    try:
        run_yolo_inference_on_video(model_name, input_video_path, output_video_path_dir)
    except Exception as e:
        print(f"[!] Ошибка при запуске {model_name}: {e}")


[+] Запуск инференса для модели: yolov8n
Обработка видео: 705 кадров, 29 FPS (модель: yolov8n)


yolov8n inference: 100%|██████████| 705/705 [00:26<00:00, 26.45it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov8n_output.mp4

[+] Запуск инференса для модели: yolov8s
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt': 100% ━━━━━━━━━━━━ 21.5MB 9.9MB/s 2.2s 2.1s<0.1ss31
Обработка видео: 705 кадров, 29 FPS (модель: yolov8s)


yolov8s inference: 100%|██████████| 705/705 [00:48<00:00, 14.48it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov8s_output.mp4

[+] Запуск инференса для модели: yolov8m
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt': 100% ━━━━━━━━━━━━ 49.7MB 10.6MB/s 4.7s4.6s<0.0s7s8
Обработка видео: 705 кадров, 29 FPS (модель: yolov8m)


yolov8m inference: 100%|██████████| 705/705 [01:34<00:00,  7.43it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov8m_output.mp4

[+] Запуск инференса для модели: yolov8l
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l.pt to 'yolov8l.pt': 100% ━━━━━━━━━━━━ 83.7MB 10.8MB/s 7.7s.6s<0.4ss8s
Обработка видео: 705 кадров, 29 FPS (модель: yolov8l)


yolov8l inference: 100%|██████████| 705/705 [03:03<00:00,  3.85it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov8l_output.mp4

[+] Запуск инференса для модели: yolov8x
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt': 100% ━━━━━━━━━━━━ 130.5MB 11.0MB/s 11.9s11.8s<0.1s3
Обработка видео: 705 кадров, 29 FPS (модель: yolov8x)


yolov8x inference: 100%|██████████| 705/705 [04:18<00:00,  2.72it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov8x_output.mp4

[+] Запуск инференса для модели: yolov9c
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov9c.pt to 'yolov9c.pt': 100% ━━━━━━━━━━━━ 49.4MB 10.3MB/s 4.8s4.8s<0.0s1s2
Обработка видео: 705 кадров, 29 FPS (модель: yolov9c)


yolov9c inference: 100%|██████████| 705/705 [02:19<00:00,  5.04it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov9c_output.mp4

[+] Запуск инференса для модели: yolov9e
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov9e.pt to 'yolov9e.pt': 100% ━━━━━━━━━━━━ 112.1MB 10.7MB/s 10.5s 10.5s<0.0s
Обработка видео: 705 кадров, 29 FPS (модель: yolov9e)


yolov9e inference: 100%|██████████| 705/705 [04:21<00:00,  2.70it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov9e_output.mp4

[+] Запуск инференса для модели: yolov10n
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov10n.pt to 'yolov10n.pt': 100% ━━━━━━━━━━━━ 5.6MB 6.7MB/s 0.8s0.8s<0.0s4s5s
Обработка видео: 705 кадров, 29 FPS (модель: yolov10n)


yolov10n inference: 100%|██████████| 705/705 [00:27<00:00, 25.24it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov10n_output.mp4

[+] Запуск инференса для модели: yolov10s
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov10s.pt to 'yolov10s.pt': 100% ━━━━━━━━━━━━ 15.9MB 9.3MB/s 1.7s 1.6s<0.1s7s
Обработка видео: 705 кадров, 29 FPS (модель: yolov10s)


yolov10s inference: 100%|██████████| 705/705 [00:45<00:00, 15.61it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov10s_output.mp4

[+] Запуск инференса для модели: yolov10m
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov10m.pt to 'yolov10m.pt': 100% ━━━━━━━━━━━━ 32.1MB 8.7MB/s 3.7s 3.7s<0.0ss6
Обработка видео: 705 кадров, 29 FPS (модель: yolov10m)


yolov10m inference: 100%|██████████| 705/705 [01:25<00:00,  8.29it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov10m_output.mp4

[+] Запуск инференса для модели: yolov10b
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov10b.pt to 'yolov10b.pt': 100% ━━━━━━━━━━━━ 39.7MB 10.4MB/s 3.8s3.8s<0.0s1s3
Обработка видео: 705 кадров, 29 FPS (модель: yolov10b)


yolov10b inference: 100%|██████████| 705/705 [01:55<00:00,  6.12it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov10b_output.mp4

[+] Запуск инференса для модели: yolov10l
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov10l.pt to 'yolov10l.pt': 100% ━━━━━━━━━━━━ 50.0MB 10.7MB/s 4.7s4.6s<0.1s2s8
Обработка видео: 705 кадров, 29 FPS (модель: yolov10l)


yolov10l inference: 100%|██████████| 705/705 [02:23<00:00,  4.90it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov10l_output.mp4

[+] Запуск инференса для модели: yolov10xyolo11n
[!] Ошибка при запуске yolov10xyolo11n: [Errno 2] No such file or directory: 'yolov10xyolo11n.pt'

[+] Запуск инференса для модели: yolo11s
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt to 'yolo11s.pt': 100% ━━━━━━━━━━━━ 18.4MB 9.7MB/s 1.9s 1.9s<0.0ss17
Обработка видео: 705 кадров, 29 FPS (модель: yolo11s)


yolo11s inference: 100%|██████████| 705/705 [00:46<00:00, 15.30it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolo11s_output.mp4

[+] Запуск инференса для модели: yolo11m
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'yolo11m.pt': 100% ━━━━━━━━━━━━ 38.8MB 10.4MB/s 3.7s3.7s<0.1s2s2
Обработка видео: 705 кадров, 29 FPS (модель: yolo11m)


yolo11m inference: 100%|██████████| 705/705 [01:38<00:00,  7.16it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolo11m_output.mp4

[+] Запуск инференса для модели: yolo11l
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt to 'yolo11l.pt': 100% ━━━━━━━━━━━━ 49.0MB 10.6MB/s 4.6s4.6s<0.0s4s3
Обработка видео: 705 кадров, 29 FPS (модель: yolo11l)


yolo11l inference: 100%|██████████| 705/705 [02:00<00:00,  5.84it/s]


Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolo11l_output.mp4

[+] Запуск инференса для модели: yolo11x
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt to 'yolo11x.pt': 100% ━━━━━━━━━━━━ 109.3MB 10.9MB/s 10.0s9.9s<0.1ss3
Обработка видео: 705 кадров, 29 FPS (модель: yolo11x)


yolo11x inference: 100%|██████████| 705/705 [03:48<00:00,  3.09it/s]

Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolo11x_output.mp4





# SAHI_and_Yolo_pretrained_inference

In [4]:
from sahi.models.ultralytics import UltralyticsDetectionModel
from sahi.predict import get_sliced_prediction
import torch  # для проверки CUDA


def run_yolo_sahi_inference_on_video(
    model_name: str,
    input_video_path: Path,
    output_dir: Path,
    slice_height: int = 640,
    slice_width: int = 640,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    confidence_threshold: float = 0.3
) -> Path:
    validate_video_file(input_video_path)
    cap, meta = open_video_capture(input_video_path)

    # Автоматическое определение устройства
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Используемое устройство: {device}")

    detection_model = UltralyticsDetectionModel(
        model_path=f"{model_name}.pt",
        confidence_threshold=confidence_threshold,
        device=device
    )

    output_video_path = output_dir / f"{model_name}_sahi_output.mp4"
    output_dir.mkdir(exist_ok=True)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_video_path), fourcc, meta["fps"], (meta["width"], meta["height"]))

    print(f"SAHI + {model_name} обработка: {meta['total_frames']} кадров, {meta['fps']} FPS")

    for _ in tqdm(range(meta["total_frames"]), desc=f"{model_name}+SAHI"):
        ret, frame = cap.read()
        if not ret:
            break

        result = get_sliced_prediction(
            image=frame,
            detection_model=detection_model,
            slice_height=slice_height,
            slice_width=slice_width,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            verbose=0
        )

        boxes, confidences, class_ids = [], [], []
        for obj in result.object_prediction_list:
            if obj.category.id != 0:  # только люди (COCO class 0)
                continue
            bbox = obj.bbox.to_xyxy()
            boxes.append(bbox)
            confidences.append(obj.score.value)
            class_ids.append(obj.category.id)

        frame = draw_detections(frame, boxes, confidences, class_ids)
        out.write(frame)

    cap.release()
    out.release()
    print(f"Результат сохранён: {output_video_path.resolve()}")
    return output_video_path


In [5]:
run_yolo_sahi_inference_on_video("yolov8s", input_video_path, output_video_path_dir)

Используемое устройство: cuda
SAHI + yolov8s обработка: 705 кадров, 29 FPS


yolov8s+SAHI: 100%|██████████| 705/705 [02:53<00:00,  4.07it/s]

Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov8s_sahi_output.mp4





WindowsPath('E:/people_real_time_detection_and_tracking/results/yolov8s_sahi_output.mp4')

In [6]:
run_yolo_sahi_inference_on_video("yolov8x", input_video_path, output_video_path_dir)

Используемое устройство: cuda
SAHI + yolov8x обработка: 705 кадров, 29 FPS


yolov8x+SAHI: 100%|██████████| 705/705 [04:15<00:00,  2.76it/s]

Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov8x_sahi_output.mp4





WindowsPath('E:/people_real_time_detection_and_tracking/results/yolov8x_sahi_output.mp4')

# RT-DETR_pretrained_inference

In [7]:
def run_rt_detr_inference_on_video(
    model_name: str,
    input_video_path: Path,
    output_dir: Path
) -> Path:
    """
    Запускает инференс RT-DETR на видео и сохраняет результат.
    
    Аргументы:
        model_name (str): Название модели, например 'rtdetr-l', 'rtdetr-x'.
        input_video_path (Path): Путь к входному видеофайлу.
        output_dir (Path): Директория для сохранения результата.
    
    Возвращает:
        Path: Путь к сохранённому видеофайлу.
    """
    validate_video_file(input_video_path)
    cap, meta = open_video_capture(input_video_path)

    # Загрузка RT-DETR модели через Ultralytics
    model = YOLO(f"{model_name}.pt")

    output_video_path = output_dir / f"{model_name}_output.mp4"
    output_dir.mkdir(exist_ok=True)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_video_path), fourcc, meta["fps"], (meta["width"], meta["height"]))

    print(f"Обработка видео: {meta['total_frames']} кадров, {meta['fps']} FPS (модель: {model_name})")

    for _ in tqdm(range(meta["total_frames"]), desc=f"{model_name} inference"):
        ret, frame = cap.read()
        if not ret:
            break

        # Инференс
        results = model(frame, verbose=False)
        boxes, confidences, class_ids = [], [], []

        for result in results:
            for box in result.boxes:
                boxes.append(box.xyxy[0].cpu().numpy())
                confidences.append(float(box.conf.item()))
                class_ids.append(int(box.cls.item()))

        # Отрисовка (та же функция, что и для YOLO)
        frame = draw_detections(frame, boxes, confidences, class_ids)
        out.write(frame)

    cap.release()
    out.release()
    print(f"Результат сохранён: {output_video_path.resolve()}")
    return output_video_path

In [8]:
run_rt_detr_inference_on_video("rtdetr-l", input_video_path, output_video_path_dir)

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/rtdetr-l.pt to 'rtdetr-l.pt': 100% ━━━━━━━━━━━━ 63.4MB 10.1MB/s 6.3s6.2s<0.1s3s4
Обработка видео: 705 кадров, 29 FPS (модель: rtdetr-l)


rtdetr-l inference: 100%|██████████| 705/705 [00:34<00:00, 20.61it/s]

Результат сохранён: E:\people_real_time_detection_and_tracking\results\rtdetr-l_output.mp4





WindowsPath('E:/people_real_time_detection_and_tracking/results/rtdetr-l_output.mp4')

In [9]:
run_rt_detr_inference_on_video("rtdetr-x", input_video_path, output_video_path_dir)

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/rtdetr-x.pt to 'rtdetr-x.pt': 100% ━━━━━━━━━━━━ 129.5MB 9.5MB/s 13.6ss 13.6s<0.0s
Обработка видео: 705 кадров, 29 FPS (модель: rtdetr-x)


rtdetr-x inference: 100%|██████████| 705/705 [00:49<00:00, 14.31it/s]

Результат сохранён: E:\people_real_time_detection_and_tracking\results\rtdetr-x_output.mp4





WindowsPath('E:/people_real_time_detection_and_tracking/results/rtdetr-x_output.mp4')

# SAHI_and_RT-DETR_pretrained_inference

In [10]:
from sahi.models.ultralytics import UltralyticsDetectionModel
from sahi.predict import get_sliced_prediction
import torch
from pathlib import Path
from tqdm import tqdm


def run_rt_detr_sahi_inference_on_video(
    model_name: str,
    input_video_path: Path,
    output_dir: Path,
    slice_height: int = 640,
    slice_width: int = 640,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    confidence_threshold: float = 0.3
) -> Path:
    """
    Запускает инференс RT-DETR + SAHI на видео и сохраняет результат.
    
    Поддерживает модели: 'rtdetr-l', 'rtdetr-x'.
    """
    validate_video_file(input_video_path)
    cap, meta = open_video_capture(input_video_path)

    # Автоматический выбор устройства
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Используемое устройство: {device}")

    # SAHI-совместимая модель (работает с любыми Ultralytics-моделями, включая RT-DETR)
    detection_model = UltralyticsDetectionModel(
        model_path=f"{model_name}.pt",
        confidence_threshold=confidence_threshold,
        device=device
    )

    output_video_path = output_dir / f"{model_name}_sahi_output.mp4"
    output_dir.mkdir(exist_ok=True)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_video_path), fourcc, meta["fps"], (meta["width"], meta["height"]))

    print(f"SAHI + {model_name} обработка: {meta['total_frames']} кадров, {meta['fps']} FPS")

    for _ in tqdm(range(meta["total_frames"]), desc=f"{model_name}+SAHI"):
        ret, frame = cap.read()
        if not ret:
            break

        # SAHI inference
        result = get_sliced_prediction(
            image=frame,
            detection_model=detection_model,
            slice_height=slice_height,
            slice_width=slice_width,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            verbose=0
        )

        boxes, confidences, class_ids = [], [], []
        for obj in result.object_prediction_list:
            if obj.category.id != 0:  # class 0 = person
                continue
            bbox = obj.bbox.to_xyxy()
            boxes.append(bbox)
            confidences.append(obj.score.value)
            class_ids.append(obj.category.id)

        frame = draw_detections(frame, boxes, confidences, class_ids)
        out.write(frame)

    cap.release()
    out.release()
    print(f"Результат сохранён: {output_video_path.resolve()}")
    return output_video_path

In [12]:
run_rt_detr_sahi_inference_on_video("rtdetr-x", input_video_path, output_video_path_dir)

Используемое устройство: cuda
SAHI + rtdetr-x обработка: 705 кадров, 29 FPS


rtdetr-x+SAHI: 100%|██████████| 705/705 [05:28<00:00,  2.15it/s]

Результат сохранён: E:\people_real_time_detection_and_tracking\results\rtdetr-x_sahi_output.mp4





WindowsPath('E:/people_real_time_detection_and_tracking/results/rtdetr-x_sahi_output.mp4')

# SAHI_and_RT-DETR_pretrained_inference
## BoT-SORT_post-processing_with_REID

In [12]:
import sys
from pathlib import Path

BOT_SORT_PATH = project_root / "BoT-SORT"

if not (BOT_SORT_PATH / "tracker" / "bot_sort.py").exists():
    raise FileNotFoundError(
        f"Папка BoT-SORT не найдена по пути: {BOT_SORT_PATH.resolve()}\n"
        "Выполните в терминале (в папке проекта):\n"
        "git clone https://github.com/NirAharon/BoT-SORT.git"
    )

# === КЛЮЧЕВОЕ ИЗМЕНЕНИЕ: добавляем КОРНЕВУЮ папку BoT-SORT в sys.path ===
sys.path.insert(0, str(BOT_SORT_PATH))

# Теперь этот импорт будет работать
from tracker.bot_sort import BoTSORT

from sahi.models.ultralytics import UltralyticsDetectionModel
from sahi.predict import get_sliced_prediction


def run_rt_detr_sahi_inference_on_video(
    model_name: str,
    input_video_path: Path,
    output_dir: Path,
    slice_height: int = 640,
    slice_width: int = 640,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    confidence_threshold: float = 0.3
) -> Path:
    """
    SAHI + RT-DETR + BoT-SORT трекинг людей (class_id=0).
    Использует оригинальный BoT-SORT с ReID.
    """
    validate_video_file(input_video_path)
    cap, meta = open_video_capture(input_video_path)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Устройство: {device}")

    detection_model = UltralyticsDetectionModel(
        model_path=f"{model_name}.pt",
        confidence_threshold=confidence_threshold,
        device=device
    )

    output_dir.mkdir(exist_ok=True)
    output_path = output_dir / f"{model_name}_sahi_botsort_output.mp4"
    out = cv2.VideoWriter(str(output_path), cv2.VideoWriter_fourcc(*'mp4v'), meta["fps"], (meta["width"], meta["height"]))

    # === Инициализация BoT-SORT с ReID ===
    from argparse import Namespace

    reid_config = str(BOT_SORT_PATH / "fast_reid" / "configs" / "MOT17" / "sbs_S50.yml")
    reid_weights = str(BOT_SORT_PATH / "pretrained" / "sbs_S50_market1501.pth")

    tracker_args = Namespace(
        track_high_thresh=0.5,
        track_low_thresh=0.1,
        new_track_thresh=0.4,
        track_buffer=30,
        match_thresh=0.8,
        proximity_thresh=0.5,
        appearance_thresh=0.2,
        cmc_method='sparseOptFlow',
        with_reid=True,
        fast_reid_config=reid_config,
        fast_reid_weights=reid_weights,
        device='cuda',
        name='BoT-SORT',      
        ablation=False,
        mot20=False        
    )
    tracker = BoTSORT(args=tracker_args, frame_rate=meta["fps"])

    print(f"Обработка: SAHI + {model_name} + BoT-SORT (с ReID), {meta['total_frames']} кадров")

    for _ in tqdm(range(meta["total_frames"]), desc="Трекинг"):
        ret, frame = cap.read()
        if not ret:
            break

        # SAHI inference
        result = get_sliced_prediction(
            image=frame,
            detection_model=detection_model,
            slice_height=slice_height,
            slice_width=slice_width,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            verbose=0
        )

        # Собираем только людей (class_id = 0)
        dets = []
        for obj in result.object_prediction_list:
            if obj.category.id != 0:
                continue
            x1, y1, x2, y2 = obj.bbox.to_xyxy()
            conf = obj.score.value
            dets.append([x1, y1, x2, y2, conf])
        dets = np.array(dets) if dets else np.empty((0, 5))

        # Обновление трекера
        tracked_objects = tracker.update(dets, frame)

        # Функция для получения цвета по ID
        def get_color_from_id(track_id):
            np.random.seed(track_id)
            return np.random.randint(0, 255, size=3).tolist()

        # Отрисовка
        for track in tracked_objects:
            x1, y1, x2, y2 = map(int, track.tlbr)
            track_id = int(track.track_id)
            color = get_color_from_id(track_id)

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f'ID: {track_id}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        out.write(frame)

    cap.release()
    out.release()
    print(f"Результат сохранён: {output_path.resolve()}")
    return output_path

In [13]:
run_rt_detr_sahi_inference_on_video("rtdetr-x", input_video_path, output_video_path_dir)

Устройство: cuda


Skip loading parameter 'backbone.layer1.0.downsample.1.weight' to the model due to incompatible shapes: (256,) in the checkpoint but (256, 64, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'backbone.layer2.0.downsample.1.weight' to the model due to incompatible shapes: (512,) in the checkpoint but (512, 256, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'backbone.layer3.0.downsample.1.weight' to the model due to incompatible shapes: (1024,) in the checkpoint but (1024, 512, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'backbone.layer4.0.downsample.1.weight' to the model due to incompatible shapes: (2048,) in the checkpoint but (2048, 1024, 1, 1) in the model! You might want to double check if this is expected.


Обработка: SAHI + rtdetr-x + BoT-SORT (с ReID), 705 кадров


Трекинг: 100%|██████████| 705/705 [05:15<00:00,  2.24it/s]

Результат сохранён: E:\people_real_time_detection_and_tracking\results\rtdetr-x_sahi_botsort_output.mp4





WindowsPath('E:/people_real_time_detection_and_tracking/results/rtdetr-x_sahi_botsort_output.mp4')

# SAHI_and_Yolov8_pretrained_inference
## BoT-SORT_post-processing_with_REID

In [14]:
run_rt_detr_sahi_inference_on_video("yolov8x", input_video_path, output_video_path_dir)

Устройство: cuda


Skip loading parameter 'backbone.layer1.0.downsample.1.weight' to the model due to incompatible shapes: (256,) in the checkpoint but (256, 64, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'backbone.layer2.0.downsample.1.weight' to the model due to incompatible shapes: (512,) in the checkpoint but (512, 256, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'backbone.layer3.0.downsample.1.weight' to the model due to incompatible shapes: (1024,) in the checkpoint but (1024, 512, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'backbone.layer4.0.downsample.1.weight' to the model due to incompatible shapes: (2048,) in the checkpoint but (2048, 1024, 1, 1) in the model! You might want to double check if this is expected.


Обработка: SAHI + yolov8x + BoT-SORT (с ReID), 705 кадров


Трекинг: 100%|██████████| 705/705 [04:46<00:00,  2.46it/s]

Результат сохранён: E:\people_real_time_detection_and_tracking\results\yolov8x_sahi_botsort_output.mp4





WindowsPath('E:/people_real_time_detection_and_tracking/results/yolov8x_sahi_botsort_output.mp4')