In [None]:
import os
import math
import subprocess as sp
from dataclasses import dataclass, field
from enum import IntEnum

import numpy as np
import cv2
import torch
from scipy.optimize import linear_sum_assignment

from ultralytics import YOLOv10
from torchreid.utils import FeatureExtractor


In [None]:
# ==== Detector / ROI / tracking configuration ====

# Score threshold for a "confident enough" box that we use to update the Kalman filter
TRACK_CONF_THR = 0.85

# Minimum box height in pixels (anything shorter is ignored as "too small")
MIN_BOX_HEIGHT = 24

# ROI in frame-relative coordinates (0..1): only objects fully inside the ROI take part in tracking
# NOTE(review): ROI_Y2_REL (0.1) is smaller than ROI_Y1_REL (0.5) — confirm the intended corner values
ROI_X1_REL = 0.0
ROI_X2_REL = 0.1
ROI_Y1_REL = 0.5
ROI_Y2_REL = 0.1

# Maximum number of appearance features averaged per track
REID_MAX_FEATURES = 20

# Classes for which ReID is enabled (people only for now)
# COCO: 0 = person
REID_ENABLED_CLASSES = {0}

# ==== COCO class names (80), used for the on-frame labels ====
COCO_CLASSES = [
    "person","bicycle","car","motorcycle","airplane","bus","train","truck","boat",
    "traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat",
    "dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack",
    "umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball",
    "kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket",
    "bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple",
    "sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair",
    "couch","potted plant","bed","dining table","toilet","tv","laptop","mouse",
    "remote","keyboard","cell phone","microwave","oven","toaster","sink",
    "refrigerator","book","clock","vase","scissors","teddy bear","hair drier",
    "toothbrush",
]


In [None]:
# ==== Load YOLOv10-M from HF (COCO) ====

# Use device 0 when CUDA is available, otherwise fall back to the CPU
device = 0 if torch.cuda.is_available() else "cpu"
print("YOLO device:", device)

yolo_model = YOLOv10.from_pretrained("jameslahm/yolov10m")


In [None]:
def get_video_info(path: str):
    """
    Query ffprobe for the first video stream of the file at `path`.

    Returns a (width, height, fps) tuple: width and height as ints, fps as a
    float computed from the stream's r_frame_rate fraction ("num/den").
    """
    probe_cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0"]
    probe_cmd += ["-show_entries", "stream=width,height,r_frame_rate"]
    probe_cmd += ["-of", "default=noprint_wrappers=1:nokey=1"]
    probe_cmd.append(path)

    probe_text = sp.check_output(probe_cmd).decode("utf-8")
    fields = probe_text.strip().split("\n")

    # One bare value per line is expected: width, height, then the frame rate.
    frame_width = int(fields[0])
    frame_height = int(fields[1])
    rate_num, rate_den = fields[2].split("/")
    return frame_width, frame_height, float(rate_num) / float(rate_den)


def ffmpeg_frame_generator(path: str, resize_to=None):
    """
    Decode the video at `path` with ffmpeg and yield its frames one by one.

    Each frame is a uint8 np.ndarray of shape (height, width, 3) built from
    ffmpeg's raw bgr24 output.
    resize_to: (w, h) to rescale every frame, or None to keep the source size.
    """
    src_w, src_h, _ = get_video_info(path)

    scale_filter = []
    frame_w, frame_h = src_w, src_h
    if resize_to is not None:
        frame_w, frame_h = resize_to
        scale_filter = ["-vf", f"scale={frame_w}:{frame_h}"]

    decode_cmd = ["ffmpeg", "-i", path, "-loglevel", "error"]
    decode_cmd.extend(scale_filter)
    decode_cmd.extend(["-f", "rawvideo", "-pix_fmt", "bgr24", "pipe:1"])
    decoder = sp.Popen(decode_cmd, stdout=sp.PIPE, bufsize=10**8)

    bytes_per_frame = frame_w * frame_h * 3
    try:
        # A short read means the stream ended (or was cut off): stop there.
        while len(chunk := decoder.stdout.read(bytes_per_frame)) == bytes_per_frame:
            yield np.frombuffer(chunk, np.uint8).reshape((frame_h, frame_w, 3))
    finally:
        # Runs on normal exhaustion and when the consumer closes the generator early.
        if decoder.stdout is not None:
            decoder.stdout.close()
        decoder.wait()


def create_ffmpeg_writer(output_path: str, width: int, height: int, fps: float):
    """
    Start an ffmpeg encoder process that reads raw BGR frames from its stdin.

    The input is declared as rawvideo/bgr24 of size width x height at `fps`;
    the output is written to `output_path` with libx264 / yuv420p and no audio,
    overwriting an existing file (-y).

    Returns the subprocess.Popen object; frames are written to its stdin.
    """
    raw_input_opts = [
        "-f", "rawvideo",
        "-vcodec", "rawvideo",
        "-pix_fmt", "bgr24",
        "-s", f"{width}x{height}",
        "-r", f"{fps}",
        "-i", "pipe:0",
    ]
    encode_opts = ["-an", "-vcodec", "libx264", "-pix_fmt", "yuv420p"]
    return sp.Popen(
        ["ffmpeg", "-y", *raw_input_opts, *encode_opts, output_path],
        stdin=sp.PIPE,
        bufsize=10**8,
    )


In [None]:
def xyxy_to_xyah(boxes_xyxy: np.ndarray) -> np.ndarray:
    """
    Convert corner boxes (x1, y1, x2, y2) to (x_c, y_c, a, h), where a = w / h.

    boxes_xyxy: (N, 4)
    Returns a float32 array of shape (N, 4); an empty input gives shape (0, 4).
    """
    if boxes_xyxy.size == 0:
        return np.zeros((0, 4), dtype=np.float32)

    top_left = boxes_xyxy[:, 0:2]
    extent = boxes_xyxy[:, 2:4] - top_left          # columns: width, height
    center = top_left + 0.5 * extent
    height = extent[:, 1]
    aspect = extent[:, 0] / (height + 1e-6)         # epsilon guards zero-height boxes

    columns = [center, aspect[:, None], height[:, None]]
    return np.concatenate(columns, axis=1).astype(np.float32)


def xyah_to_xyxy(boxes_xyah: np.ndarray) -> np.ndarray:
    """
    Convert (x_c, y_c, a, h) boxes back to corners (x1, y1, x2, y2), with w = a * h.

    Returns a float32 array of shape (N, 4); an empty input gives shape (0, 4).
    """
    if boxes_xyah.size == 0:
        return np.zeros((0, 4), dtype=np.float32)

    center = boxes_xyah[:, 0:2]
    height = boxes_xyah[:, 3]
    width = boxes_xyah[:, 2] * height
    half_extent = np.stack([0.5 * width, 0.5 * height], axis=-1)

    corners = [center - half_extent, center + half_extent]
    return np.concatenate(corners, axis=1).astype(np.float32)


def iou_matrix(boxes1_xyxy: np.ndarray, boxes2_xyxy: np.ndarray) -> np.ndarray:
    """
    Pairwise IoU between two sets of corner boxes, (N1, 4) and (N2, 4).

    Returns a float32 matrix of shape (N1, N2); if either set is empty the
    matrix is all zeros.
    """
    n_first, n_second = boxes1_xyxy.shape[0], boxes2_xyxy.shape[0]
    if boxes1_xyxy.size == 0 or boxes2_xyxy.size == 0:
        return np.zeros((n_first, n_second), dtype=np.float32)

    first = boxes1_xyxy[:, None, :]   # (N1,1,4)
    second = boxes2_xyxy[None, :, :]  # (1,N2,4)

    # Intersection rectangle: max of the top-left corners, min of the bottom-right ones.
    overlap_tl = np.maximum(first[..., 0:2], second[..., 0:2])
    overlap_br = np.minimum(first[..., 2:4], second[..., 2:4])
    overlap_wh = np.maximum(0.0, overlap_br - overlap_tl)
    inter = overlap_wh[..., 0] * overlap_wh[..., 1]

    area_first = (first[..., 2] - first[..., 0]) * (first[..., 3] - first[..., 1])
    area_second = (second[..., 2] - second[..., 0]) * (second[..., 3] - second[..., 1])

    # The epsilon keeps the division finite for zero-area boxes.
    union = area_first + area_second - inter + 1e-6
    return (inter / union).astype(np.float32)


In [None]:
class KalmanFilterXYAH:
    """
    Minimal linear Kalman filter over the state
      x = [x, y, a, h, vx, vy, va]^T
    (the height h has no velocity component).

    Measurement:
      z = [x, y, a, h]^T
    """

    def __init__(self, dt: float = 1.0 / 30.0):
        self.dt = float(dt)

        # Dimensions of the state and of the measurement
        self.dim_x = 7
        self.dim_z = 4

        # Transition matrix: identity plus dt coupling each velocity to its position term
        self.F = np.eye(self.dim_x, dtype=np.float32)
        for pos_idx, vel_idx in ((0, 4), (1, 5), (2, 6)):
            self.F[pos_idx, vel_idx] = self.dt

        # Measurement matrix: picks x, y, a, h out of the state
        self.H = np.eye(self.dim_z, self.dim_x, dtype=np.float32)

        # Initial covariance
        self.P = self._fresh_covariance()

        # Process noise
        q_pos, q_vel = 1.0, 10.0
        self.Q = np.diag([q_pos] * 4 + [q_vel] * 3).astype(np.float32)

        # Measurement noise
        self.R = np.diag([10.0, 10.0, 1.0, 10.0]).astype(np.float32)

        self.x = np.zeros((self.dim_x,), dtype=np.float32)
        self.initialized = False

    def _fresh_covariance(self) -> np.ndarray:
        """Covariance used at construction and on every (re-)initialisation: 10 * I."""
        return np.eye(self.dim_x, dtype=np.float32) * 10.0

    def initiate(self, z: np.ndarray):
        """
        Initialise the filter from a single measurement z=[x,y,a,h]:
        the measured part of the state is set to z, velocities to zero.
        """
        measurement = np.asarray(z, dtype=np.float32).reshape(-1)
        self.x.fill(0.0)
        self.x[0:4] = measurement[0:4]
        self.P = self._fresh_covariance()
        self.initialized = True

    def predict(self):
        """Advance state and covariance by one step; no-op before initialisation."""
        if self.initialized:
            self.x = self.F @ self.x
            self.P = self.F @ self.P @ self.F.T + self.Q

    def project(self):
        """
        Project the state into measurement space.

        Returns (z_pred, S): the expected measurement and its covariance.
        Before initialisation this is a zero vector and the identity matrix.
        """
        if self.initialized:
            expected = self.H @ self.x
            innovation_cov = self.H @ self.P @ self.H.T + self.R
            return expected.astype(np.float32), innovation_cov.astype(np.float32)

        return (
            np.zeros((self.dim_z,), dtype=np.float32),
            np.eye(self.dim_z, dtype=np.float32),
        )

    def update(self, z: np.ndarray):
        """
        Correct the state with a measurement z=[x,y,a,h].
        The very first measurement initialises the filter instead.
        """
        if not self.initialized:
            self.initiate(z)
            return

        measurement = np.asarray(z, dtype=np.float32).reshape(-1)
        expected, innovation_cov = self.project()
        residual = measurement - expected

        # A tiny diagonal term is added to S before inverting it.
        regularized_cov = innovation_cov + 1e-9 * np.eye(self.dim_z, dtype=np.float32)
        gain = self.P @ self.H.T @ np.linalg.inv(regularized_cov)

        self.x = self.x + gain @ residual
        identity = np.eye(self.dim_x, dtype=np.float32)
        self.P = (identity - gain @ self.H) @ self.P


In [None]:
# ==== ReID: OSNet (MSMT17) for people ====

REID_PERSON_CLASS = 0  # COCO: 0 = person

osnet_device = "cuda" if torch.cuda.is_available() else "cpu"
print("OSNet device:", osnet_device)

# !!! IMPORTANT: change this to the actual path of the osnet_x1_0_msmt17 weights file !!!
# NOTE(review): the original note named the file with a .pt extension while the path below ends in .pth — confirm
OSNET_WEIGHTS_PATH = r"osnet_x1_0_msmt17.pth"

person_reid_extractor = FeatureExtractor(
    model_name="osnet_x1_0",
    model_path=OSNET_WEIGHTS_PATH,
    device=osnet_device,
)
print("[ReID] OSNet x1_0 MSMT17 загружен")


In [None]:
def maha_distance_matrix(tracks_xyah: np.ndarray, tracks_S: np.ndarray, dets_xyah: np.ndarray) -> np.ndarray:
    """
    Squared Mahalanobis distance d^2 between every predicted track (mean z_pred)
    and every observation in dets_xyah.

    tracks_xyah: (T,4) — z_pred
    tracks_S:    (T,4,4) — covariances in measurement space
    dets_xyah:   (N,4)

    Returns a float32 matrix of shape (T,N); all zeros when T or N is 0.
    """
    n_tracks, n_dets = tracks_xyah.shape[0], dets_xyah.shape[0]
    dist_sq = np.zeros((n_tracks, n_dets), dtype=np.float32)
    if n_tracks == 0 or n_dets == 0:
        return dist_sq

    # Small diagonal term added to each covariance before inversion.
    jitter = 1e-9 * np.eye(4, dtype=np.float32)
    for row, mean in enumerate(tracks_xyah):
        precision = np.linalg.inv(tracks_S[row] + jitter)
        residual = dets_xyah - mean  # (N,4)
        dist_sq[row] = np.einsum("ni,ij,nj->n", residual, precision, residual)
    return dist_sq


def extract_person_reid_features(frame_bgr: np.ndarray, det: dict):
    """
    Compute appearance features for the person detections of one frame.

    frame_bgr : (H,W,3) BGR frame
    det       : detection dict; the "xyxy" and "cls" entries are read

    Returns:
      features   : (N, 512) float32, L2-normalised rows; rows without a
                   feature stay all-zero
      valid_mask : (N,) bool — True where a feature was actually computed
                   (class REID_PERSON_CLASS with a non-degenerate crop)
    """
    boxes = np.asarray(det["xyxy"], dtype=np.float32)  # (N,4)
    cls = np.asarray(det["cls"], dtype=np.int32)
    N = boxes.shape[0]

    features = np.zeros((N, 512), dtype=np.float32)
    valid_mask = np.zeros((N,), dtype=bool)
    if N == 0:
        return features, valid_mask

    H, W, _ = frame_bgr.shape

    crops = []
    idx_map = []  # detection index of each crop, in crop order

    for i in np.flatnonzero(cls == REID_PERSON_CLASS):
        x1, y1, x2, y2 = boxes[i]
        # x1/y1 are inclusive start indices, so they are limited to the last pixel.
        x1 = min(max(int(x1), 0), W - 1)
        y1 = min(max(int(y1), 0), H - 1)
        # x2/y2 are exclusive slice ends, so they may reach W/H; clamping them
        # to W-1/H-1 would drop the last column/row of boxes touching the border.
        x2 = min(max(int(x2), 0), W)
        y2 = min(max(int(y2), 0), H)

        if x2 <= x1 or y2 <= y1:
            continue

        crop_bgr = frame_bgr[y1:y2, x1:x2, :]
        if crop_bgr.size == 0:
            continue

        crop_bgr = cv2.resize(crop_bgr, (128, 256), interpolation=cv2.INTER_LINEAR)
        crops.append(cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB))
        idx_map.append(i)

    if not crops:
        return features, valid_mask

    with torch.no_grad():
        feats = person_reid_extractor(crops)
        if isinstance(feats, torch.Tensor):
            feats = feats.cpu().numpy()

    # L2-normalise all extracted rows at once and scatter them back to their detections.
    feats = np.asarray(feats, dtype=np.float32)
    feats = feats / (np.linalg.norm(feats, axis=1, keepdims=True) + 1e-12)
    features[idx_map] = feats
    valid_mask[idx_map] = True

    return features, valid_mask




def byte_maha_associate(
    tracks_xyah: np.ndarray,
    tracks_S: np.ndarray,
    dets_xyxy: np.ndarray,
    dets_xyah: np.ndarray,
    dets_scores: np.ndarray,
    valid_size_mask: np.ndarray,
    inside_roi_mask: np.ndarray,
    thr_high: float,
    thr_low: float,
    iou_thresh_high: float,
    iou_thresh_low: float,
    gate_thresh: float,
    alpha: float,
    beta: float,
    appearance_cost_high: np.ndarray | None,
    track_classes: np.ndarray,
    det_classes: np.ndarray,
):
    """
    Каскадная ассоциация ByteTrack + Махаланобис + Венгерский.

    Вход:
      tracks_xyah   : (T,4) — z_pred треков (x,y,a,h)
      tracks_S      : (T,4,4) — ковариации в пространстве наблюдений
      dets_xyxy     : (N,4) — детекции (x1,y1,x2,y2)
      dets_xyah     : (N,4) — те же детекции в формате (x,y,a,h)
      dets_scores   : (N,)  — score детекций
      valid_size_mask : (N,) bool — достаточно крупные боксы
      inside_roi_mask : (N,) bool — полностью внутри ROI
      thr_high, thr_low: пороги ByteTrack
      iou_thresh_high, iou_thresh_low: пороги по IoU
      gate_thresh   : порог по Махаланобис-квадрату
      alpha, beta   : веса для комбинированной стоимости α·(1−IoU)+β·d_app
      appearance_cost_high : (T,N) или None — d_app для HIGH-детекций
      track_classes : (T,) int
      det_classes   : (N,) int

    Выход:
      словарь с полями:
        "matches_high"       : (K1,2) int (t_idx, d_idx)
        "matches_low"        : (K2,2) int (t_idx, d_idx)
        "unmatched_tracks"   : (T_u,) int
        "unmatched_dets_high": (D_h_u,) int
        "unmatched_dets_low" : (D_l_u,) int
    """
    T = tracks_xyah.shape[0]
    N = dets_xyxy.shape[0]

    dets_scores = np.asarray(dets_scores, dtype=np.float32)

    # --- разбиение на high / low по ByteTrack ---
    high_mask = (dets_scores >= thr_high) & valid_size_mask & inside_roi_mask
    low_mask  = (dets_scores >= thr_low) & (dets_scores < thr_high) & valid_size_mask & inside_roi_mask

    high_idxs = np.where(high_mask)[0]
    low_idxs  = np.where(low_mask)[0]

    matches_high = []
    matches_low  = []

    all_tracks_idx = np.arange(T, dtype=np.int32)
    unmatched_tracks = all_tracks_idx.copy()
    unmatched_dets_high = high_idxs.copy()
    unmatched_dets_low  = low_idxs.copy()

    LARGE = 1e6

    # --- A) HIGH-конфиденс стадия ---
    if T > 0 and high_idxs.size > 0:
        dets_h_xyxy = dets_xyxy[high_idxs]
        dets_h_xyah = dets_xyah[high_idxs]
        dets_h_cls  = det_classes[high_idxs]

        # IoU
        tracks_xyxy = xyah_to_xyxy(tracks_xyah)
        iou_h = iou_matrix(tracks_xyxy, dets_h_xyxy)          # (T,Nh)
        iou_dist_h = 1.0 - iou_h

        # Mahalanobis
        maha_h = maha_distance_matrix(tracks_xyah, tracks_S, dets_h_xyah)
        gate_mask = maha_h <= gate_thresh

        # appearance (если есть)
        if appearance_cost_high is not None and appearance_cost_high.shape == iou_h.shape:
            d_app_h = appearance_cost_high[:, high_idxs]
        else:
            d_app_h = np.zeros_like(iou_h, dtype=np.float32)

        cost_h = alpha * iou_dist_h + beta * d_app_h

        # инвалидация по классу + гейтинг
        for t in range(T):
            for j, d_global in enumerate(high_idxs):
                if track_classes[t] != det_classes[d_global]:
                    cost_h[t, j] = LARGE
                elif not gate_mask[t, j]:
                    cost_h[t, j] = LARGE

        row_ind, col_ind = linear_sum_assignment(cost_h)

        matched_tracks_mask = np.zeros(T, dtype=bool)
        matched_dets_mask_h = np.zeros(high_idxs.size, dtype=bool)

        for r, c in zip(row_ind, col_ind):
            if cost_h[r, c] >= LARGE:
                continue
            if iou_h[r, c] < iou_thresh_high:
                continue

            t_idx = r
            d_idx = high_idxs[c]
            matches_high.append((t_idx, d_idx))
            matched_tracks_mask[t_idx] = True
            matched_dets_mask_h[c] = True

        unmatched_tracks    = all_tracks_idx[~matched_tracks_mask]
        unmatched_dets_high = high_idxs[~matched_dets_mask_h]

    # --- B) LOW-конфиденс стадия ByteTrack (только IoU+motion) ---
    if unmatched_tracks.size > 0 and low_idxs.size > 0:
        tracks_u_xyah = tracks_xyah[unmatched_tracks]
        tracks_u_S    = tracks_S[unmatched_tracks]
        tracks_u_cls  = track_classes[unmatched_tracks]

        dets_l_xyxy = dets_xyxy[low_idxs]
        dets_l_xyah = dets_xyah[low_idxs]
        dets_l_cls  = det_classes[low_idxs]

        tracks_u_xyxy = xyah_to_xyxy(tracks_u_xyah)
        iou_l = iou_matrix(tracks_u_xyxy, dets_l_xyxy)
        iou_dist_l = 1.0 - iou_l

        maha_l = maha_distance_matrix(tracks_u_xyah, tracks_u_S, dets_l_xyah)
        gate_mask_l = maha_l <= gate_thresh

        cost_l = iou_dist_l.copy()

        for ti, t_idx in enumerate(unmatched_tracks):
            for j, d_global in enumerate(low_idxs):
                if tracks_u_cls[ti] != dets_l_cls[j]:
                    cost_l[ti, j] = LARGE
                elif not gate_mask_l[ti, j]:
                    cost_l[ti, j] = LARGE

        row_ind_l, col_ind_l = linear_sum_assignment(cost_l)

        matched_tracks_mask_u = np.zeros(unmatched_tracks.size, dtype=bool)
        matched_dets_mask_l   = np.zeros(low_idxs.size, dtype=bool)

        for r, c in zip(row_ind_l, col_ind_l):
            if cost_l[r, c] >= LARGE:
                continue
            if iou_l[r, c] < iou_thresh_low:
                continue

            t_idx = unmatched_tracks[r]
            d_idx = low_idxs[c]
            matches_low.append((t_idx, d_idx))
            matched_tracks_mask_u[r] = True
            matched_dets_mask_l[c]   = True

        unmatched_tracks   = unmatched_tracks[~matched_tracks_mask_u]
        unmatched_dets_low = low_idxs[~matched_dets_mask_l]

    return {
        "matches_high": np.array(matches_high, dtype=np.int32).reshape(-1, 2),
        "matches_low":  np.array(matches_low,  dtype=np.int32).reshape(-1, 2),
        "unmatched_tracks":   unmatched_tracks,
        "unmatched_dets_high": unmatched_dets_high,
        "unmatched_dets_low":  unmatched_dets_low,
    }



In [None]:
class TrackState(IntEnum):
    """Lifecycle state of a Track."""

    # Default state of a new Track; Track.update promotes it to CONFIRMED once
    # hits >= n_init, Track.mark_missed moves it straight to REMOVED.
    TENTATIVE = 0
    # Set by Track.update for a promoted TENTATIVE track or a re-matched LOST track.
    CONFIRMED = 1
    # Set by Track.mark_missed for a CONFIRMED/LOST track that has not yet
    # exceeded max_time_lost.
    LOST = 2
    # Terminal state; Tracker.update drops tracks for which is_removed() is True.
    REMOVED = 3


@dataclass
class Track:
    """
    A single tracked object: a Kalman filter plus lifecycle bookkeeping,
    the box trajectory and a bank of appearance features.
    """

    track_id: int
    kf: "KalmanFilterXYAH"
    class_id: int
    n_init: int
    max_time_lost: int

    state: TrackState = TrackState.TENTATIVE
    hits: int = 1
    age: int = 1
    time_since_update: int = 0
    score: float = 0.0

    last_xyah: np.ndarray = field(default_factory=lambda: np.zeros(4, dtype=np.float32))
    last_S:   np.ndarray = field(default_factory=lambda: np.eye(4, dtype=np.float32))

    trajectory_xyxy: list = field(default_factory=list)
    features: list = field(default_factory=list)  # list of np.ndarray (appearance features)

    # --- state predicates ---

    def is_tentative(self) -> bool:
        return self.state == TrackState.TENTATIVE

    def is_confirmed(self) -> bool:
        return self.state == TrackState.CONFIRMED

    def is_lost(self) -> bool:
        return self.state == TrackState.LOST

    def is_removed(self) -> bool:
        return self.state == TrackState.REMOVED

    # --- internal helper ---

    def _sync_from_filter(self) -> np.ndarray:
        """Cache the filter's projected measurement and covariance; return the box as xyxy."""
        self.last_xyah, self.last_S = self.kf.project()
        return xyah_to_xyxy(self.last_xyah[None, :])[0]

    # --- initialisation ---

    def initiate_from_detection(self, meas_xyah: np.ndarray, score: float, feature: np.ndarray | None = None):
        """Start the track from its first detection: initialise the filter,
        reset the counters and record the first trajectory point (and feature, if given)."""
        self.kf.initiate(meas_xyah)
        first_box = self._sync_from_filter()

        self.score = float(score)
        self.age, self.hits, self.time_since_update = 1, 1, 0

        self.trajectory_xyxy.append(first_box)
        if feature is not None:
            self.features.append(feature)

    # --- prediction ---

    def predict(self):
        """Advance the filter by one step and append the predicted box to the trajectory.
        Does nothing while the filter is uninitialised."""
        if not self.kf.initialized:
            return

        self.kf.predict()
        predicted_box = self._sync_from_filter()

        self.age += 1
        self.time_since_update += 1
        self.trajectory_xyxy.append(predicted_box)

    # --- update from a matched detection ---

    def update(
        self,
        meas_xyah: np.ndarray,
        score: float,
        feature: np.ndarray | None = None,
        use_kalman: bool = True,
    ):
        """
        Register a matched detection.

        The hit counter and score are always refreshed; the Kalman correction is
        applied only when use_kalman is True. The latest trajectory point is
        overwritten with the resulting box, and `feature` (if given) is stored.
        """
        self.time_since_update = 0
        self.hits += 1
        self.score = float(score)

        if use_kalman:
            self.kf.update(meas_xyah)
        corrected_box = self._sync_from_filter()

        # predict() already appended a point for this frame — replace it.
        if self.trajectory_xyxy:
            self.trajectory_xyxy[-1] = corrected_box
        else:
            self.trajectory_xyxy.append(corrected_box)

        if feature is not None:
            self.features.append(feature)

        promoted = self.state == TrackState.TENTATIVE and self.hits >= self.n_init
        recovered = self.state == TrackState.LOST
        if promoted or recovered:
            self.state = TrackState.CONFIRMED

    # --- frame without a match ---

    def mark_missed(self):
        """Handle a frame with no matching detection: tentative tracks are removed;
        confirmed/lost tracks become LOST, or REMOVED once time_since_update
        exceeds max_time_lost."""
        if self.state == TrackState.TENTATIVE:
            self.state = TrackState.REMOVED
            return
        if self.state not in (TrackState.CONFIRMED, TrackState.LOST):
            return

        expired = self.time_since_update > self.max_time_lost
        self.state = TrackState.REMOVED if expired else TrackState.LOST

    # --- getters ---

    def current_xyxy(self) -> np.ndarray:
        """Box of the cached projection (last_xyah) in xyxy form."""
        return xyah_to_xyxy(self.last_xyah[None, :])[0]

    # --- ID bank (average of the last K features) ---

    def get_feature_centroid(self, max_k: int = REID_MAX_FEATURES) -> np.ndarray | None:
        """
        Return the L2-normalised centroid of the last max_k appearance features,
        or None if the track has no features yet.
        """
        if not self.features:
            return None

        recent = self.features[-int(max_k):]
        bank = np.stack(recent, axis=0).astype(np.float32)

        # re-normalise every stored vector before averaging
        row_norms = np.linalg.norm(bank, axis=1, keepdims=True) + 1e-12
        bank = bank / row_norms

        centroid = bank.mean(axis=0)
        length = float(np.linalg.norm(centroid))
        if length > 0.0:
            centroid /= length
        return centroid


In [None]:
def compute_appearance_cost_matrix(
    tracks: list[Track],
    det_classes: np.ndarray,
    det_features: np.ndarray,
    enabled_classes: set[int] = REID_ENABLED_CLASSES,
    max_features: int = REID_MAX_FEATURES,
) -> np.ndarray:
    """
    Appearance cost d_app(t,d) = 1 - cos_sim(track_centroid, det_feature).

    tracks       : list of T tracks
    det_classes  : (N,) class ids of the detections
    det_features : (N,D) appearance features of the detections

    Returns a float32 matrix of shape (T,N). An entry is 0 (no appearance
    evidence) when the track's class is not in enabled_classes, the track has
    no usable centroid, the detection belongs to another class, or the
    detection's feature vector is all-zero (a placeholder for "no feature").
    """
    det_classes = np.asarray(det_classes, dtype=np.int32)
    det_features = np.asarray(det_features, dtype=np.float32)

    T = len(tracks)
    N = det_features.shape[0]
    cost_app = np.zeros((T, N), dtype=np.float32)
    if T == 0 or N == 0:
        return cost_app

    eps = 1e-6

    # Normalise the detection features once instead of once per track.
    det_norms = np.linalg.norm(det_features, axis=1)
    det_unit = det_features / (det_norms[:, None] + 1e-12)
    # A zero vector would give cos_sim 0 and thus cost 1.0, penalising the pair
    # although nothing is known about its appearance — treat it as missing.
    det_has_feature = det_norms > eps

    for ti, tr in enumerate(tracks):
        if tr.class_id not in enabled_classes:
            continue

        centroid = tr.get_feature_centroid(max_k=max_features)
        if centroid is None:
            continue

        centroid = np.asarray(centroid, dtype=np.float32)
        c_norm = float(np.linalg.norm(centroid))
        if c_norm <= eps:
            # The bank holds only zero vectors: no usable appearance model.
            continue
        centroid = centroid / c_norm

        idxs = np.flatnonzero((det_classes == tr.class_id) & det_has_feature)
        if idxs.size == 0:
            continue

        sims = np.clip(det_unit[idxs] @ centroid, -1.0, 1.0)
        cost_app[ti, idxs] = (1.0 - sims).astype(np.float32)

    return cost_app



In [None]:
class Tracker:
    """
    OC-SORT/DeepSORT-style tracker:

      - Kalman filter over [x,y,a,h,vx,vy,va]
      - ByteTrack + Mahalanobis gating + Hungarian assignment
      - track lifecycle management
      - appearance (OSNet) via Track.features + get_feature_centroid
    """

    def __init__(
        self,
        fps: float,
        n_init: int = 3,
        max_time_lost: int = 30,
        thr_high: float = 0.5,
        thr_low: float = 0.1,
        iou_thresh_high: float = 0.3,
        iou_thresh_low: float = 0.1,
        gate_thresh: float = 5.99,
        alpha: float = 0.5,
        beta: float = 0.5,
        track_conf_thr: float = TRACK_CONF_THR,
    ):
        # Kalman time step derived from the video frame rate
        self.dt = 1.0 / float(fps)

        # lifecycle parameters
        self.n_init = int(n_init)
        self.max_time_lost = int(max_time_lost)

        # association parameters
        self.thr_high = float(thr_high)
        self.thr_low = float(thr_low)
        self.iou_thresh_high = float(iou_thresh_high)
        self.iou_thresh_low = float(iou_thresh_low)
        self.gate_thresh = float(gate_thresh)
        self.alpha = float(alpha)
        self.beta = float(beta)

        # minimum score for a matched detection to correct the Kalman filter
        self.track_conf_thr = float(track_conf_thr)

        self.tracks: list[Track] = []
        self._next_id: int = 1

    # --- internal helper: create a track ---

    def _spawn_track(
        self,
        meas_xyah: np.ndarray,
        score: float,
        class_id: int,
        feature: np.ndarray | None = None,
    ):
        """Create a track with the next free id, initialise it from the detection
        and add it to self.tracks."""
        new_track = Track(
            track_id=self._next_id,
            kf=KalmanFilterXYAH(dt=self.dt),
            class_id=int(class_id),
            n_init=self.n_init,
            max_time_lost=self.max_time_lost,
        )
        new_track.initiate_from_detection(meas_xyah, score, feature=feature)
        self.tracks.append(new_track)
        self._next_id += 1

    def _drop_removed(self):
        """Forget every track that is in the REMOVED state."""
        self.tracks = [tr for tr in self.tracks if not tr.is_removed()]

    # --- main tracker step ---

    def update(
        self,
        det: dict,
        det_features: np.ndarray | None = None,
    ):
        """
        Update the tracker with the detections of one frame.

        det — dict with the entries "xyxy", "scores", "cls",
              "valid_size_mask" and "inside_roi_mask"
        det_features — per-detection appearance features (OSNet), or None.

        Returns the list of tracks that are alive after this frame.
        """
        self._drop_removed()

        # predict every track
        for tr in self.tracks:
            tr.predict()

        T = len(self.tracks)
        if T == 0:
            tracks_xyah = np.zeros((0, 4), dtype=np.float32)
            tracks_S = np.zeros((0, 4, 4), dtype=np.float32)
            track_classes = np.zeros((0,), dtype=np.int32)
        else:
            tracks_xyah = np.stack([tr.last_xyah for tr in self.tracks], axis=0)
            tracks_S = np.stack([tr.last_S for tr in self.tracks], axis=0)
            track_classes = np.array([tr.class_id for tr in self.tracks], dtype=np.int32)

        dets_xyxy = np.asarray(det["xyxy"], dtype=np.float32)
        scores = np.asarray(det["scores"], dtype=np.float32)
        det_classes = np.asarray(det["cls"], dtype=np.int32)
        valid_size_mask = np.asarray(det["valid_size_mask"], dtype=bool)
        inside_roi_mask = np.asarray(det["inside_roi_mask"], dtype=bool)

        # no detections at all: every track misses this frame
        if dets_xyxy.shape[0] == 0:
            for tr in self.tracks:
                tr.mark_missed()
            self._drop_removed()
            return self.tracks

        dets_xyah = xyxy_to_xyah(dets_xyxy)

        # appearance cost for the HIGH-confidence stage
        appearance_cost_high = None
        if det_features is not None and T > 0:
            appearance_cost_high = compute_appearance_cost_matrix(
                tracks=self.tracks,
                det_classes=det_classes,
                det_features=det_features,
                enabled_classes=REID_ENABLED_CLASSES,
                max_features=REID_MAX_FEATURES,
            )

        assoc = byte_maha_associate(
            tracks_xyah=tracks_xyah,
            tracks_S=tracks_S,
            dets_xyxy=dets_xyxy,
            dets_xyah=dets_xyah,
            dets_scores=scores,
            valid_size_mask=valid_size_mask,
            inside_roi_mask=inside_roi_mask,
            thr_high=self.thr_high,
            thr_low=self.thr_low,
            iou_thresh_high=self.iou_thresh_high,
            iou_thresh_low=self.iou_thresh_low,
            gate_thresh=self.gate_thresh,
            alpha=self.alpha,
            beta=self.beta,
            appearance_cost_high=appearance_cost_high,
            track_classes=track_classes,
            det_classes=det_classes,
        )

        def feature_of(d_idx):
            return None if det_features is None else det_features[d_idx]

        # HIGH-confidence matches first, then the LOW-confidence ByteTrack matches;
        # both are applied the same way.
        matched_track_ids = set()
        for stage_matches in (assoc["matches_high"], assoc["matches_low"]):
            for t_idx, d_idx in stage_matches:
                matched_track_ids.add(int(t_idx))
                sc = scores[d_idx]
                self.tracks[t_idx].update(
                    dets_xyah[d_idx],
                    sc,
                    feature=feature_of(d_idx),
                    use_kalman=(sc >= self.track_conf_thr),
                )

        # tracks without a match
        for t_idx, tr in enumerate(self.tracks):
            if t_idx not in matched_track_ids:
                tr.mark_missed()

        # new tracks from unmatched HIGH detections
        for d_idx in assoc["unmatched_dets_high"]:
            self._spawn_track(
                dets_xyah[d_idx],
                scores[d_idx],
                int(det_classes[d_idx]),
                feature=feature_of(d_idx),
            )

        self._drop_removed()
        return self.tracks

In [None]:
def compute_roi_rect(frame_shape):
    """
    Convert the relative ROI constants (ROI_*_REL) into a pixel rectangle.

    frame_shape: array shape whose first two entries are (height, width).

    Returns (x1, y1, x2, y2) as ints with x1 <= x2 and y1 <= y2. The corners
    are ordered explicitly: with a configuration whose second corner lies
    above or left of the first one, an unordered rectangle would make every
    "x1 <= box <= x2" style containment test fail.
    """
    h, w = frame_shape[:2]
    xa, xb = int(ROI_X1_REL * w), int(ROI_X2_REL * w)
    ya, yb = int(ROI_Y1_REL * h), int(ROI_Y2_REL * h)
    return min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)

def run_yolo10_on_frame(frame_bgr: np.ndarray, conf_thr: float = 0.85):
    """
    Run YOLOv10 on one frame and package the detections.

    Returns a dict with:
      xyxy            : (N,4) float32 boxes
      scores          : (N,) float32 confidences
      cls             : (N,) int32 class ids
      valid_size_mask : (N,) bool — box height >= MIN_BOX_HEIGHT
      inside_roi_mask : (N,) bool — box lies completely inside the ROI rectangle
      roi_xyxy        : the ROI rectangle (x1, y1, x2, y2) in pixels
    All per-detection arrays are empty when nothing is detected.
    """
    roi_x1, roi_y1, roi_x2, roi_y2 = compute_roi_rect(frame_bgr.shape)

    # === YOLOv10 ===
    with torch.no_grad():
        prediction = yolo_model.predict(
            frame_bgr,
            conf=conf_thr,
            iou=0.5,
            device=device,
            verbose=False,
            imgsz=960,
        )[0]

    found = prediction.boxes
    if found is None or len(found) == 0:
        boxes_xyxy = np.zeros((0, 4), dtype=np.float32)
        scores = np.zeros((0,), dtype=np.float32)
        cls = np.zeros((0,), dtype=np.int32)
    else:
        boxes_xyxy = found.xyxy.cpu().numpy().astype(np.float32)
        scores = found.conf.cpu().numpy().astype(np.float32)
        cls = found.cls.cpu().numpy().astype(np.int32)

    # size filter: the box must be tall enough
    box_heights = boxes_xyxy[:, 3] - boxes_xyxy[:, 1]
    valid_size_mask = box_heights >= MIN_BOX_HEIGHT

    # ROI filter: all four edges must lie within the ROI
    left_top_ok = (boxes_xyxy[:, 0] >= roi_x1) & (boxes_xyxy[:, 1] >= roi_y1)
    right_bottom_ok = (boxes_xyxy[:, 2] <= roi_x2) & (boxes_xyxy[:, 3] <= roi_y2)
    inside_roi_mask = left_top_ok & right_bottom_ok

    return {
        "xyxy": boxes_xyxy,
        "scores": scores,
        "cls": cls,
        "valid_size_mask": valid_size_mask,
        "inside_roi_mask": inside_roi_mask,
        "roi_xyxy": (roi_x1, roi_y1, roi_x2, roi_y2),
    }

def draw_detections(frame_bgr: np.ndarray, det: dict) -> np.ndarray:
    """
    Draw the ROI overlay and every detection (box + label) on a copy of the frame.

    Box colours (BGR):
      - red    - box too small (valid_size_mask is False)
      - blue   - valid size, but outside the ROI
      - green  - valid size, inside the ROI, score >= TRACK_CONF_THR
      - orange - valid size, inside the ROI, score < TRACK_CONF_THR

    Args:
        frame_bgr: Source image; it is copied, not modified.
        det: Detection dict with keys "roi_xyxy", "xyxy", "scores", "cls",
            "valid_size_mask" and "inside_roi_mask".

    Returns:
        A new image with the ROI overlay, boxes and labels drawn.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.42
    font_thickness = 1
    label_gap = 5  # pixels between the box top edge and the label baseline

    img = frame_bgr.copy()
    roi_x1, roi_y1, roi_x2, roi_y2 = det["roi_xyxy"]

    # Semi-transparent cyan ROI: fill it on a copy, then blend the copy back.
    overlay = img.copy()
    cv2.rectangle(
        overlay,
        (roi_x1, roi_y1),
        (roi_x2, roi_y2),
        (255, 255, 0),  # BGR (cyan)
        thickness=-1,
    )
    alpha = 0.15
    img = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)

    boxes = det["xyxy"]
    scores = det["scores"]
    cls = det["cls"]
    valid_size_mask = det["valid_size_mask"]
    inside_roi_mask = det["inside_roi_mask"]

    for i in range(boxes.shape[0]):
        x1, y1, x2, y2 = boxes[i]
        p1 = (int(x1), int(y1))
        p2 = (int(x2), int(y2))

        if not valid_size_mask[i]:
            color = (0, 0, 255)        # red
        elif not inside_roi_mask[i]:
            color = (255, 0, 0)        # blue
        elif scores[i] >= TRACK_CONF_THR:
            color = (0, 255, 0)        # green
        else:
            color = (0, 165, 255)      # orange

        cv2.rectangle(img, p1, p2, color, 1)

        class_id = int(cls[i])
        name = COCO_CLASSES[class_id] if 0 <= class_id < len(COCO_CLASSES) else str(class_id)
        label = f"{name} {scores[i]:.2f}"

        # Default position is just above the box. If the text would stick out
        # above the frame (box touching the top edge), put it inside the box.
        (_, text_h), _ = cv2.getTextSize(label, font, font_scale, font_thickness)
        label_y = p1[1] - label_gap
        if label_y - text_h < 0:
            label_y = max(p1[1], 0) + text_h + label_gap

        cv2.putText(
            img,
            label,
            (p1[0], label_y),
            font,
            font_scale,
            color,
            font_thickness,
            cv2.LINE_AA,
        )

    return img




In [None]:
def draw_tracks_on_frame(
    frame_bgr: np.ndarray,
    tracks: "list[Track]",
    draw_tentative: bool = True,
    draw_lost: bool = False,
    traj_len: int = 30,
) -> np.ndarray:
    """
    Draw tracks (box, label and short trajectory) on a copy of the frame.

    Colours (BGR) by track state:
      - confirmed - green
      - tentative - yellow (skipped when draw_tentative is False)
      - lost      - magenta (skipped unless draw_lost is True)
      - any other non-removed state - white
    Removed tracks are never drawn.

    Args:
        frame_bgr: Source image; it is copied, not modified.
        tracks: Tracks to render.
        draw_tentative: Whether tentative tracks are drawn.
        draw_lost: Whether lost tracks are drawn.
        traj_len: Number of most recent trajectory boxes whose centres are
            connected into a polyline. A value <= 0 disables the trajectory.

    Returns:
        A new image with the tracks drawn.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.42
    font_thickness = 1
    label_gap = 7  # pixels between the box top edge and the label baseline

    img = frame_bgr.copy()

    for tr in tracks:
        if tr.is_removed():
            continue

        if tr.is_confirmed():
            color = (0, 255, 0)
        elif tr.is_tentative():
            if not draw_tentative:
                continue
            color = (0, 255, 255)
        elif tr.is_lost():
            if not draw_lost:
                continue
            color = (255, 0, 255)
        else:
            color = (255, 255, 255)

        x1, y1, x2, y2 = tr.current_xyxy()
        p1 = (int(x1), int(y1))
        p2 = (int(x2), int(y2))

        cv2.rectangle(img, p1, p2, color, 1)

        label = f"ID {tr.track_id} | c{tr.class_id}"

        # Default position is just above the box. If the text would stick out
        # above the frame (box touching the top edge), put it inside the box.
        (_, text_h), _ = cv2.getTextSize(label, font, font_scale, font_thickness)
        label_y = p1[1] - label_gap
        if label_y - text_h < 0:
            label_y = max(p1[1], 0) + text_h + label_gap

        cv2.putText(
            img,
            label,
            (p1[0], label_y),
            font,
            font_scale,
            color,
            font_thickness,
            cv2.LINE_AA,
        )

        # Trajectory. The explicit traj_len > 0 check matters: a slice of
        # [-0:] would select the whole history instead of nothing.
        if traj_len > 0 and tr.trajectory_xyxy:
            recent = tr.trajectory_xyxy[-traj_len:]
            centers = [
                (int(0.5 * (bx[0] + bx[2])), int(0.5 * (bx[1] + bx[3])))
                for bx in recent
            ]

            if len(centers) >= 2:
                pts_arr = np.array(centers, dtype=np.int32).reshape((-1, 1, 2))
                cv2.polylines(img, [pts_arr], isClosed=False, color=color, thickness=1)

    return img


In [None]:
# ==== Run the pipeline over the video: detect -> ReID features -> track -> render ====
video_path = r"vid.mp4"
output_path = r"output_with_tracks_osnet.mp4"

orig_w, orig_h, fps = get_video_info(video_path)
print(f"Видео: {orig_w}x{orig_h}, fps={fps:.3f}")

tracker = Tracker(
    fps=fps,
    n_init=3,
    max_time_lost=30,
    thr_high=0.5,
    thr_low=0.1,
    iou_thresh_high=0.3,
    iou_thresh_low=0.1,
    gate_thresh=5.99,      # can be tightened relative to 9.48
    alpha=0.5,
    beta=0.5,
    track_conf_thr=TRACK_CONF_THR,
)

gen = ffmpeg_frame_generator(video_path, resize_to=None)
writer_proc = create_ffmpeg_writer(output_path, orig_w, orig_h, fps)

# Set only after the frame loop finishes without raising, so the final
# success message is not printed for an aborted run.
finished = False
try:
    for idx, frame in enumerate(gen):
        # 1) YOLOv10 detections
        det = run_yolo10_on_frame(frame, conf_thr=0.25)

        # 2) ReID (OSNet) features for persons; the second return value is unused here
        det_features, _ = extract_person_reid_features(frame, det)

        # 3) tracker update
        tracks = tracker.update(det, det_features=det_features)

        # 4) visualisation: detections first, tracks drawn on top
        frame_det = draw_detections(frame, det)
        frame_out = draw_tracks_on_frame(frame_det, tracks)

        # Raw frame bytes go to the ffmpeg writer's stdin.
        writer_proc.stdin.write(frame_out.tobytes())

        if idx % 50 == 0:
            print(f"Обработан кадр {idx}")
    finished = True
finally:
    # Always close the pipe and wait for ffmpeg, even on failure, so the
    # writer process can finish and is not left running.
    if writer_proc.stdin is not None:
        writer_proc.stdin.close()
    ret = writer_proc.wait()
    print("ffmpeg завершился с кодом", ret)
    # Report success only when every frame was processed and ffmpeg exited cleanly.
    if finished and ret == 0:
        print("Готово, результат в:", output_path)
