## Установка библиотек

In [None]:
# Install the detector (ultralytics), OpenCV and helper packages
# (cython, cython_bbox, loguru, lap, lapjv).
!pip install ultralytics opencv-python cython cython_bbox loguru lap lapjv
# Replace the preinstalled numpy/scipy with pinned versions.
# NOTE(review): presumably pinned for compatibility with ByteTrack — confirm.
!pip uninstall -y numpy scipy
!pip install numpy==1.23.5 scipy==1.10.1 --force-reinstall

# Clone ByteTrack and install it in editable mode from its checkout.
!git clone https://github.com/ifzhang/ByteTrack.git
%cd ByteTrack
!pip install -r requirements.txt
!pip install -v -e .

# Install the COCO Python API directly from its GitHub repository.
!pip3 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
# NOTE(review): this repeats the requirements install done above and may
# change the numpy/scipy versions pinned earlier — verify.
%cd /content/ByteTrack/
!pip3 install -r requirements.txt

## Подключение данных с диска

In [1]:
# Mount Google Drive at /content/drive; the dataset, weights and map image
# paths used in the cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Обучение модели YOLO

In [None]:
from ultralytics import YOLO

# Load the 'yolo11x-obb.pt' weights as the starting point for training.
model = YOLO('yolo11x-obb.pt')

# Train on the dataset described by data.yaml (100 epochs, 640 px images,
# batch size 8) and keep the value returned by train().
result = model.train(
    data='/content/drive/MyDrive/project/YOLO_dataset2/data.yaml',
    epochs=100,
    imgsz=640,
    batch=8,
)

## Обработка видео с помощью YOLO и ByteTrack

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import random
import time
from yolox.tracker.byte_tracker import BYTETracker

# Settings container handed to the tracker
class Args:
    """Attribute bag passed to ``BYTETracker`` as its ``args`` object.

    All values default to the settings this notebook used originally, so
    ``Args()`` behaves exactly as before; any of them can now be overridden
    at construction time instead of by mutating the instance afterwards.

    Args:
        track_thresh: Score threshold read by the tracker.
            (Presumably the high-confidence detection cut-off — see the
            ByteTrack sources to confirm.)
        track_buffer: Buffer length, in frames, read by the tracker.
        match_thresh: Matching threshold read by the tracker.
        mot20: Flag read by the tracker (presumably enables MOT20-specific
            handling — confirm against ByteTrack).
    """

    def __init__(self, track_thresh=0.5, track_buffer=60 * 2, match_thresh=0.8, mot20=False):
        self.track_thresh = track_thresh
        self.track_buffer = track_buffer
        self.match_thresh = match_thresh
        self.mot20 = mot20

# Homography: 3x3 matrix that warp_point() applies to frame pixel
# coordinates to obtain coordinates on the map image.
H = np.array([
     [ 3.01724526e-01,  7.96760283e-01, -9.53384731e+01],
     [-1.85323315e-01,  9.01755294e-01,  2.43311032e+01],
     [-8.30545354e-05,  1.28294896e-03,  1.00000000e+00]
], dtype=np.float32)

# Output rendering settings
# Maximum number of map points kept per trajectory; older points are dropped.
MAX_TRAJECTORY_LENGTH = 60 * 2
# A track not seen for more than this many frames has its trajectory removed;
# the same value is also assigned to the tracker's track_buffer.
MAX_MISSED_FRAMES = 60

# Object colours
# Cache: track ID -> BGR colour tuple, filled by get_bright_color_by_id().
color_map = {}

# Deterministic colour generation for object identifiers
def get_bright_color_by_id(track_id):
    """Return a bright BGR colour that is stable for ``track_id``.

    The hue is drawn from a private ``random.Random`` seeded with the track
    ID, so the same ID always yields the same colour while the process-wide
    ``random`` state is left untouched (the previous ``random.seed`` call
    reseeded the global generator on every new ID). Saturation and value are
    fixed at 255. Results are cached in the module-level ``color_map``.

    Args:
        track_id: Identifier of the tracked object; used as the RNG seed and
            as the cache key.

    Returns:
        A ``(b, g, r)`` tuple of Python ints.
    """
    if track_id not in color_map:
        # Same sequence as random.seed(track_id) + random.randint(...),
        # but without clobbering the shared generator.
        hue = random.Random(track_id).randint(0, 179)
        hsv = np.uint8([[[hue, 255, 255]]])
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)[0][0]
        color_map[track_id] = tuple(int(channel) for channel in bgr)
    return color_map[track_id]

# track ID -> list of (x, y) points on the map, oldest first
trajectories = {}
# track ID -> index of the last frame in which the track was reported
last_seen_frame = {}

def warp_point(H, point):
    """Project a 2-D point through the 3x3 matrix ``H``.

    The point is lifted to homogeneous coordinates ``(x, y, 1)``, multiplied
    by ``H`` and divided by the resulting third component.

    Args:
        H: 3x3 projective transform.
        point: Indexable with at least two elements, ``(x, y)``.

    Returns:
        ``(x, y)`` of the projected point as Python ints (truncated toward
        zero).
    """
    homogeneous = np.array([point[0], point[1], 1.0])
    u, v, scale = H @ homogeneous
    # Normalise by the homogeneous coordinate, then truncate to pixels.
    return int(u / scale), int(v / scale)

def draw_obb_with_ids(image, obb_tensor, tracked_objects):
    """Draw, for every tracked object, its nearest oriented box and its ID.

    Each tracked object is matched to the detection whose centre is closest
    to the centre of the object's ``tlwh`` box; that detection's rotated
    rectangle is outlined in the object's colour and labelled ``ID <n>``.

    Args:
        image: BGR image to draw on; modified in place.
        obb_tensor: Iterable of detection rows exposing ``.cpu().numpy()``
            and laid out as ``(x, y, w, h, angle_rad, conf, cls_id)``.
        tracked_objects: Iterable of objects with ``track_id`` and ``tlwh``
            (top-left x, top-left y, width, height) attributes.

    Returns:
        The same ``image`` object, for convenience.
    """
    # Move each detection to host memory once, instead of once per
    # (tracked object, detection) pair as before.
    detections = [item.cpu().numpy() for item in obb_tensor]

    for obj in tracked_objects:
        track_id = obj.track_id
        x_c = obj.tlwh[0] + obj.tlwh[2] / 2
        y_c = obj.tlwh[1] + obj.tlwh[3] / 2
        color = get_bright_color_by_id(track_id)

        closest_box = None
        min_dist = float('inf')
        for det in detections:
            dist = np.hypot(det[0] - x_c, det[1] - y_c)
            if dist < min_dist:
                min_dist = dist
                closest_box = det

        if closest_box is None:
            # No detections to attach this track to.
            continue

        x, y, w, h, angle_rad = (float(v) for v in closest_box[:5])
        # OpenCV rotated rectangles take the angle in degrees.
        angle_deg = angle_rad * 180 / np.pi
        rect = ((x, y), (w, h), angle_deg)
        box = cv2.boxPoints(rect).astype(int)
        cv2.drawContours(image, [box], 0, color, 2)
        label = f"ID {track_id}"
        cv2.putText(image, label, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
    return image

class BYTETrackerObject:
    """Plain record describing one tracked object in a frame.

    Attributes are stored exactly as given:
        tlwh: Box as (top-left x, top-left y, width, height).
        score: Track score.
        cls_id: Class identifier associated with the track.
        track_id: Identifier assigned by the tracker.
    """

    def __init__(self, tlwh, score, cls_id, track_id):
        self.tlwh, self.score = tlwh, score
        self.cls_id, self.track_id = cls_id, track_id

def convert_obb_to_bbox(obb_tensor):
    """Convert oriented boxes to axis-aligned boxes that enclose them.

    The previous version used ``w`` and ``h`` directly and ignored the
    rotation angle, so the axis-aligned box did not cover a rotated
    detection (at 90 degrees width and height were effectively swapped).
    The half-extents are now those of the rotated rectangle's bounding box:
    ``(w*|cos a| + h*|sin a|) / 2`` and ``(w*|sin a| + h*|cos a|) / 2``.
    For ``angle_rad == 0`` the result is identical to the old one.

    Args:
        obb_tensor: Iterable of rows exposing ``.cpu().numpy()`` and laid
            out as ``(x, y, w, h, angle_rad, conf, cls_id)``, where
            ``(x, y)`` is the box centre.

    Returns:
        Tuple ``(bboxes, scores, cls_ids)`` of NumPy arrays: ``bboxes`` has
        shape ``(N, 5)`` with rows ``[x1, y1, x2, y2, conf]``; ``scores`` and
        ``cls_ids`` have shape ``(N,)``. For empty input the shapes are
        ``(0, 5)``, ``(0,)`` and ``(0,)``.
    """
    bboxes, scores, cls_ids = [], [], []
    for item in obb_tensor:
        x, y, w, h, angle_rad, conf, cls_id = item.cpu().numpy()
        cos_a = abs(np.cos(angle_rad))
        sin_a = abs(np.sin(angle_rad))
        # Half-extents of the axis-aligned box enclosing the rotated one.
        half_w = (w * cos_a + h * sin_a) / 2
        half_h = (w * sin_a + h * cos_a) / 2
        bboxes.append([x - half_w, y - half_h, x + half_w, y + half_h, conf])
        scores.append(conf)
        cls_ids.append(cls_id)
    if bboxes:
        return np.array(bboxes), np.array(scores), np.array(cls_ids)
    return np.empty((0, 5)), np.array([]), np.array([])

def process_video_with_tracking(input_path, output_obb_path, output_map_path, model_path,
                                map_path="/content/drive/MyDrive/project/paint_map.png"):
    """Detect and track objects in a video and render two output videos.

    For every frame the YOLO model produces oriented boxes, which are
    converted to axis-aligned boxes and fed to a ``BYTETracker``. The centre
    of each reported track is projected onto the map with ``warp_point`` and
    appended to that track's trajectory. Two videos are written: the input
    frames with oriented boxes and IDs, and the map with fading trajectories.

    Changes relative to the previous version:
      * an unreadable map image or input video now raises
        ``FileNotFoundError`` instead of failing later with an obscure error;
      * the capture and both writers are released even if processing raises;
      * each track's class ID is taken from the detection nearest to the
        track, rather than by pairing tracks and detections by list position
        (the tracker's output order is not the detection order);
      * the module-level ``trajectories`` / ``last_seen_frame`` are cleared
        at the start, so a second call does not draw stale tracks;
      * the map image path is a parameter (default unchanged).

    Args:
        input_path: Path of the video to process.
        output_obb_path: Path of the output video with boxes and IDs.
        output_map_path: Path of the output video with trajectories on the map.
        model_path: Path of the YOLO weights file.
        map_path: Path of the map image used as trajectory background.

    Raises:
        FileNotFoundError: If the map image or the input video cannot be read.
    """
    model = YOLO(model_path)

    map_img = cv2.imread(map_path)
    if map_img is None:
        raise FileNotFoundError(f"Cannot read map image: {map_path}")
    map_h, map_w = map_img.shape[:2]

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise FileNotFoundError(f"Cannot open input video: {input_path}")

    # Frame indices restart at 0 for every call, so state left over from a
    # previous run would be neither extended nor expired correctly.
    trajectories.clear()
    last_seen_frame.clear()

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # NOTE(review): the output frame rate is fixed; the source FPS is not read.
    fps = 30

    out_obb = None
    out_map = None
    try:
        out_obb = cv2.VideoWriter(output_obb_path, fourcc, fps, (width, height))
        out_obb.set(cv2.VIDEOWRITER_PROP_QUALITY, 70)  # 70% quality
        out_map = cv2.VideoWriter(output_map_path, fourcc, fps, (map_w, map_h))

        args = Args()
        args.track_buffer = MAX_MISSED_FRAMES
        byte_tracker = BYTETracker(args, frame_rate=fps)

        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (width, height))
            results = model(frame)[0]
            has_detections = results.obb is not None and results.obb.data.numel() > 0
            obb_data = results.obb.data if has_detections else None

            tracked_objects = []
            if obb_data is not None:
                bboxes, scores, cls_ids = convert_obb_to_bbox(obb_data)
                if len(bboxes) > 0:
                    # Fresh array: the tracker receives its own copy.
                    dets = np.concatenate([bboxes[:, :4], scores[:, np.newaxis]], axis=1)
                    img_shape = frame.shape[:2]
                    online_targets = byte_tracker.update(dets, img_shape, img_shape)

                    det_centers = (bboxes[:, :2] + bboxes[:, 2:4]) / 2
                    for track in online_targets:
                        tlwh = track.tlwh
                        t_x = tlwh[0] + tlwh[2] / 2
                        t_y = tlwh[1] + tlwh[3] / 2
                        # Class of the detection closest to this track.
                        nearest = int(np.argmin(np.hypot(det_centers[:, 0] - t_x,
                                                         det_centers[:, 1] - t_y)))
                        tracked_objects.append(
                            BYTETrackerObject(tlwh, track.score, cls_ids[nearest], track.track_id))

            # Extend trajectories with the map position of every reported track.
            for obj in tracked_objects:
                x_c = obj.tlwh[0] + obj.tlwh[2] / 2
                y_c = obj.tlwh[1] + obj.tlwh[3] / 2
                points = trajectories.setdefault(obj.track_id, [])
                points.append(warp_point(H, (x_c, y_c)))
                if len(points) > MAX_TRAJECTORY_LENGTH:
                    del points[:-MAX_TRAJECTORY_LENGTH]
                last_seen_frame[obj.track_id] = frame_idx

            # Forget tracks that have not been seen for too long.
            stale_ids = [tid for tid, last_seen in last_seen_frame.items()
                         if frame_idx - last_seen > MAX_MISSED_FRAMES]
            for tid in stale_ids:
                last_seen_frame.pop(tid, None)
                trajectories.pop(tid, None)

            # Render trajectories: older segments are darker, the newest brightest.
            map_copy = map_img.copy()
            for track_id, points in trajectories.items():
                color = get_bright_color_by_id(track_id)
                alpha_step = 1 / len(points) if len(points) > 1 else 1
                for i in range(1, len(points)):
                    alpha = int(255 * (i * alpha_step))
                    faded_color = tuple(int(c * alpha / 255) for c in color)
                    cv2.line(map_copy, points[i - 1], points[i], faded_color, 2)
                if points:
                    cv2.putText(map_copy, f"ID {track_id}", points[-1],
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

            if obb_data is not None:
                frame = draw_obb_with_ids(frame, obb_data, tracked_objects)

            # NOTE(review): only even-numbered frames go to the box video while
            # every frame goes to the map video, so the two outputs differ in
            # length at the same FPS — confirm this is intended.
            if frame_idx % 2 == 0:
                out_obb.write(frame)
            out_map.write(map_copy)
            frame_idx += 1
    finally:
        cap.release()
        for writer in (out_obb, out_map):
            if writer is not None:
                writer.release()

    print("✅ Видео с детекцией сохранено:", output_obb_path)
    print("✅ Видео с траекториями на карте сохранено:", output_map_path)

# Run: process the test video from Drive and write both output videos
# (boxes + IDs, and trajectories on the map) under /content.
process_video_with_tracking(
    input_path="/content/drive/MyDrive/project/new_test.mp4",
    output_obb_path="/content/output_with_obb(new_test).mp4",
    output_map_path="/content/output_on_map(new_test).mp4",
    model_path="/content/drive/MyDrive/project/new_best.pt"
)


0: 384x640 None66.6ms
Speed: 3.3ms preprocess, 66.6ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 None39.6ms
Speed: 3.7ms preprocess, 39.6ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 None38.4ms
Speed: 3.0ms preprocess, 38.4ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 None38.3ms
Speed: 3.1ms preprocess, 38.3ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 None38.4ms
Speed: 4.1ms preprocess, 38.4ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 None38.4ms
Speed: 3.4ms preprocess, 38.4ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 None38.3ms
Speed: 3.4ms preprocess, 38.3ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 None38.3ms
Speed: 3.3ms preprocess, 38.3ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 None38.3ms
Speed: 2.5ms prep

## Обработка фото

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import random
from yolox.tracker.byte_tracker import BYTETracker

# Settings container handed to the tracker
class Args:
    """Attribute bag passed to ``BYTETracker`` as its ``args`` object.

    All values default to the settings this notebook used originally, so
    ``Args()`` behaves exactly as before; any of them can now be overridden
    at construction time.

    Args:
        track_thresh: Score threshold read by the tracker.
            (Presumably the high-confidence detection cut-off — see the
            ByteTrack sources to confirm.)
        track_buffer: Buffer length, in frames, read by the tracker.
        match_thresh: Matching threshold read by the tracker.
        mot20: Flag read by the tracker (presumably enables MOT20-specific
            handling — confirm against ByteTrack).
    """

    def __init__(self, track_thresh=0.5, track_buffer=60 * 2, match_thresh=0.8, mot20=False):
        self.track_thresh = track_thresh
        self.track_buffer = track_buffer
        self.match_thresh = match_thresh
        self.mot20 = mot20

# Homography: 3x3 matrix that warp_point() applies to image pixel
# coordinates to obtain coordinates on the map image.
# NOTE(review): these coefficients differ from the matrix used in the video
# cell — confirm which calibration is the intended one.
H = np.array([
    [3.96852726e-01, 7.24499584e-01, -1.16155480e+02],
    [-1.68093627e-01, 8.89571158e-01, 2.07623167e+01],
    [1.33553666e-04, 1.05162718e-03, 1.00000000e+00]
], dtype=np.float32)

# Maximum number of map points kept per trajectory; older points are dropped.
MAX_TRAJECTORY_LENGTH = 60

# Cache: track ID -> BGR colour tuple, filled by get_bright_color_by_id().
color_map = {}
# track ID -> list of (x, y) points on the map, oldest first
trajectories = {}

def get_bright_color_by_id(track_id):
    """Return a bright BGR colour that is stable for ``track_id``.

    The hue is drawn from a private ``random.Random`` seeded with the track
    ID, so the same ID always yields the same colour while the process-wide
    ``random`` state is left untouched (the previous ``random.seed`` call
    reseeded the global generator on every new ID). Saturation and value are
    fixed at 255. Results are cached in the module-level ``color_map``.

    Args:
        track_id: Identifier of the tracked object; used as the RNG seed and
            as the cache key.

    Returns:
        A ``(b, g, r)`` tuple of Python ints.
    """
    if track_id not in color_map:
        # Same sequence as random.seed(track_id) + random.randint(...),
        # but without clobbering the shared generator.
        hue = random.Random(track_id).randint(0, 179)
        hsv = np.uint8([[[hue, 255, 255]]])
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)[0][0]
        color_map[track_id] = tuple(int(channel) for channel in bgr)
    return color_map[track_id]

def warp_point(H, point):
    """Project a 2-D point through the 3x3 matrix ``H``.

    The point is lifted to homogeneous coordinates ``(x, y, 1)``, multiplied
    by ``H`` and divided by the resulting third component.

    Args:
        H: 3x3 projective transform.
        point: Indexable with at least two elements, ``(x, y)``.

    Returns:
        ``(x, y)`` of the projected point as Python ints (truncated toward
        zero).
    """
    homogeneous = np.array([point[0], point[1], 1.0])
    u, v, scale = H @ homogeneous
    # Normalise by the homogeneous coordinate, then truncate to pixels.
    return int(u / scale), int(v / scale)

def draw_obb_with_ids(image, obb_tensor, tracked_objects):
    """Draw, for every tracked object, its nearest oriented box and its ID.

    Each tracked object is matched to the detection whose centre is closest
    to the centre of the object's ``tlwh`` box; that detection's rotated
    rectangle is outlined in the object's colour and labelled ``ID <n>``.

    Args:
        image: BGR image to draw on; modified in place.
        obb_tensor: Iterable of detection rows exposing ``.cpu().numpy()``
            and laid out as ``(x, y, w, h, angle_rad, conf, cls_id)``.
        tracked_objects: Iterable of objects with ``track_id`` and ``tlwh``
            (top-left x, top-left y, width, height) attributes.

    Returns:
        The same ``image`` object, for convenience.
    """
    # Move each detection to host memory once, instead of once per
    # (tracked object, detection) pair as before.
    detections = [item.cpu().numpy() for item in obb_tensor]

    for obj in tracked_objects:
        track_id = obj.track_id
        x_c = obj.tlwh[0] + obj.tlwh[2] / 2
        y_c = obj.tlwh[1] + obj.tlwh[3] / 2
        color = get_bright_color_by_id(track_id)

        closest_box = None
        min_dist = float('inf')
        for det in detections:
            dist = np.hypot(det[0] - x_c, det[1] - y_c)
            if dist < min_dist:
                min_dist = dist
                closest_box = det

        if closest_box is None:
            # No detections to attach this track to.
            continue

        x, y, w, h, angle_rad = (float(v) for v in closest_box[:5])
        # OpenCV rotated rectangles take the angle in degrees.
        angle_deg = angle_rad * 180 / np.pi
        rect = ((x, y), (w, h), angle_deg)
        box = cv2.boxPoints(rect).astype(int)
        cv2.drawContours(image, [box], 0, color, 2)
        label = f"ID {track_id}"
        cv2.putText(image, label, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
    return image

class BYTETrackerObject:
    """Plain record describing one tracked object in an image.

    Attributes are stored exactly as given:
        tlwh: Box as (top-left x, top-left y, width, height).
        score: Track score.
        cls_id: Class identifier associated with the track.
        track_id: Identifier assigned by the tracker.
    """

    def __init__(self, tlwh, score, cls_id, track_id):
        self.tlwh, self.score = tlwh, score
        self.cls_id, self.track_id = cls_id, track_id

def convert_obb_to_bbox(obb_tensor):
    """Convert oriented boxes to axis-aligned boxes that enclose them.

    The previous version used ``w`` and ``h`` directly and ignored the
    rotation angle, so the axis-aligned box did not cover a rotated
    detection (at 90 degrees width and height were effectively swapped).
    The half-extents are now those of the rotated rectangle's bounding box:
    ``(w*|cos a| + h*|sin a|) / 2`` and ``(w*|sin a| + h*|cos a|) / 2``.
    For ``angle_rad == 0`` the result is identical to the old one.

    Args:
        obb_tensor: Iterable of rows exposing ``.cpu().numpy()`` and laid
            out as ``(x, y, w, h, angle_rad, conf, cls_id)``, where
            ``(x, y)`` is the box centre.

    Returns:
        Tuple ``(bboxes, scores, cls_ids)`` of NumPy arrays: ``bboxes`` has
        shape ``(N, 5)`` with rows ``[x1, y1, x2, y2, conf]``; ``scores`` and
        ``cls_ids`` have shape ``(N,)``. For empty input the shapes are
        ``(0, 5)``, ``(0,)`` and ``(0,)``.
    """
    bboxes, scores, cls_ids = [], [], []
    for item in obb_tensor:
        x, y, w, h, angle_rad, conf, cls_id = item.cpu().numpy()
        cos_a = abs(np.cos(angle_rad))
        sin_a = abs(np.sin(angle_rad))
        # Half-extents of the axis-aligned box enclosing the rotated one.
        half_w = (w * cos_a + h * sin_a) / 2
        half_h = (w * sin_a + h * cos_a) / 2
        bboxes.append([x - half_w, y - half_h, x + half_w, y + half_h, conf])
        scores.append(conf)
        cls_ids.append(cls_id)
    if bboxes:
        return np.array(bboxes), np.array(scores), np.array(cls_ids)
    return np.empty((0, 5)), np.array([]), np.array([])

def process_image_with_tracking(input_img_path, output_obb_path, output_map_path, model_path,
                                map_path="/content/drive/MyDrive/project/paint_map.png"):
    """Detect objects in one image and render them on the image and the map.

    The YOLO model produces oriented boxes, which are converted to
    axis-aligned boxes and passed once through a fresh ``BYTETracker`` to
    obtain IDs. Each reported track's centre is projected onto the map with
    ``warp_point``. Two images are written: the input with oriented boxes
    and IDs, and the map with the projected positions labelled by ID.

    Changes relative to the previous version:
      * unreadable input or map images raise ``FileNotFoundError`` up front;
      * a failed ``cv2.imwrite`` raises ``OSError`` instead of being reported
        as a successful save;
      * each track's class ID is taken from the detection nearest to the
        track, rather than by pairing tracks and detections by list position;
      * the module-level ``trajectories`` is cleared at the start, so points
        from earlier calls are not drawn again;
      * the map image path is a parameter (default unchanged).

    Args:
        input_img_path: Path of the image to process.
        output_obb_path: Path of the output image with boxes and IDs.
        output_map_path: Path of the output map image.
        model_path: Path of the YOLO weights file.
        map_path: Path of the map image used as background.

    Raises:
        FileNotFoundError: If the input image or the map image cannot be read.
        OSError: If either output image cannot be written.
    """
    model = YOLO(model_path)

    image = cv2.imread(input_img_path)
    if image is None:
        raise FileNotFoundError(f"Cannot read input image: {input_img_path}")
    map_img = cv2.imread(map_path)
    if map_img is None:
        raise FileNotFoundError(f"Cannot read map image: {map_path}")

    # A new tracker is created for every call, so points recorded by an
    # earlier call can never be continued; drop them.
    trajectories.clear()

    args = Args()
    byte_tracker = BYTETracker(args, frame_rate=30)

    results = model(image)[0]
    has_detections = results.obb is not None and results.obb.data.numel() > 0
    obb_data = results.obb.data if has_detections else None

    tracked_objects = []
    if obb_data is not None:
        bboxes, scores, cls_ids = convert_obb_to_bbox(obb_data)
        if len(bboxes) > 0:
            # Fresh array: the tracker receives its own copy.
            dets = np.concatenate([bboxes[:, :4], scores[:, np.newaxis]], axis=1)
            img_shape = image.shape[:2]
            online_targets = byte_tracker.update(dets, img_shape, img_shape)

            det_centers = (bboxes[:, :2] + bboxes[:, 2:4]) / 2
            for track in online_targets:
                tlwh = track.tlwh
                t_x = tlwh[0] + tlwh[2] / 2
                t_y = tlwh[1] + tlwh[3] / 2
                # Class of the detection closest to this track.
                nearest = int(np.argmin(np.hypot(det_centers[:, 0] - t_x,
                                                 det_centers[:, 1] - t_y)))
                tracked_objects.append(
                    BYTETrackerObject(tlwh, track.score, cls_ids[nearest], track.track_id))

    # Build trajectories (a single point per track for a single image).
    for obj in tracked_objects:
        x_c = obj.tlwh[0] + obj.tlwh[2] / 2
        y_c = obj.tlwh[1] + obj.tlwh[3] / 2
        points = trajectories.setdefault(obj.track_id, [])
        points.append(warp_point(H, (x_c, y_c)))
        if len(points) > MAX_TRAJECTORY_LENGTH:
            del points[:-MAX_TRAJECTORY_LENGTH]

    # Render trajectories: older segments are darker, the newest brightest.
    map_copy = map_img.copy()
    for track_id, points in trajectories.items():
        color = get_bright_color_by_id(track_id)
        alpha_step = 1 / len(points) if len(points) > 1 else 1
        for i in range(1, len(points)):
            alpha = int(255 * (i * alpha_step))
            faded_color = tuple(int(c * alpha / 255) for c in color)
            cv2.line(map_copy, points[i - 1], points[i], faded_color, 2)
        if points:
            cv2.putText(map_copy, f"ID {track_id}", points[-1],
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    if obb_data is not None:
        image = draw_obb_with_ids(image, obb_data, tracked_objects)

    # imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(output_obb_path, image):
        raise OSError(f"Cannot write image: {output_obb_path}")
    if not cv2.imwrite(output_map_path, map_copy):
        raise OSError(f"Cannot write image: {output_map_path}")
    print("✅ Изображение с детекцией сохранено:", output_obb_path)
    print("✅ Карта с траекториями сохранена:", output_map_path)

# Run: process a single image from Drive and write both output images
# (boxes + IDs, and positions on the map) under /content.
process_image_with_tracking(
    input_img_path="/content/drive/MyDrive/project/4_img586.png",
    output_obb_path="/content/output_with_obb(4_img586).png",
    output_map_path="/content/output_on_map(4_img586).png",
    model_path="/content/drive/MyDrive/project/new_best.pt"
)



0: 384x640 None66.6ms
Speed: 3.1ms preprocess, 66.6ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)
✅ Изображение с детекцией сохранено: /content/output_with_obb(4_img586).png
✅ Карта с траекториями сохранена: /content/output_on_map(4_img586).png
