Автор: Лейман М.А.   
Дата создания: 22.03.2025  

# Применение модели  
### Детектирование объектов на видео и фото  

### Модель __YOLOv8n__ (Ultralytics YOLOv8 Nano)

Проверка работы обученных моделей (варианты s и n) на видео.  


In [1]:
# Parameters

# Path to the input video
video_path = "video/video3.avi"

# "s" variant: trained weights file and path of the annotated output video
s_model_path = "models/11s_best.pt"
s_output_video_path = "video/video_yolo_s.avi"

# "n" variant: trained weights file and path of the annotated output video
n_model_path = "models/11n_best.pt"
n_output_video_path = "video/video_yolo_n.avi"




from ultralytics import YOLO
import cv2


def _draw_detections(frame, result, names):
    """Draw a green box and a "<class> <score>" caption for every detection.

    Args:
        frame: BGR image that is annotated in place.
        result: One element of the list returned by ``model.track``.
        names: Mapping from class index to class name (``model.names``).
    """
    boxes = result.boxes.xyxy.cpu().numpy()
    labels = result.boxes.cls.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()

    for box, label, score in zip(boxes, labels, scores):
        x1, y1, x2, y2 = map(int, box)
        class_name = names[int(label)]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the caption inside the frame when the box touches the top edge.
        caption_y = max(y1 - 10, 15)
        cv2.putText(frame, f'{class_name} {score:.2f}', (x1, caption_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)


def video_detection(model_path, video_path, output_video_path, *,
                    frame_size=(1024, 1024), conf=0.4, iou=0.5, fps=20,
                    show=True):
    """Run a YOLO model over a video and save the annotated frames.

    Every frame is resized to ``frame_size``, passed through ``model.track``,
    annotated with boxes and class/score captions, and written to
    ``output_video_path`` with the XVID codec.

    Args:
        model_path: Path to the trained weights file loaded with ``YOLO``.
        video_path: Path to the input video.
        output_video_path: Path of the annotated video to write.
        frame_size: ``(width, height)`` every frame is resized to before
            inference and writing.
        conf: Confidence threshold passed to ``model.track``.
        iou: IoU threshold passed to ``model.track``; it can be lowered for
            a more aggressive NMS.
        fps: Frame rate of the output video.
        show: When true, display each annotated frame in a window; pressing
            ``q`` stops processing early.

    Raises:
        OSError: If the input video or the output writer cannot be opened.
    """
    model = YOLO(model_path)
    frame_size = tuple(frame_size)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise OSError(f"Cannot open input video: {video_path}")

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

    # try/finally so the capture, the writer and the preview window are
    # released even if inference or drawing raises mid-video.
    try:
        if not out.isOpened():
            raise OSError(f"Cannot open output video: {output_video_path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_resized = cv2.resize(frame, frame_size)

            # persist=True keeps the tracker state between consecutive
            # frames, which reduces box jitter.
            # NOTE(review): the original comment said ByteTrack, but no
            # tracker= argument is passed, so the library default is used;
            # pass tracker="bytetrack.yaml" if ByteTrack is intended.
            results = model.track(frame_resized, persist=True, conf=conf,
                                  iou=iou, agnostic_nms=True)

            for result in results:
                _draw_detections(frame_resized, result, model.names)

            out.write(frame_resized)

            if show:
                cv2.imshow('Frame', frame_resized)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        cap.release()
        out.release()
        if show:
            cv2.destroyAllWindows()


In [3]:
# Run detection with the weights at s_model_path and save the annotated video.
video_detection(
    s_model_path,
    video_path="video/video7.avi",
    output_video_path="video/video_yolo_s3.avi",
)


0: 512x512 2 vehicles, 6.1ms
Speed: 0.9ms preprocess, 6.1ms inference, 0.7ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 1 vehicle, 6.4ms
Speed: 1.5ms preprocess, 6.4ms inference, 0.9ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 1 vehicle, 6.0ms
Speed: 1.1ms preprocess, 6.0ms inference, 0.7ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 1 vehicle, 5.5ms
Speed: 1.3ms preprocess, 5.5ms inference, 0.7ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 1 vehicle, 5.6ms
Speed: 1.1ms preprocess, 5.6ms inference, 0.7ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 2 vehicles, 5.3ms
Speed: 1.2ms preprocess, 5.3ms inference, 0.8ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 2 vehicles, 5.9ms
Speed: 1.2ms preprocess, 5.9ms inference, 0.7ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 2 vehicles, 5.5ms
Speed: 1.2ms preprocess, 5.5ms inference, 0.7ms postprocess per image at shape (1, 3, 512, 5

In [7]:
# Run detection with the weights at n_model_path and save the annotated video.
video_detection(
    n_model_path,
    video_path="video/video6.avi",
    output_video_path="video/video_yolo_n.avi",
)


0: 512x512 12 buildings, 2 prefabricated-houses, 7.3ms
Speed: 1.4ms preprocess, 7.3ms inference, 1.6ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 12 buildings, 2 prefabricated-houses, 7.3ms
Speed: 2.4ms preprocess, 7.3ms inference, 4.5ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 12 buildings, 2 prefabricated-houses, 7.4ms
Speed: 1.5ms preprocess, 7.4ms inference, 12.1ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 12 buildings, 2 prefabricated-houses, 6.8ms
Speed: 3.5ms preprocess, 6.8ms inference, 5.0ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 12 buildings, 2 prefabricated-houses, 7.7ms
Speed: 2.8ms preprocess, 7.7ms inference, 11.8ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 11 buildings, 2 prefabricated-houses, 6.2ms
Speed: 2.1ms preprocess, 6.2ms inference, 2.6ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 11 buildings, 2 prefabricated-houses, 8.6ms
Speed: 3.8ms preprocess, 8.6ms