In [None]:
import torch
from ultralytics import YOLO
import cv2

def process_video(model_path, video_path, output_path, conf_threshold=0.5,
                  frame_size=(640, 640), fallback_fps=30.0):
    """Run a YOLO model on every frame of a video and save the annotated video.

    Each frame is resized to ``frame_size`` (without preserving the aspect
    ratio), passed through the model, and the first result's ``plot()`` image
    is appended to the output file, which is encoded with the ``mp4v`` FourCC.

    Args:
        model_path: Path to the weights file handed to ``YOLO``.
        video_path: Path of the input video.
        output_path: Path of the annotated video to create.
        conf_threshold: Value forwarded to the model as ``conf``.
        frame_size: ``(width, height)`` used both for resizing frames and for
            the output video. Defaults to the previously hard-coded 640x640.
        fallback_fps: Frame rate used when the container does not report a
            positive FPS.

    Returns:
        None. Failures to open the input or to create the output are reported
        with a printed message; errors raised while processing frames
        propagate to the caller after the video handles are released.
    """
    model = YOLO(model_path)
    size = (int(frame_size[0]), int(frame_size[1]))

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            print("Ошибка открытия видеофайла")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` also rejects NaN, which `fps <= 0` would let through.
        if not fps > 0:
            fps = fallback_fps

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, size)
        if not out.isOpened():
            # Without this check a missing codec or an unwritable path yields
            # no video while the success message is still printed.
            print("Ошибка создания выходного видеофайла")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # The writer only accepts frames of the size it was created with,
            # so the same `size` is used here and in the VideoWriter above.
            frame = cv2.resize(frame, size, interpolation=cv2.INTER_LINEAR)
            results = model(frame, conf=conf_threshold)
            out.write(results[0].plot())
    finally:
        # Release on every exit path so a failure mid-video does not leak the
        # capture or leave the output file unfinalised.
        cap.release()
        if out is not None:
            out.release()

    print(f"Видео сохранено как: {output_path}")

if __name__ == "__main__":
    # Weights file, input clip and destination for this run.
    model, source_video, out_video = "best.pt", "3_1_trim.MOV", "3_1_output_trim.mp4"
    # Confidence value forwarded to the model.
    conf = 0.6

    # Start processing.
    process_video(model, source_video, out_video, conf_threshold=conf)

Обработка видео: 3_1_trim.MOV
Размер: 2160x3840, FPS: 30.07053457606937, Всего кадров: 298

0: 640x640 2 bowls, 2 meats, 2 soups, 1 tea, 58.2ms
Speed: 4.5ms preprocess, 58.2ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 bowls, 2 meats, 2 soups, 1 tea, 58.0ms
Speed: 6.2ms preprocess, 58.0ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 bowls, 2 meats, 2 soups, 1 tea, 49.2ms
Speed: 6.5ms preprocess, 49.2ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 bowls, 2 meats, 2 soups, 1 tea, 49.2ms
Speed: 6.8ms preprocess, 49.2ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 bowls, 2 meats, 2 soups, 1 tea, 30.2ms
Speed: 4.7ms preprocess, 30.2ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 bowls, 2 meats, 2 soups, 1 tea, 30.0ms
Speed: 5.9ms preprocess, 30.0ms inference, 4.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x6