In [3]:
from ultralytics import YOLOv10
import supervision as sv
import cv2
import numpy as np
import requests
import torch
import time
import gc
import threading
from queue import Queue
from collections import defaultdict
from matplotlib import pyplot as plt

In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Device:', device)
name_model = "yolov10L"
model = YOLOv10.from_pretrained(f'jameslahm/{name_model}')
model.to(device)

Device: cuda


YOLOv10(
  (model): YOLOv10DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(320, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=Tru

# Video

In [5]:
video_path = './data/lange_10.mp4'
output_path = './result/lange_10_YOLO_10L.mp4'

cap = cv2.VideoCapture(video_path)

# Получение информации о видео
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# Создание объекта для записи видео
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))


# Base

In [6]:
fps_start_time = time.time()
total_frames_all = 0
confidence_threshold = 0.5
vehicle_classes = [2, 3, 5, 7]  # 2 - car, 3 - motorcycle, 5 - bus, 7 - truck


while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  

    results = model(frame_rgb, classes=vehicle_classes)
    results = results[0]

    boxes = results.boxes

    coords = boxes.xyxy.cpu().numpy()  
    confs = boxes.conf.cpu().numpy()  
    classes = boxes.cls.cpu().numpy()  

    for box, conf, cls in zip(coords, confs, classes):
        if conf > confidence_threshold:
            x1, y1, x2, y2 = map(int, box)
            label = f'{model.names[int(cls)]} {conf:.2f}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 4)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4)

    out.write(frame)

    total_frames_all += 1

fps_end_time = time.time()
average_fps = total_frames_all / (fps_end_time - fps_start_time)

print("Average FPS:", average_fps)

cap.release()
out.release()
cv2.destroyAllWindows()
torch.cuda.empty_cache()
gc.collect()



0: 384x640 6 cars, 1 truck, 118.6ms
Speed: 5.0ms preprocess, 118.6ms inference, 148.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 89.1ms
Speed: 4.0ms preprocess, 89.1ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 87.5ms
Speed: 2.5ms preprocess, 87.5ms inference, 6.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 87.1ms
Speed: 4.0ms preprocess, 87.1ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 cars, 88.5ms
Speed: 2.0ms preprocess, 88.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 cars, 89.1ms
Speed: 3.0ms preprocess, 89.1ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 88.0ms
Speed: 2.0ms preprocess, 88.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 89.5ms
Speed: 4.0ms preprocess, 89.5ms inference, 5.5ms postprocess per image at shape (1, 3, 384, 

60