In [7]:
from ultralytics import YOLO
import numpy as np
import cv2
from collections import defaultdict

In [145]:
# Detection / tracking model (YOLOv8x weights).
model = YOLO("yolov8x.pt")

# Input video and its frame geometry.
video = cv2.VideoCapture("test_video.mp4")
if not video.isOpened():
    # Fail loudly: otherwise width/height read back as 0 and the run
    # silently produces an empty output file.
    raise IOError("Cannot open input video: test_video.mp4")
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Write the output at the source frame rate so playback speed is preserved.
# CAP_PROP_FPS reads back as 0 when the backend cannot report it; in that
# case fall back to the previously hard-coded 20 fps.
fps = video.get(cv2.CAP_PROP_FPS) or 20.0

# HSV colour ranges used for helmet detection
# (OpenCV 8-bit HSV: H is in 0..179, S and V are in 0..255).
hsv_orange_lower = np.array([10, 100, 100], dtype="uint8")
hsv_orange_upper = np.array([25, 255, 255], dtype="uint8")
hsv_green_lower = np.array([40, 50, 50], dtype="uint8")
# NOTE(review): S and V are capped at 100 here, unlike the orange range
# (255) -- this only matches dark, desaturated greens. Confirm it is intended.
hsv_green_upper = np.array([80, 100, 100], dtype="uint8")

# Output video writer. 'mp4v' is the fourcc that matches the .mp4 container
# ('XVID' is an AVI-style tag).
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter('output.mp4', fourcc, fps, (width, height))

# Trajectory storage: {track_id: [(x, y), (x, y), ...], ...}
track_history = defaultdict(list)
   

In [146]:
def person_has_helmet(det, frame, orange_ratio=0.01, green_ratio=0.4):
    """Guess whether a detected person wears an orange or green helmet.

    The top third of the bounding box is taken as the head region. The
    region is converted to HSV and thresholded against the module-level
    orange and green HSV ranges; the person is considered to wear a helmet
    when the fraction of matching pixels exceeds the given ratio for either
    colour.

    Args:
        det: Bounding box as (x1, y1, x2, y2) in pixel coordinates.
        frame: BGR image the box refers to.
        orange_ratio: Minimum fraction (0..1) of orange pixels in the head
            region for a positive result.
        green_ratio: Minimum fraction (0..1) of green pixels in the head
            region for a positive result.

    Returns:
        bool: True if a helmet colour is detected, False otherwise
        (including when the head region lies outside the frame or is empty).
    """
    x1, y1, x2, y2 = (int(v) for v in det)
    frame_h, frame_w = frame.shape[:2]

    # Clamp the head region to the frame. Without this, a negative
    # coordinate would be interpreted by slicing as "from the end" and
    # yield a wrong or empty region.
    top = max(y1, 0)
    bottom = min(int(y1 + (y2 - y1) / 3), frame_h)
    left = max(x1, 0)
    right = min(x2, frame_w)

    head_roi = frame[top:bottom, left:right]
    if head_roi.size == 0:
        # Degenerate or off-frame box: nothing to inspect. This also avoids
        # cv2.cvtColor failing on an empty image and a division by zero.
        return False

    hsv = cv2.cvtColor(head_roi, cv2.COLOR_BGR2HSV)
    mask_orange = cv2.inRange(hsv, hsv_orange_lower, hsv_orange_upper)
    mask_green = cv2.inRange(hsv, hsv_green_lower, hsv_green_upper)

    # Count matching pixels. cv2.inRange marks hits with 255, so summing the
    # mask would overstate the count by a factor of 255.
    # NOTE(review): the thresholds are now true pixel fractions; if they were
    # tuned against the summed mask, re-tune via orange_ratio / green_ratio.
    orange_pixels = np.count_nonzero(mask_orange)
    green_pixels = np.count_nonzero(mask_green)

    # Area of the head region actually examined, in pixels.
    head_area = head_roi.shape[0] * head_roi.shape[1]

    return bool(
        orange_pixels / head_area > orange_ratio
        or green_pixels / head_area > green_ratio
    )


In [147]:
# Main loop: track people frame by frame, mark those without a helmet in red
# (with their recent trajectory) and the rest in green, then show and save
# the annotated frame.
try:
    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            break

        results = model.track(frame, persist=True, conf=0.1, iou=0.75, classes=[0])
        boxes = results[0].boxes

        detections = boxes.xyxy.cpu().numpy().astype(int)
        # boxes.id is None when no track IDs are available for this frame;
        # calling .int() on it would raise AttributeError.
        if boxes.id is not None:
            track_ids = boxes.id.int().cpu().tolist()
        else:
            track_ids = [None] * len(detections)

        # Helmet presence/absence for every detected person.
        no_helmet = [not person_has_helmet(det, frame) for det in detections]

        # Draw boxes with track IDs and, for people without a helmet,
        # their trajectory.
        for det, track_id, missing in zip(detections, track_ids, no_helmet):
            # Plain Python ints for the OpenCV drawing calls.
            x1, y1, x2, y2 = det.tolist()

            # Red for "no helmet", green otherwise (BGR order).
            color = (0, 0, 255) if missing else (0, 255, 0)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            if track_id is not None:
                cv2.putText(frame, f"ID: {track_id}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Trajectory is kept and drawn only for people without a helmet.
            if missing and track_id is not None:
                history = track_history[track_id]
                history.append(((x1 + x2) // 2, (y1 + y2) // 2))
                # Keep only the 100 most recent box centres.
                if len(history) > 100:
                    del history[0]
                points = np.array(history, dtype=np.int32).reshape((-1, 1, 2))
                cv2.polylines(frame, [points], isClosed=False, color=(0, 0, 255), thickness=4)

        cv2.putText(frame, f'People: {len(detections)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("Video", frame)
        writer.write(frame)

        # Press 'q' to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture, finalize the output file and close the
    # preview window, even if processing fails midway.
    video.release()
    writer.release()
    cv2.destroyAllWindows()



0: 384x640 4 persons, 1741.6ms
Speed: 2.0ms preprocess, 1741.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1723.6ms
Speed: 2.0ms preprocess, 1723.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1726.6ms
Speed: 2.0ms preprocess, 1726.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1720.6ms
Speed: 2.0ms preprocess, 1720.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1716.6ms
Speed: 2.0ms preprocess, 1716.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1723.0ms
Speed: 2.0ms preprocess, 1723.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1715.8ms
Speed: 2.0ms preprocess, 1715.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1714.6ms
Speed: 2.0ms preprocess, 1714.6ms inference, 2.0ms 