# YOLO-POSE VISUALIZATION 

In [None]:
# from ultralytics import YOLO

# model = YOLO('yolov8s-pose.pt')

# folder_with_mp4 = "Анализ бригад (телефон)/Есть телефон"

# results = model(source="Анализ бригад (телефон)/Есть телефон/00_51_01.mp4", show=True, vid_stride=1, iou=0.6, conf=0.3, nms=True, device='cpu')

# ULTRA FAST YOLO-POSE

In [6]:
import os
from copy import deepcopy
import csv
import cv2
import numpy as np
from ultralytics import YOLO


# Detection parameters (compared against keypoint coordinates, presumably pixels)
THRESHOLD = 100  # Largest Euclidean distance at which two wrists count as close
UD_THRESHOLD = 30  # Largest vertical offset allowed between the two wrists

# Rectangle of interest, given by the corners (X1, Y1) and (X2, Y2)
RECT_X1 = 930
RECT_Y1 = 485
RECT_X2 = 1425
RECT_Y2 = 295

# Pose-estimation model
yolo_pose = YOLO('yolov8s-pose.pt')

# Module-level counters incremented by process_video for every analysed frame
total_time_with_person = time_without_hands = time_without_person = 0

def wrist_in_rectangle(wrist):
    """
    Tells whether a wrist lies inside the module-level rectangle.

    The rectangle spans RECT_X1..RECT_X2 horizontally and RECT_Y2..RECT_Y1
    vertically; all four borders are inclusive.

    :param wrist: Pair ``(x, y)`` with the wrist coordinates.
    :return: Truthy when the point is inside the rectangle, falsy otherwise.
    """
    wrist_x, wrist_y = wrist
    inside_horizontally = RECT_X1 <= wrist_x <= RECT_X2
    inside_vertically = RECT_Y2 <= wrist_y <= RECT_Y1
    return inside_horizontally and inside_vertically


def detect_frame_difference(current_frame, prev_frame, mask=None, diff_threshold=30, pixel_threshold=500):
    """
    Reports whether two frames differ in more than a given number of pixels.

    Both frames are converted from BGR to grayscale, optionally masked,
    subtracted, binarised with ``diff_threshold`` and the non-zero pixels of
    the result are counted.

    :param current_frame: Current frame (BGR image).
    :param prev_frame: Previous frame (BGR image).
    :param mask: Optional mask applied to both grayscale frames.
    :param diff_threshold: Per-pixel difference above which a pixel counts as changed.
    :param pixel_threshold: Number of changed pixels that must be exceeded.
    :return: True if more than ``pixel_threshold`` pixels changed, False otherwise.
    """
    # Grayscale versions of the current and the previous frame, in that order
    grays = [cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (current_frame, prev_frame)]

    # Restrict both images to the region of interest when a mask is given
    if mask is not None:
        grays = [cv2.bitwise_and(gray, gray, mask=mask) for gray in grays]

    current_gray, prev_gray = grays
    delta = cv2.absdiff(current_gray, prev_gray)

    # Keep only the pixels whose difference exceeds the threshold
    binary = cv2.threshold(delta, diff_threshold, 255, cv2.THRESH_BINARY)[1]

    return cv2.countNonZero(binary) > pixel_threshold


def get_intervals(distractions, gap=60):
    """
    Groups distraction times into contiguous intervals.

    Times are consumed in the order given. A time extends the current
    interval when it lies at most ``gap`` after that interval's end;
    otherwise it opens a new interval.

    :param distractions: Sequence of detected distraction times.
    :param gap: Largest allowed distance between an interval's end and the next time.
    :return: List of ``[start, end]`` pairs; empty when there are no distractions.
    """
    if not distractions:
        return []

    timeline = iter(distractions)
    opening = next(timeline)
    merged = [[opening, opening]]
    for moment in timeline:
        current = merged[-1]
        if moment - current[1] <= gap:
            current[1] = moment
        else:
            merged.append([moment, moment])
    return merged


def seconds_to_time(seconds):
    """
    Formats a duration in seconds as a ``MM:SS`` string.

    Minutes and seconds are truncated to integers and single-digit values
    get a leading zero. Minutes are not wrapped into hours.

    :param seconds: Time in seconds.
    :return: Time in MM:SS format.
    """
    whole_minutes, leftover = divmod(seconds, 60)
    fields = []
    for value in (int(whole_minutes), int(leftover)):
        fields.append(f"0{value}" if value < 10 else f"{value}")
    return ":".join(fields)


def process_all_videos_in_folder(folder_path, SAVE_PATH):
    """
    Runs process_video on every ``.mp4`` file found directly in a folder.

    :param folder_path: Path to the video folder.
    :param SAVE_PATH: Path to save processed outputs.
    """
    for entry in os.listdir(folder_path):
        # Only .mp4 files are processed; everything else is ignored
        if not entry.endswith('.mp4'):
            continue
        source = os.path.join(folder_path, entry)
        stem, _ = os.path.splitext(entry)  # File name without its extension
        process_video(source, SAVE_PATH, stem)


def _extract_wrists(person_keypoints):
    """
    Collects the wrist keypoints of one detected person.

    :param person_keypoints: Keypoint tensor of a single detection.
    :return: Two-element list ``[left, right]`` of NumPy coordinates, with
        ``None`` in place of a wrist whose index is out of range.
    """
    wrists = []
    for idx in (9, 10):  # Keypoint indices of the left and right wrist
        try:
            wrists.append(person_keypoints[idx].cpu().numpy())
        except IndexError:
            # The detection carries fewer keypoints than expected; mark the
            # wrist as missing instead of reusing a stale value.
            wrists.append(None)
    return wrists


def _is_distraction_pose(wrists, frame_height):
    """
    Decides whether a pair of wrists matches the distraction heuristic.

    Both wrists must be present, closer than THRESHOLD, ordered left to
    right, at most UD_THRESHOLD apart vertically, and both strictly between
    35% and 85% of the frame height.

    :param wrists: Two-element list as returned by _extract_wrists.
    :param frame_height: Height of the analysed frame.
    :return: True when all conditions hold, False otherwise.
    """
    first, second = wrists
    if first is None or second is None:
        return False

    upper_height_limit = 0.35 * frame_height
    bottom_height_limit = 0.85 * frame_height
    distance = np.linalg.norm(np.array(first) - np.array(second))
    return bool(
        distance < THRESHOLD
        and first[0] < second[0]
        and abs(first[1] - second[1]) <= UD_THRESHOLD
        and upper_height_limit < first[1] < bottom_height_limit
        and upper_height_limit < second[1] < bottom_height_limit
    )


def process_video(video_path, SAVE_PATH, video_name):
    """
    Processes a single video.

    Every ``fps``-th frame (about one per second) is run through the pose
    model. Frames in which a person matches the distraction heuristic are
    saved as JPEGs under ``SAVE_PATH/video_name``, and the merged distraction
    intervals plus presence statistics are written to
    ``SAVE_PATH/<video_name>_intervals.csv``.

    The module-level counters ``total_time_with_person``,
    ``time_without_hands`` and ``time_without_person`` are incremented and
    never reset here, so the statistics rows are cumulative over all calls
    made since the counters were initialised.

    :param video_path: Path to the video.
    :param SAVE_PATH: Path to save processed outputs.
    :param video_name: Name of the video.
    """

    global total_time_with_person
    global time_without_hands
    global time_without_person

    distractions = []
    # Create the per-video output folder (and SAVE_PATH itself) if missing
    video_folder_path = os.path.join(SAVE_PATH, video_name)
    os.makedirs(video_folder_path, exist_ok=True)

    colors = [(128, 0, 128), (0, 255, 255)]  # Purple and yellow wrist markers
    # NOTE(review): the original comment asked for 15 keypoints while the code
    # required 14; the value used by the code is kept.
    min_keypoints = 14

    cap = cv2.VideoCapture(video_path)
    try:
        # Guard against a reported frame rate below 1, which would otherwise
        # cause a division by zero; in that case every frame is analysed.
        fps = max(int(cap.get(cv2.CAP_PROP_FPS)), 1)
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            # Stop as soon as the stream is exhausted, before any skipping
            if not ret:
                break
            frame_count += 1

            if frame_count % fps != 0:
                continue

            # Pose prediction and keypoints of every detected person
            results_pose = yolo_pose.predict(frame)
            keypoints = results_pose[0].keypoints.xy

            distraction_detected = False
            all_wrists = []

            for person_keypoints in keypoints:
                wrists = _extract_wrists(person_keypoints)
                # Wrists of every person feed the rectangle check below, even
                # after a distraction has already been found in this frame.
                all_wrists.extend(wrist for wrist in wrists if wrist is not None)

                if distraction_detected or len(person_keypoints) < min_keypoints:
                    continue

                if _is_distraction_pose(wrists, frame.shape[0]):
                    distraction_detected = True
                    for wrist, color in zip(wrists, colors):
                        cv2.circle(frame, (int(wrist[0]), int(wrist[1])), 10, color, -1)

            # Is at least one wrist inside the rectangle of interest?
            hands_in_rect = any(wrist_in_rectangle(wrist) for wrist in all_wrists)

            half_width = frame.shape[1] / 2
            person_on_right = any(
                kp[0] > half_width for person_kps in keypoints for kp in person_kps
            )

            if person_on_right:
                total_time_with_person += 1
                if not hands_in_rect:
                    time_without_hands += 1
            else:
                time_without_person += 1

            if distraction_detected:
                # Timestamp derived from the video's own frame rate
                timestamp = frame_count / fps
                label = seconds_to_time(timestamp)
                distractions.append(timestamp)
                print(f"Отвлечение на кадре_{label}")
                save_path = os.path.join(video_folder_path, f"{label}.jpg")
                cv2.imwrite(save_path, frame)
    finally:
        # Release the capture even if prediction or saving fails
        cap.release()

    intervals = get_intervals(distractions)
    with open(os.path.join(SAVE_PATH, f"{video_name}_intervals.csv"), "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Start Time", "End Time"])
        for interval in intervals:
            writer.writerow([seconds_to_time(interval[0]), seconds_to_time(interval[1])])

        # Append the time statistics (counted in analysed frames)
        percentage_without_hands = (time_without_hands / total_time_with_person) * 100 if total_time_with_person != 0 else 0
        writer.writerow([])
        writer.writerow(["Total Time With Person (frames)", total_time_with_person])
        writer.writerow(["Time Without Hands (frames)", time_without_hands])
        writer.writerow(["Time Without Person (frames)", time_without_person])
        writer.writerow(["Percentage Without Hands (%)", percentage_without_hands])

# Input folder with the source videos and output folders for extracted frames
folder_path = "/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/Есть телефон"
SAVE_PATH = "/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/img_with_phones"
SAVE_PATH2 = "/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/img_without_phones"

# The output folder is created on first use
if not os.path.exists(SAVE_PATH):
    os.mkdir(SAVE_PATH)

# Analyse a single video from the input folder.
# To analyse the whole folder instead, call
# process_all_videos_in_folder(folder_path, SAVE_PATH).
video_name = "00_26_30"
process_video(f"{folder_path}/{video_name}.mp4", SAVE_PATH, video_name)


0: 384x640 2 persons, 31.2ms
Speed: 1.7ms preprocess, 31.2ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 30.3ms
Speed: 2.1ms preprocess, 30.3ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 30.9ms
Speed: 2.5ms preprocess, 30.9ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 28.9ms
Speed: 2.1ms preprocess, 28.9ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 28.8ms
Speed: 2.2ms preprocess, 28.8ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.5ms
Speed: 3.0ms preprocess, 26.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.3ms
Speed: 2.8ms preprocess, 25.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.3ms
Speed: 2.1ms preprocess, 25.3ms inference, 1.5ms postprocess per image at shape (

Отвлечение на кадре_00:24
Отвлечение на кадре_00:25


0: 384x640 2 persons, 25.4ms
Speed: 2.1ms preprocess, 25.4ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 26.2ms
Speed: 2.1ms preprocess, 26.2ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_00:26
Отвлечение на кадре_00:27


0: 384x640 2 persons, 26.5ms
Speed: 2.0ms preprocess, 26.5ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 27.0ms
Speed: 2.5ms preprocess, 27.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_00:28
Отвлечение на кадре_00:29


0: 384x640 2 persons, 26.5ms
Speed: 2.2ms preprocess, 26.5ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.6ms
Speed: 2.3ms preprocess, 25.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_00:30
Отвлечение на кадре_00:31


0: 384x640 2 persons, 26.3ms
Speed: 1.9ms preprocess, 26.3ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.8ms
Speed: 1.9ms preprocess, 24.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.8ms
Speed: 1.7ms preprocess, 24.8ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)


Отвлечение на кадре_00:32
Отвлечение на кадре_00:33
Отвлечение на кадре_00:34



0: 384x640 2 persons, 24.5ms
Speed: 1.9ms preprocess, 24.5ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.5ms
Speed: 2.1ms preprocess, 25.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.9ms
Speed: 3.0ms preprocess, 24.9ms inference, 3.6ms postprocess per image at shape (1, 3, 384, 640)


Отвлечение на кадре_00:35



0: 384x640 2 persons, 25.9ms
Speed: 2.3ms preprocess, 25.9ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.1ms
Speed: 2.0ms preprocess, 25.1ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.5ms
Speed: 1.8ms preprocess, 24.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.5ms
Speed: 2.4ms preprocess, 25.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.9ms
Speed: 2.0ms preprocess, 24.9ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.6ms
Speed: 1.9ms preprocess, 24.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.4ms
Speed: 1.7ms preprocess, 25.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.0ms
Speed: 1.8ms preprocess, 25.0ms inference, 2.2ms postprocess per image at shape (

Отвлечение на кадре_01:53
Отвлечение на кадре_01:54


0: 384x640 2 persons, 26.2ms
Speed: 2.2ms preprocess, 26.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.9ms
Speed: 2.2ms preprocess, 25.9ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.8ms
Speed: 1.9ms preprocess, 25.8ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.4ms
Speed: 2.9ms preprocess, 25.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.7ms
Speed: 1.9ms preprocess, 26.7ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.9ms
Speed: 2.0ms preprocess, 25.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.4ms
Speed: 1.7ms preprocess, 25.4ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.7ms
Speed: 2.4ms preprocess, 25.7ms inference, 1.6ms postprocess per image at shape (1

Отвлечение на кадре_03:07


0: 384x640 2 persons, 24.7ms
Speed: 2.2ms preprocess, 24.7ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.2ms
Speed: 1.7ms preprocess, 25.2ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.3ms
Speed: 1.9ms preprocess, 25.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.6ms
Speed: 2.1ms preprocess, 24.6ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.3ms
Speed: 2.2ms preprocess, 25.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.9ms
Speed: 1.7ms preprocess, 24.9ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.6ms
Speed: 2.2ms preprocess, 24.6ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.0ms
Speed: 2.1ms preprocess, 25.0ms inference, 1.5ms postprocess per image at shape (1

Отвлечение на кадре_04:12


0: 384x640 2 persons, 25.2ms
Speed: 1.8ms preprocess, 25.2ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.6ms
Speed: 2.1ms preprocess, 24.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.1ms
Speed: 1.9ms preprocess, 26.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.5ms
Speed: 1.9ms preprocess, 24.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.1ms
Speed: 1.9ms preprocess, 24.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.9ms
Speed: 2.9ms preprocess, 25.9ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.1ms
Speed: 2.5ms preprocess, 24.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_04:19


0: 384x640 2 persons, 24.4ms
Speed: 1.8ms preprocess, 24.4ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.9ms
Speed: 2.1ms preprocess, 24.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.5ms
Speed: 1.9ms preprocess, 24.5ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.3ms
Speed: 1.9ms preprocess, 24.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.3ms
Speed: 2.3ms preprocess, 25.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.5ms
Speed: 2.6ms preprocess, 25.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.6ms
Speed: 2.0ms preprocess, 24.6ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.2ms
Speed: 2.1ms preprocess, 26.2ms inference, 2.3ms postprocess per image at shape (1

Отвлечение на кадре_04:28
Отвлечение на кадре_04:30



0: 384x640 2 persons, 25.8ms
Speed: 2.1ms preprocess, 25.8ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.7ms
Speed: 2.0ms preprocess, 26.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.2ms
Speed: 2.2ms preprocess, 26.2ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)


Отвлечение на кадре_04:31
Отвлечение на кадре_04:33



0: 384x640 2 persons, 26.5ms
Speed: 2.5ms preprocess, 26.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.7ms
Speed: 2.1ms preprocess, 25.7ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_04:34
Отвлечение на кадре_04:35


0: 384x640 2 persons, 27.2ms
Speed: 2.8ms preprocess, 27.2ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.4ms
Speed: 2.3ms preprocess, 25.4ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_04:36
Отвлечение на кадре_04:37


0: 384x640 2 persons, 26.6ms
Speed: 2.1ms preprocess, 26.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.1ms
Speed: 2.4ms preprocess, 24.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_04:38
Отвлечение на кадре_04:39


0: 384x640 2 persons, 24.9ms
Speed: 2.3ms preprocess, 24.9ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.5ms
Speed: 2.1ms preprocess, 25.5ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.4ms
Speed: 2.1ms preprocess, 25.4ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_04:40


0: 384x640 2 persons, 24.7ms
Speed: 2.0ms preprocess, 24.7ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.3ms
Speed: 2.4ms preprocess, 24.3ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.7ms
Speed: 2.2ms preprocess, 24.7ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.2ms
Speed: 2.0ms preprocess, 24.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_04:44


0: 384x640 2 persons, 25.0ms
Speed: 2.1ms preprocess, 25.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.1ms
Speed: 1.8ms preprocess, 24.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.9ms
Speed: 1.7ms preprocess, 26.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.7ms
Speed: 2.4ms preprocess, 25.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.6ms
Speed: 2.4ms preprocess, 24.6ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.9ms
Speed: 2.5ms preprocess, 24.9ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 27.2ms
Speed: 2.1ms preprocess, 27.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.4ms
Speed: 2.2ms preprocess, 26.4ms inference, 1.5ms postprocess per image at shape (1

Отвлечение на кадре_04:53
Отвлечение на кадре_04:54


0: 384x640 2 persons, 26.5ms
Speed: 1.9ms preprocess, 26.5ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.5ms
Speed: 2.4ms preprocess, 25.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.6ms
Speed: 2.1ms preprocess, 25.6ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 27.6ms
Speed: 2.5ms preprocess, 27.6ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_04:57
Отвлечение на кадре_04:58


0: 384x640 2 persons, 26.3ms
Speed: 1.8ms preprocess, 26.3ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.6ms
Speed: 3.3ms preprocess, 25.6ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 27.0ms
Speed: 2.4ms preprocess, 27.0ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.5ms
Speed: 2.1ms preprocess, 26.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.3ms
Speed: 2.5ms preprocess, 25.3ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.6ms
Speed: 2.3ms preprocess, 25.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.4ms
Speed: 2.2ms preprocess, 25.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_05:03


0: 384x640 2 persons, 25.0ms
Speed: 2.0ms preprocess, 25.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.5ms
Speed: 2.2ms preprocess, 25.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.9ms
Speed: 1.9ms preprocess, 24.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_05:06


0: 384x640 2 persons, 24.4ms
Speed: 2.0ms preprocess, 24.4ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 25.5ms
Speed: 2.0ms preprocess, 25.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.1ms
Speed: 1.9ms preprocess, 24.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.2ms
Speed: 2.5ms preprocess, 25.2ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.6ms
Speed: 1.7ms preprocess, 25.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.5ms
Speed: 2.1ms preprocess, 25.5ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.5ms
Speed: 2.4ms preprocess, 24.5ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)



Отвлечение на кадре_05:13


0: 384x640 2 persons, 24.3ms
Speed: 2.3ms preprocess, 24.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.5ms
Speed: 2.1ms preprocess, 24.5ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.2ms
Speed: 2.2ms preprocess, 24.2ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.9ms
Speed: 2.2ms preprocess, 24.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 24.7ms
Speed: 1.9ms preprocess, 24.7ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.6ms
Speed: 2.3ms preprocess, 25.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 26.2ms
Speed: 1.9ms preprocess, 26.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 25.6ms
Speed: 2.0ms preprocess, 25.6ms inference, 1.7ms postprocess per image at shape (1

# YOLO-NAS

In [None]:
# %pip install numpy 
# %pip install ultralytics 
# %pip install tensorflow tf-models-official tensorflow_hub 
# %pip install opencv-python 
# %pip install matplotlib 
# %pip install Pillow 
# %pip install super_gradients 
# %pip install torch torchvision pytorch-quantization  

In [None]:
import cv2
import torch
import numpy as np
from ultralytics import YOLO
import super_gradients
import os

# COCO class ids marked as interesting; not referenced by the code visible in this notebook
coco_interesting_classes = [0, 16, 17, 18, 27, 31, 33, 39, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 73, 77, 84]

# 1. Model initialisation
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# that moving the model to the device does not fail on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

yolo_nas = super_gradients.training.models.get("yolo_nas_s", pretrained_weights="coco").to(device)
yolo_pose = YOLO('yolov8s-pose.pt')

# Parameters
THRESHOLD = 25  # Euclidean distance below which a wrist counts as close to a detected box
frames_with_phone = set()

In [None]:
def seconds_to_time(seconds):
    """
    Render a number of seconds as a ``MM:SS`` string.

    Both fields are truncated to integers and padded with a leading zero
    when below 10; minutes are not wrapped into hours.
    """
    def pad(value):
        # Truncate to an integer and left-pad single-digit values
        value = int(value)
        return f"0{value}" if value < 10 else str(value)

    minutes, secs = divmod(seconds, 60)
    return pad(minutes) + ":" + pad(secs)


def process_all_videos_in_folder(folder_path, SAVE_PATH):
    """
    Call process_video for each ``.mp4`` file located directly in a folder.

    :param folder_path: Folder that contains the videos.
    :param SAVE_PATH: Folder that receives the processed outputs.
    """
    # Keep only the entries whose name ends with .mp4
    videos = filter(lambda name: name.endswith('.mp4'), os.listdir(folder_path))

    for filename in videos:
        # The video name is the file name with its extension removed
        process_video(os.path.join(folder_path, filename), SAVE_PATH, os.path.splitext(filename)[0])


# Обработка видео
def process_video(video_path, SAVE_PATH, video_name):
    """
    Process one video and save the frames where a small object is near a wrist.

    Every ``fps``-th frame (about one per second) is passed to the pose model
    and to the YOLO-NAS detector. Detections smaller than 1/15 of the frame in
    both dimensions are treated as phone candidates. When the centre of a
    candidate is closer than THRESHOLD to a wrist, the frame (with the wrist
    markers drawn on it) is saved as a JPEG under ``SAVE_PATH/video_name``.

    :param video_path: Path to the video.
    :param SAVE_PATH: Folder that receives the processed outputs.
    :param video_name: Name of the video, used as the output sub-folder name.
    """
    # Create the per-video output folder (and SAVE_PATH itself) if missing
    video_folder_path = os.path.join(SAVE_PATH, video_name)
    os.makedirs(video_folder_path, exist_ok=True)

    nas_conf = 0.3
    colors = [(128, 0, 128), (0, 255, 255)]  # Purple and yellow wrist markers

    cap = cv2.VideoCapture(video_path)
    try:
        # Guard against a reported frame rate below 1, which would otherwise
        # cause a division by zero; in that case every frame is analysed.
        fps = max(int(cap.get(cv2.CAP_PROP_FPS)), 1)
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            # Stop as soon as the stream is exhausted, before any skipping
            if not ret:
                break
            frame_count += 1

            if frame_count % fps != 0:
                continue

            # Predictions of both models
            results_pose = yolo_pose.predict(frame)
            results_nas = yolo_nas.predict(frame, conf=nas_conf, iou=0.8)

            # Collect bounding boxes of phone candidates
            max_width = frame.shape[1] / 15
            max_height = frame.shape[0] / 15
            all_phone_bboxes = []
            for res in results_nas:
                bboxes = res.prediction.bboxes_xyxy
                labels = res.prediction.labels
                confidences = res.prediction.confidence
                class_names = res.class_names

                for bbox, label, confidence in zip(bboxes, labels, confidences):
                    # NOTE(review): this only tests that the class name is
                    # non-empty, so any class passes; confirm whether a
                    # comparison with the phone class was intended.
                    if not class_names[label.astype(int)]:
                        continue

                    bbox_width = bbox[2] - bbox[0]
                    bbox_height = bbox[3] - bbox[1]
                    if confidence >= nas_conf and bbox_width < max_width and bbox_height < max_height:
                        all_phone_bboxes.append((bbox, confidence))

            phone_detected = False
            for phone_bbox, _ in all_phone_bboxes:
                center_phone = np.array([(phone_bbox[0] + phone_bbox[2]) / 2, (phone_bbox[1] + phone_bbox[3]) / 2])

                # Keypoints of every detected person
                keypoints = results_pose[0].keypoints.xy

                for person_keypoints in keypoints:
                    wrists = []  # Coordinates of both wrists, None when missing
                    for idx in (10, 9):  # Keypoint indices of the right and left wrist
                        try:
                            wrists.append(person_keypoints[idx].cpu().numpy())
                        except IndexError:
                            # The detection carries fewer keypoints than expected
                            wrists.append(None)

                    # Draw the wrist markers
                    for wrist, color in zip(wrists, colors):
                        if wrist is not None:
                            cv2.circle(frame, (int(wrist[0]), int(wrist[1])), 10, color, -1)

                    # One wrist close enough to the candidate centre is sufficient
                    if any(
                        wrist is not None and np.linalg.norm(center_phone - np.array(wrist)) < THRESHOLD
                        for wrist in wrists
                    ):
                        phone_detected = True

                if phone_detected:
                    break

            if phone_detected:
                label_time = seconds_to_time(frame_count / fps)
                print(f"Телефон в руке на кадре_{label_time}")
                save_path = os.path.join(video_folder_path, f"{label_time}.jpg")
                cv2.imwrite(save_path, frame)
    finally:
        # Release the capture even if prediction or saving fails
        cap.release()

In [None]:
# Source folder with the videos and destination folder for the saved frames
folder_path = "/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/Есть телефон"
SAVE_PATH = "/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/img_with_phones"

# Make sure the destination folder exists
if not os.path.exists(SAVE_PATH):
    os.mkdir(SAVE_PATH)

# Single-video run; for the whole folder call
# process_all_videos_in_folder(folder_path, SAVE_PATH) instead.
video_name = "00_26_30"
process_video(f"{folder_path}/{video_name}.mp4", SAVE_PATH, video_name)

In [None]:
# Batch run: process every .mp4 file of the source folder
SAVE_PATH = "/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/img_with_phones"
folder_path = "/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/Есть телефон"
process_all_videos_in_folder(folder_path=folder_path, SAVE_PATH=SAVE_PATH)

# EfficientDet

In [None]:
import cv2
import torch
import numpy as np
from ultralytics import YOLO
import os
import tensorflow as tf
import tensorflow_hub as hub

# Parameters
frames_with_phone = set()
THRESHOLD = 100  # Euclidean distance used to decide that two points are close

# 1. Model initialisation
device = torch.device("cuda:0")  # Refers to the first GPU

# EfficientDet detector loaded from TensorFlow Hub
model_url = "https://tfhub.dev/tensorflow/efficientdet/d6/1"
tfmodel = hub.load(model_url)

# Pose-estimation model
yolo_pose = YOLO('yolov8s-pose.pt')

def seconds_to_time(seconds):
    """
    Convert seconds into a ``MM:SS`` string.

    Minutes and seconds are truncated to integers; values below 10 are
    prefixed with a zero. Minutes are not wrapped into hours.
    """
    quotient, remainder = divmod(seconds, 60)
    mins = int(quotient)
    secs = int(remainder)
    mm = "0" + str(mins) if mins < 10 else str(mins)
    ss = "0" + str(secs) if secs < 10 else str(secs)
    return "%s:%s" % (mm, ss)

# 2. Обработка видео
def process_video(video_path):
    """
    Scans a video for frames in which a detected phone is close to a wrist.

    One frame out of every ``5 * fps`` (i.e. one frame per five seconds of
    footage) is passed to the pose model ``yolo_pose`` and to the EfficientDet
    detector ``tfmodel``. When the centre of a phone box lies closer than
    ``THRESHOLD`` to a wrist keypoint, the annotated frame is written into
    ``SAVE_PATH`` and its frame number is added to ``frames_with_phone``.

    :param video_path: Path to the video file to analyse.
    :return: None. Results are reported through ``frames_with_phone``, the
        saved images and the printed messages.
    """
    phone_class_id = 77     # class "cell phone" has ID 77 in COCO
    min_phone_score = 0.4   # detections below this confidence are ignored
    fallback_fps = 12       # frame rate the timestamps were previously hard-coded to

    cap = cv2.VideoCapture(video_path)
    try:
        native_fps = cap.get(cv2.CAP_PROP_FPS)
        # Some containers report 0 (or NaN); fall back instead of dividing by zero.
        if not native_fps > 0:
            native_fps = fallback_fps
        frame_stride = max(1, int(native_fps) * 5)
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            # Stop as soon as reading fails, before the frame is counted.
            if not ret:
                break
            frame_count += 1

            # Skip frames so that only one per stride is analysed.
            if frame_count % frame_stride != 0:
                continue

            print(frame_count, frame_stride)

            # Predictions of both models for this frame.
            results_pose = yolo_pose.predict(frame)

            # NOTE(review): OpenCV frames are BGR; confirm whether the detector
            # expects RGB input.
            input_tensor = tf.expand_dims(tf.convert_to_tensor(frame), axis=0)  # add batch dimension
            detections = tfmodel(input_tensor)

            boxes = detections["detection_boxes"].numpy()[0]
            scores = detections["detection_scores"].numpy()[0]
            class_ids = detections["detection_classes"].numpy()[0]

            height, width, _ = frame.shape

            # Boxes are scaled by the frame size and reordered from
            # (y_min, x_min, y_max, x_max) to (x_min, y_min, x_max, y_max).
            phone_bboxes = [
                [x_min * width, y_min * height, x_max * width, y_max * height]
                for (y_min, x_min, y_max, x_max), score, class_id in zip(boxes, scores, class_ids)
                if class_id == phone_class_id and score >= min_phone_score
            ]
            if not phone_bboxes:
                continue

            # Collect both wrists of every person once per frame. A person
            # without a full keypoint set is skipped, so a missing wrist can
            # neither raise nor silently reuse the previous person's wrist.
            keypoints = results_pose[0].keypoints
            people = keypoints.xy if keypoints is not None else []
            wrists = []
            for person_keypoints in people:
                if len(person_keypoints) <= 10:
                    continue
                right_wrist = person_keypoints[10].cpu().numpy()
                left_wrist = person_keypoints[9].cpu().numpy()
                wrists.append((right_wrist, left_wrist))
                # Mark the wrists on the frame.
                cv2.circle(frame, (int(right_wrist[0]), int(right_wrist[1])), 10, (128, 0, 128), -1)  # purple
                cv2.circle(frame, (int(left_wrist[0]), int(left_wrist[1])), 10, (0, 255, 255), -1)  # yellow

            phone_detected = False
            for x1, y1, x2, y2 in phone_bboxes:
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
                center_phone = np.array([(x1 + x2) / 2, (y1 + y2) / 2])
                # One wrist close to the phone centre is enough.
                phone_detected = any(
                    np.linalg.norm(center_phone - np.array(hand_point)) < THRESHOLD
                    for wrist_pair in wrists
                    for hand_point in wrist_pair
                )
                if phone_detected:
                    break

            if phone_detected:
                # The timestamp is derived from the real frame rate of the video.
                timestamp = seconds_to_time(frame_count / native_fps)
                print(f"Телефон в руке на кадре_{timestamp}")
                frames_with_phone.add(frame_count)
                save_path = os.path.join(SAVE_PATH, f"{timestamp}.jpg")
                cv2.imwrite(save_path, frame)

        print(sorted(frames_with_phone))
    finally:
        # Release the capture even when a model call or a write fails.
        cap.release()

# Run the processing
mp4_name = "00_26_30"
video_path = f"/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/Есть телефон/{mp4_name}.mp4"
# Folder for the saved frames
SAVE_PATH = f"/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/img_with_phones/{mp4_name}"  # Replace with your own path
# SAVE_PATH is nested inside img_with_phones; makedirs also creates that parent
# when it is missing and tolerates an already existing folder.
os.makedirs(SAVE_PATH, exist_ok=True)
process_video(video_path)

# EfficientDet TFLite

In [None]:
import cv2
import torch
import numpy as np
from ultralytics import YOLO
import os
import tensorflow as tf
import tensorflow_hub as hub

# 1. Model initialisation
# NOTE(review): `device` is not used by the code visible in this cell.
device = torch.device("cuda:0")  # to use the first GPU
model_url = "https://tfhub.dev/tensorflow/efficientdet/d7/1"
tfmodel = hub.load(model_url)

# Save the model in SavedModel format
saved_model_path = "tfhub_model"
tf.saved_model.save(tfmodel, saved_model_path)

# Convert the model to TFLite with FP16 quantisation
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_model_fp16 = converter.convert()

# Use the TFLite model for inference
interpreter = tf.lite.Interpreter(model_content=tflite_model_fp16)
interpreter.allocate_tensors()

yolo_pose = YOLO('yolov8s-pose.pt')

# Parameters
THRESHOLD = 100  # Euclidean distance used to decide that a wrist is close to a phone
# Frame numbers collected by process_video for frames where a phone was near a wrist.
frames_with_phone = set()

def seconds_to_time(seconds):
    """
    Formats a duration in seconds as a zero-padded "MM:SS" string.

    Fractional seconds are truncated. Minutes are not wrapped into hours, so
    3725 seconds is rendered as "62:05".

    :param seconds: Duration in seconds (int or float).
    :return: String of the form "MM:SS".
    """
    minutes, seconds = divmod(seconds, 60)
    # The format spec does the zero padding, so the numbers never have to be
    # rebound to strings.
    return f"{int(minutes):02d}:{int(seconds):02d}"

# 2. Обработка видео
def process_video(video_path):
    """
    Scans a video for frames in which a detected phone is close to a wrist,
    using the TFLite ``interpreter`` as the object detector.

    One frame per second of footage is passed to the pose model ``yolo_pose``
    and to the TFLite detector. When the centre of a phone box lies closer
    than ``THRESHOLD`` to a wrist keypoint, the annotated frame is written into
    ``SAVE_PATH`` and its frame number is added to ``frames_with_phone``.

    :param video_path: Path to the video file to analyse.
    :return: None. Results are reported through ``frames_with_phone``, the
        saved images and the printed messages.
    """
    phone_class_id = 77     # class "cell phone" has ID 77 in COCO
    min_phone_score = 0.1   # detections below this confidence are ignored
    fallback_fps = 12       # frame rate the timestamps were previously hard-coded to

    # Tensor indices do not change between frames, so look them up once.
    input_index = interpreter.get_input_details()[0]['index']
    output_details = interpreter.get_output_details()
    # NOTE(review): assumes outputs 0/1/2 are boxes/scores/classes — verify
    # against the converted model's output_details.
    boxes_index = output_details[0]['index']
    scores_index = output_details[1]['index']
    classes_index = output_details[2]['index']

    cap = cv2.VideoCapture(video_path)
    try:
        native_fps = cap.get(cv2.CAP_PROP_FPS)
        # Some containers report 0 (or NaN); fall back instead of dividing by zero.
        if not native_fps > 0:
            native_fps = fallback_fps
        frame_stride = max(1, int(native_fps))
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            # Stop as soon as reading fails, before the frame is counted.
            if not ret:
                break
            frame_count += 1

            # Skip frames so that only one per second is analysed.
            if frame_count % frame_stride != 0:
                continue

            # Predictions of both models for this frame.
            results_pose = yolo_pose.predict(frame)

            # NOTE(review): OpenCV frames are BGR; confirm whether the detector
            # expects RGB input.
            input_data = np.expand_dims(frame, axis=0)  # add batch dimension
            # set_tensor requires the allocated input shape to match the data,
            # so resize and re-allocate whenever the frame shape differs.
            allocated_shape = interpreter.get_input_details()[0]['shape']
            if list(allocated_shape) != list(input_data.shape):
                interpreter.resize_tensor_input(input_index, input_data.shape)
                interpreter.allocate_tensors()

            interpreter.set_tensor(input_index, input_data)
            interpreter.invoke()

            # get_tensor already returns NumPy arrays, so no .numpy() call here.
            boxes = interpreter.get_tensor(boxes_index)[0]
            scores = interpreter.get_tensor(scores_index)[0]
            class_ids = interpreter.get_tensor(classes_index)[0]

            height, width, _ = frame.shape

            phone_bboxes = []
            for (y_min, x_min, y_max, x_max), confidence, class_id in zip(boxes, scores, class_ids):
                if class_id == phone_class_id and confidence >= min_phone_score:
                    # Scale the box by the frame size and reorder it to
                    # (x_min, y_min, x_max, y_max).
                    bbox = [x_min * width, y_min * height, x_max * width, y_max * height]
                    phone_bboxes.append(bbox)
                    cv2.putText(frame, f"{confidence:.2f}", (int(bbox[0]), int(bbox[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
            if not phone_bboxes:
                continue

            # Collect both wrists of every person once per frame. A person
            # without a full keypoint set is skipped, so a missing wrist can
            # neither raise nor silently reuse the previous person's wrist.
            keypoints = results_pose[0].keypoints
            people = keypoints.xy if keypoints is not None else []
            wrists = []
            for person_keypoints in people:
                if len(person_keypoints) <= 10:
                    continue
                right_wrist = person_keypoints[10].cpu().numpy()
                left_wrist = person_keypoints[9].cpu().numpy()
                wrists.append((right_wrist, left_wrist))
                # Mark the wrists on the frame.
                cv2.circle(frame, (int(right_wrist[0]), int(right_wrist[1])), 10, (128, 0, 128), -1)  # purple
                cv2.circle(frame, (int(left_wrist[0]), int(left_wrist[1])), 10, (0, 255, 255), -1)  # yellow

            phone_detected = False
            for x1, y1, x2, y2 in phone_bboxes:
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)
                center_phone = np.array([(x1 + x2) / 2, (y1 + y2) / 2])
                # One wrist close to the phone centre is enough.
                phone_detected = any(
                    np.linalg.norm(center_phone - np.array(hand_point)) < THRESHOLD
                    for wrist_pair in wrists
                    for hand_point in wrist_pair
                )
                if phone_detected:
                    break

            if phone_detected:
                # The timestamp is derived from the real frame rate of the video.
                timestamp = seconds_to_time(frame_count / native_fps)
                print(f"Телефон в руке на кадре_{timestamp}")
                frames_with_phone.add(frame_count)
                save_path = os.path.join(SAVE_PATH, f"{timestamp}.jpg")
                cv2.imwrite(save_path, frame)

        print(sorted(frames_with_phone))
    finally:
        # Release the capture even when a model call or a write fails.
        cap.release()

# Run the processing
mp4_name = "00_26_30"
video_path = f"/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/Есть телефон/{mp4_name}.mp4"
# Folder for the saved frames
SAVE_PATH = f"/home/student/snap/code/Hackathons/Haborovsk/Анализ бригад (телефон)/img_with_phones/{mp4_name}"  # Replace with your own path
# SAVE_PATH is nested inside img_with_phones; makedirs also creates that parent
# when it is missing and tolerates an already existing folder.
os.makedirs(SAVE_PATH, exist_ok=True)
process_video(video_path)