In [None]:
!pip install mediapipe



In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.228-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from ultralytics import YOLO
import random

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [None]:
import math
from collections import deque
from pathlib import Path

import torch

In [None]:
# Skeleton edges as (start, end) keypoint indices; each pair is drawn as one
# line segment. Indices presumably follow the 17-point layout of the YOLO
# pose model -- confirm against the model's keypoint order.
point_pairs = [
    # Torso
    (5, 11),
    (6, 12),
    # Link between the legs (hip line)
    (11, 12),
    # Head
    (0, 1),
    (0, 2),
    (1, 3),
    (2, 4),
    # Shoulder line
    (5, 6),
    # Left arm
    (5, 7),
    (7, 9),
    # Right arm
    (6, 8),
    (8, 10),
    # Left leg
    (11, 13),
    (13, 15),
    # Right leg
    (12, 14),
    (14, 16),
]

In [None]:
# Short alias for MediaPipe's face_mesh solution module; the FaceMesh
# detector is created from it inside process_video_with_tracking.
mp_face_mesh = mp.solutions.face_mesh

In [None]:
# Run inference on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Bare expression so the notebook displays the chosen device.
device

'cuda'

In [None]:
def classify_expression(face_landmarks, image_width, image_height):
    """
    Classify the mood of a face from the shape of its mouth.

    The mouth width (landmarks 61 and 291) is divided by the lip opening
    (landmarks 13 and 14), both measured in pixels, and the ratio is mapped
    to one of three labels.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with
            normalised ``x`` / ``y`` attributes.
        image_width: width in pixels used to scale ``x``.
        image_height: height in pixels used to scale ``y``.

    Returns:
        ``"mood: happy"`` when the ratio exceeds 2.1, ``"mood: sad"`` when it
        is below 1.6, otherwise ``"mood: neutral"``.
    """
    def to_pixels(landmark_index):
        point = face_landmarks.landmark[landmark_index]
        return point.x * image_width, point.y * image_height

    corner_left = to_pixels(61)
    corner_right = to_pixels(291)
    lip_upper = to_pixels(13)
    lip_lower = to_pixels(14)

    width_px = math.dist(corner_left, corner_right)
    # The epsilon keeps the division defined when the lips coincide.
    opening_px = math.dist(lip_upper, lip_lower) + 1e-6
    aspect = width_px / opening_px

    if aspect > 2.1:
        return "mood: happy"
    if aspect < 1.6:
        return "mood: sad"
    return "mood: neutral"


def compute_normalized_speed(history, fps: float) -> float:
    """
    Estimate a track's speed in box heights per second.

    The speed is the straight-line displacement between the oldest and the
    newest sample in ``history``, divided by the mean box height of those two
    samples and by the elapsed time.

    Args:
        history: sequence (e.g. ``deque``) of ``(frame_idx, cx, cy, box_h)``
            tuples, oldest first.
        fps: frame rate used to turn the frame-index gap into seconds.

    Returns:
        The normalised speed, or ``0.0`` when it cannot be computed (fewer
        than two samples, non-positive or NaN ``fps``, non-positive frame gap
        or box height). Rough scale given by the original author:
        0.0 is not moving, 0.5 is a slow walk, 1.5+ is fast movement / running.
    """
    if len(history) < 2:
        return 0.0

    # Validate fps before dividing by it: fps == 0 used to raise
    # ZeroDivisionError before the elapsed-time guard below could run.
    # ``not fps > 0`` also rejects NaN.
    if not fps > 0:
        return 0.0

    f0, cx0, cy0, h0 = history[0]
    f1, cx1, cy1, h1 = history[-1]

    frames = f1 - f0
    if frames <= 0:
        return 0.0

    time_sec = frames / fps
    # Still reachable for an infinite fps, where the quotient is 0.0.
    if time_sec <= 0:
        return 0.0

    dist_px = math.hypot(cx1 - cx0, cy1 - cy0)
    # Normalising by the box height makes the value roughly independent of
    # how large the person appears in the frame.
    box_h = (h0 + h1) / 2.0
    if box_h <= 0:
        return 0.0

    return (dist_px / box_h) / time_sec


def tempo_from_norm_speed(speed_norm: float) -> str:
    """
    Map a normalised speed to a movement-tempo label.

    The thresholds are empirical and need tuning for each set of videos.
    """
    # (exclusive upper bound, label), checked in ascending order.
    # The lowest band filters out small jitter of a person standing still.
    bands = (
        (0.05, "temp: standing"),
        (0.6, "temp: walk"),
    )
    for upper_bound, label in bands:
        if speed_norm < upper_bound:
            return label
    return "temp: run / walk fast"

In [None]:
def _track_color(obj_id):
    """Return a BGR colour that stays the same for a given track id."""
    # A private generator yields the same three draws as seeding the shared
    # module-level one, but leaves the global ``random`` state untouched.
    rng = random.Random(int(obj_id))
    return (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))


def _estimate_mood(face_mesh, frame, box, frame_width, frame_height):
    """Run FaceMesh on the person's box crop and return a ``"mood: ..."`` label."""
    x1, y1, x2, y2 = box
    # Clamp to the frame: a negative start index would wrap around in a slice.
    left, top = max(0, x1), max(0, y1)
    right, bottom = min(frame_width, x2), min(frame_height, y2)

    person_roi = frame[top:bottom, left:right]
    if person_roi.size == 0:
        return "mood: unknown"

    face_result = face_mesh.process(cv2.cvtColor(person_roi, cv2.COLOR_BGR2RGB))
    if not face_result.multi_face_landmarks:
        return "mood: unknown"

    roi_h, roi_w = person_roi.shape[:2]
    return classify_expression(face_result.multi_face_landmarks[0], roi_w, roi_h)


def _draw_skeleton(frame, keypoints, color):
    """Draw the keypoints and the ``point_pairs`` limbs of one person."""
    # Points with a non-positive coordinate are skipped -- presumably that is
    # how the pose model reports joints it did not detect (confirm).
    for xk, yk in keypoints:
        if xk > 0 and yk > 0:
            cv2.circle(frame, (int(xk), int(yk)), 3, (0, 255, 255), -1)

    for start, end in point_pairs:
        xs, ys = keypoints[start]
        xe, ye = keypoints[end]
        if xs > 0 and ys > 0 and xe > 0 and ye > 0:
            cv2.line(frame, (int(xs), int(ys)), (int(xe), int(ye)), color, 2)


def _draw_label(frame, text, origin, color):
    """Write one line of overlay text with the font settings used everywhere."""
    cv2.putText(frame, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def _annotate_detections(
    frame, result, face_mesh, track_history, frame_idx, fps,
    frame_width, frame_height,
):
    """Draw box, skeleton, mood, tempo and id for every tracked person in ``result``."""
    boxes = result.boxes.xyxy.cpu().numpy().astype(int)
    ids = result.boxes.id.cpu().numpy().astype(int)
    keypoints_all = (
        result.keypoints.xy.cpu().numpy()
        if result.keypoints is not None else None
    )

    for i, (box, obj_id) in enumerate(zip(boxes, ids)):
        # Plain Python ints for dict keys, slicing and the OpenCV calls.
        x1, y1, x2, y2 = (int(v) for v in box)
        obj_id = int(obj_id)
        color = _track_color(obj_id)

        # ---------- Speed and movement tempo ----------
        # The box centre is the tracked position; the box height is the
        # normalisation scale (at least 1 so it is never zero).
        box_h = max(1, y2 - y1)
        cx = (x1 + x2) / 2.0
        cy = (y1 + y2) / 2.0

        # Only the last 10 samples per id are kept.
        hist = track_history.setdefault(obj_id, deque(maxlen=10))
        hist.append((frame_idx, cx, cy, box_h))
        tempo_text = tempo_from_norm_speed(compute_normalized_speed(hist, fps))

        # ---------- Face + mood ----------
        mood_text = _estimate_mood(
            face_mesh, frame, (x1, y1, x2, y2), frame_width, frame_height
        )

        # ---------- Drawing ----------
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        if keypoints_all is not None:
            _draw_skeleton(frame, keypoints_all[i], color)

        # Labels sit above the box but are kept inside the frame.
        _draw_label(frame, mood_text, (x1, max(20, y1 - 30)), color)
        _draw_label(frame, tempo_text, (x1, max(35, y1 - 15)), color)
        _draw_label(
            frame,
            f"Id {obj_id}",
            (x1, min(frame_height - 10, y1 + 15)),
            (0, 255, 255),
        )


def process_video_with_tracking(
    model,
    input_video_path,
    show_video=True,
    save_video=False,
    output_video_path="output_video.mp4"
):
    """
    Track people in a video and annotate every frame.

    For each tracked person the frame gets a bounding box, the pose skeleton
    (when the model returns keypoints), a mood label from FaceMesh, a movement
    tempo label derived from the recent box-centre history, and the track id.

    Args:
        model: object exposing ``track(frame, ...)`` as used below; the
            notebook passes an Ultralytics ``YOLO`` pose model.
        input_video_path: path of the video to read.
        show_video: display each annotated frame in an OpenCV window; pressing
            ``q`` in that window stops processing early.
        save_video: write the annotated frames to ``output_video_path``.
        output_video_path: destination file (``mp4v`` codec); its parent
            directory is created when missing.

    Raises:
        Exception: when the input video cannot be opened, or when
            ``save_video`` is set and the output writer cannot be opened.
    """
    cap = cv2.VideoCapture(str(input_video_path))
    out = None
    window_opened = False

    # try/finally so the capture, the writer and any window are released even
    # when tracking or drawing raises part-way through the video.
    try:
        if not cap.isOpened():
            raise Exception("Error: Could not open video file.")

        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # Fall back to a default when the container reports no frame rate.
            fps = 25.0

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if save_video:
            # VideoWriter does not create directories and does not raise on
            # failure, so prepare the folder and check the writer explicitly.
            Path(output_video_path).parent.mkdir(parents=True, exist_ok=True)
            out = cv2.VideoWriter(
                str(output_video_path),
                cv2.VideoWriter_fourcc(*'mp4v'),
                fps,
                (frame_width, frame_height)
            )
            if not out.isOpened():
                raise Exception("Error: Could not open output video file for writing.")

        track_history = {}
        frame_idx = -1

        # One FaceMesh instance for the whole video.
        # NOTE(review): static_image_mode=False is meant for a single video
        # stream, yet crops of different people are fed in turn -- consider
        # static_image_mode=True; confirm against the MediaPipe docs.
        with mp_face_mesh.FaceMesh(
            static_image_mode=False,
            refine_landmarks=True,
            max_num_faces=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        ) as face_mesh:

            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame_idx += 1

                # NOTE(review): persist=True keeps tracker state between
                # calls; it presumably also carries over when the same model
                # is reused for another video -- confirm and reset if needed.
                results = model.track(
                    frame,
                    iou=0.5,
                    conf=0.3,
                    persist=True,
                    imgsz=608,
                    verbose=False,
                    tracker="botsort.yaml"
                )

                if results and results[0].boxes.id is not None:
                    _annotate_detections(
                        frame, results[0], face_mesh, track_history,
                        frame_idx, fps, frame_width, frame_height,
                    )

                if out is not None:
                    out.write(frame)

                if show_video:
                    cv2.imshow("frame", frame)
                    window_opened = True
                    # imshow only paints while waitKey pumps GUI events.
                    if cv2.waitKey(1) & 0xFF == ord("q"):
                        break
    finally:
        cap.release()
        if out is not None:
            out.release()
        if window_opened:
            cv2.destroyAllWindows()


In [None]:
# Load the YOLOv8 medium pose checkpoint and move it to the selected device.
model = YOLO('yolov8m-pose.pt')
model.to(device)

# Annotate the first clip; nothing is displayed, the result is written to disk.
process_video_with_tracking(
    model,
    "/content/data/video_1.mp4",
    show_video=False,
    save_video=True,
    output_video_path="/content/data/out/video_1.mp4",
)

In [None]:
# Annotate the second clip with the same model and settings.
process_video_with_tracking(
    model,
    "/content/data/video_2.mp4",
    show_video=False,
    save_video=True,
    output_video_path="/content/data/out/video_2.mp4",
)