In [3]:
!pip install pandas numpy mediapipe scikit-learn catboost transformers librosa opencv-python torch
!pip install torch torchvision torchaudio

Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting jaxlib
  Downloading jaxlib-0.4.35-cp310-cp310-manylinux2014_x86_64.whl (87.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.3/87.3 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting absl-py
  Downloading absl_py-2.1.0-py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.7/133.7 KB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3
  Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 KB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sounddevice>=0.4.4
  Downloading sounddevice-0.5.1-py3-none-any.

In [5]:
import os
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from catboost import CatBoostRegressor
import torch

# Определяем названия меток
label_names = ['extraversion', 'neuroticism', 'agreeableness',
               'conscientiousness', 'openness']

class PersonalityDataset:
    def __init__(self, csv_file, path_to_video):
        self.data = pd.read_csv(csv_file)
        self.path_to_video = path_to_video
        self.label_names = label_names
        # Ограничиваем датасет первыми 30 видео
        self.data = self.data.iloc[:30]

    def __len__(self):
        return len(self.data)

    def extract_keypoints(self, video_path, visualize=False):
        mp_holistic = mp.solutions.holistic
        mp_drawing = mp.solutions.drawing_utils

        holistic = mp_holistic.Holistic(
            static_image_mode=False,
            model_complexity=2,
            enable_segmentation=False,
            refine_face_landmarks=True
        )

        cap = cv2.VideoCapture(video_path)
        keypoints_list = []

        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Преобразование изображения в формат RGB
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Получение результатов
            results = holistic.process(image)

            keypoints = []

            # Позы
            if results.pose_landmarks:
                for landmark in results.pose_landmarks.landmark:
                    keypoints.extend([landmark.x, landmark.y,
                                      landmark.z, landmark.visibility])
            else:
                keypoints.extend([0] * 33 * 4)

            # Лицо
            if results.face_landmarks:
                for landmark in results.face_landmarks.landmark:
                    keypoints.extend([landmark.x, landmark.y, landmark.z])
            else:
                keypoints.extend([0] * 468 * 3)

            # Левая рука
            if results.left_hand_landmarks:
                for landmark in results.left_hand_landmarks.landmark:
                    keypoints.extend([landmark.x, landmark.y, landmark.z])
            else:
                keypoints.extend([0] * 21 * 3)

            # Правая рука
            if results.right_hand_landmarks:
                for landmark in results.right_hand_landmarks.landmark:
                    keypoints.extend([landmark.x, landmark.y, landmark.z])
            else:
                keypoints.extend([0] * 21 * 3)

            keypoints_list.append(keypoints)

            if visualize and frame_count == 1:
                # Визуализация ключевых точек на первом кадре
                annotated_image = frame.copy()
                # Позы
                if results.pose_landmarks:
                    mp_drawing.draw_landmarks(
                        annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
                # Лицо
                if results.face_landmarks:
                    mp_drawing.draw_landmarks(
                        annotated_image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)
                # Левая рука
                if results.left_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                # Правая рука
                if results.right_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                # Преобразование изображения в формат RGB для отображения
                annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
                # Отображение изображения в Jupyter Notebook
                plt.figure(figsize=(10, 10))
                plt.imshow(annotated_image)
                plt.axis('off')
                plt.title(f"Ключевые точки для видео: {os.path.basename(video_path)}")
                plt.show()

        cap.release()
        cv2.destroyAllWindows()
        holistic.close()

        # Усредняем ключевые точки по всем кадрам
        keypoints_array = np.array(keypoints_list)
        if keypoints_array.size == 0:
            num_keypoints = (33 * 4) + (468 * 3) + (21 * 3 * 2)
            keypoints_mean = np.zeros(num_keypoints)
        else:
            keypoints_mean = keypoints_array.mean(axis=0)

        return keypoints_mean

# Путь к CSV-файлу с аннотациями и к директории с видео
csv_file = 'processed_data/train.csv'
path_to_video = 'processed_data/train_video'

# Создание экземпляра датасета
dataset = PersonalityDataset(csv_file, path_to_video)

# Списки для хранения признаков, меток и идентификаторов видео
X = []
y = []
video_ids = []

N_visualization_samples = 5  # Количество видео для визуализации

for idx in range(len(dataset)):
    row = dataset.data.iloc[idx]
    video_file = row['video_file']
    video_path = os.path.join(path_to_video, video_file)
    if not os.path.isfile(video_path):
        print(f"Video file not found: {video_path}")
        continue

    visualize = idx < N_visualization_samples

    # Извлечение ключевых точек с визуализацией
    keypoints = dataset.extract_keypoints(video_path, visualize=visualize)

    # Подготовка меток
    labels = {}
    for label_name in label_names:
        labels[label_name] = torch.tensor(row[label_name], dtype=torch.float32)

    # Извлекаем значения меток
    label_values = []
    for label_name in label_names:
        label_value = labels[label_name].item()
        label_values.append(label_value)

    X.append(keypoints)
    y.append(label_values)
    video_ids.append(video_file)

X = np.array(X)
y = np.array(y)
video_ids = np.array(video_ids)

# Разделение данных на обучающую и тестовую выборки
X_train, X_test, y_train, y_test, video_ids_train, video_ids_test = train_test_split(
    X, y, video_ids, test_size=0.2, random_state=42)

# Обучение моделей
n_labels = y_train.shape[1]
regressors = {}

for i in range(n_labels):
    print(f"Обучение модели для метки '{label_names[i]}' ({i+1}/{n_labels})")
    model = CatBoostRegressor(
        iterations=1000,
        learning_rate=0.01,
        depth=6,
        loss_function='RMSE',
        eval_metric='RMSE',
        random_seed=42,
        verbose=100
    )
    model.fit(
        X_train,
        y_train[:, i],
        eval_set=(X_test, y_test[:, i]),
        use_best_model=True
    )
    regressors[label_names[i]] = model

# Предсказания и оценка
predictions = {label_name: {} for label_name in label_names}
ground_truths = {label_name: {} for label_name in label_names}

for i in range(len(X_test)):
    x = X_test[i]
    video_id = video_ids_test[i]

    for idx, label_name in enumerate(label_names):
        model = regressors[label_name]
        y_pred = model.predict(x.reshape(1, -1))[0]
        y_true = y_test[i][idx]

        predictions[label_name][video_id] = y_pred
        ground_truths[label_name][video_id] = y_true

# Функции для вычисления метрики F1 score
def calculate_tag_f1(pred, truth, epsilon=0.1):
    predicts = []
    truths_list = []
    for key in truth.keys():
        pred_value = pred.get(key)
        if pred_value is None:
            raise Exception(f"Предсказание для видео {key} отсутствует")
        truth_value = truth[key]
        predicts.append(1 if abs(pred_value - truth_value) < epsilon else 0)
        truths_list.append(1)  # Истинные значения всегда 1
    return f1_score(truths_list, predicts, average='macro')

def calculate_f1(predictions, ground_truths):
    results = []
    for label_name in label_names:
        pred = predictions[label_name]
        truth = ground_truths[label_name]
        f1 = calculate_tag_f1(pred, truth)
        results.append(f1)
        print(f"F1 score для метки '{label_name}': {f1:.4f}")
    average_f1 = np.mean(results)
    print(f"\nСредний F1 score по всем меткам: {average_f1:.4f}")
    return average_f1

average_f1 = calculate_f1(predictions, ground_truths)


I0000 00:00:1731181032.370605  279224 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1731181032.383314  361918 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 550.127.05), renderer: Tesla V100-SXM3-32GB/PCIe/SSE2
W0000 00:00:1731181032.549271  361914 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731181032.694178  361917 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731181032.696663  361916 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731181032.697197  361914 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:17

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (459,) + inhomogeneous part.