In [16]:
pack_identify = {
    "first": [10, 127, 356, 151, 9, 8],                # Вертикальный лоб (Рационал.) / Лоб сужается кверху (Иррационал.)
    "second": [65, 52, 55, 295, 282, 285, 159, 386],   # Высокие брови (Рационал.) / Низкие брови (Иррационал.)
    "third": [10, 152, 234, 454]                       # Голова вытянута по вертикали (Рационал.) / Голова вытянута по горизонтали (Иррационал.)
}


In [17]:
import cv2
from imutils import face_utils
import pandas as pd
import numpy as np
import mediapipe as mp

import os
import re

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import joblib
# Пути до ресурсов

DATASET_BINARY_DATA = "../binary_data/"
DATASET_ALL_DATA = "../all_data/"
#PREDICTOR_PATH = "../models/shape_predictor_68_face_landmarks.dat"

# Загружаем модель
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)

def numeric_key(name):
    return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', name)]
#Функция для нормализации изображения (Приведение к единому отображению)

def normalize_landmarks(points):
    points = points.astype(np.float32)
    #points = landmarks.reshape(-1, 2)
    left_eye = points[33]
    right_eye = points[263]

    center_x = (left_eye[0] + right_eye[0]) / 2
    center_y = (left_eye[1] + right_eye[1]) / 2

    points[:, 0] -= center_x
    points[:, 1] -= center_y

    eye_dist = np.linalg.norm(left_eye - right_eye)
    if eye_dist > 0:
        points = points / eye_dist
    return points

def get_landmarks(image_path, landmarks = [], all_labels = []):
    image_array = np.fromfile(image_path, dtype=np.uint8)
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Изображение не загружено!")
            
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    height, width = image_rgb.shape[:2]

    # Детектируем лицо
    faces = face_mesh.process(image_rgb)

    if not faces.multi_face_landmarks:
        return
        #raise ValueError("Лицо не найдено на изображении")

    shape = faces.multi_face_landmarks[0]

    landmarks = normalize_landmarks(np.array([(p.x * width, p.y * height) for p in shape.landmark]))
    landmarks = np.concatenate((np.concatenate((landmarks[pack_identify["first"]], landmarks[pack_identify["second"]]), axis=0), landmarks[pack_identify["third"]]), axis=0).flatten()

    return landmarks


def build_dataframe(dataset_dir, all_landmarks, all_labels, get_label_func = lambda k : k % 8): # получаем числовой код папки
    k = 0
    for label_dir in sorted(os.listdir(dataset_dir), key=numeric_key):
        label_path = os.path.join(dataset_dir, label_dir)
        if not os.path.isdir(label_path):
            continue
        
        label = get_label_func(k)
        #label = k % 8
        #label = 0 if label_dir == 'T' else 1  # T = 0, F = 1

        for filename in os.listdir(label_path):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_path = os.path.join(label_path, filename)

                landmarks = get_landmarks(image_path)
                if landmarks is None: continue
                
                all_landmarks.append(landmarks)
                all_labels.append(label)

        k += 1

In [18]:
all_landmarks = []
all_labels = []

build_dataframe(DATASET_ALL_DATA, all_landmarks, all_labels) # получаем числовой код 

all_landmarks_1 = []
all_labels_1 = []

build_dataframe(DATASET_BINARY_DATA, all_landmarks_1, all_labels_1) # получаем числовой код

In [19]:
columns = []
for idx in list(range(0, 27)) + list(range(89, 96)) + list(range(152, 171)):
    columns.append(f"x{idx}")
    columns.append(f"y{idx}")

df_all = pd.DataFrame(all_landmarks, columns=columns)
df_all['label'] = all_labels
df_all = df_all.sample(frac=1).reset_index(drop=True)

df_binary = pd.DataFrame(all_landmarks_1, columns=columns)
df_binary['label'] = all_labels_1
df_binary = df_binary.sample(frac=1).reset_index(drop=True)

df_all

df_binary

ValueError: 106 columns passed, passed data had 36 columns

In [None]:
df_all

Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y166,x167,y167,x168,y168,x169,y169,x170,y170,label
0,0.039556,0.800219,0.040551,0.835567,0.039370,0.904385,0.023748,0.476505,-0.032274,0.472442,...,0.452700,-0.055832,0.569626,0.005097,-0.044706,-0.425330,0.979296,-0.330337,1.040274,5
1,0.069204,0.949079,0.071400,0.994784,0.074506,1.061886,0.049254,0.485513,-0.014152,0.485037,...,0.465277,-0.042659,0.610971,0.009694,-0.079036,-0.409484,1.108200,-0.309829,1.179608,3
2,0.051260,0.849722,0.050984,0.886308,0.048096,0.968774,0.062516,0.547467,-0.006016,0.538381,...,0.509672,-0.042553,0.632430,0.026226,-0.027209,-0.447511,1.083088,-0.338900,1.149438,3
3,0.005170,0.797627,0.005314,0.829205,0.005636,0.884254,0.006503,0.585489,-0.067630,0.557873,...,0.509616,-0.096566,0.605416,0.004296,0.033541,-0.445744,0.870623,-0.354929,0.953996,3
4,0.059115,1.027295,0.055189,1.059976,0.042247,1.065503,0.086167,0.657299,0.008051,0.638515,...,0.603982,-0.028430,0.738018,0.047930,0.026426,-0.413625,0.975198,-0.324337,1.050984,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1861,0.318598,1.044855,0.309301,1.095019,0.261935,1.161662,0.430788,0.589299,0.306310,0.602951,...,0.588060,0.227855,0.732322,0.215513,-0.046318,-0.421969,1.300458,-0.272905,1.367967,0
1862,0.122004,0.843565,0.121035,0.884279,0.111251,0.962463,0.132051,0.468270,0.054811,0.468980,...,0.449379,0.016938,0.578169,0.059030,-0.060383,-0.426208,1.077536,-0.312471,1.139673,5
1863,-0.037614,0.848849,-0.038010,0.888391,-0.037046,0.941070,-0.030212,0.475336,-0.090194,0.464028,...,0.434640,-0.121510,0.570803,-0.012857,-0.026590,-0.467162,0.931349,-0.387284,1.006428,0
1864,-0.048651,0.808810,-0.049755,0.845119,-0.052097,0.911458,-0.039418,0.487818,-0.095240,0.475066,...,0.441709,-0.127323,0.569228,-0.007464,-0.045433,-0.492618,0.940141,-0.408656,1.012122,3


In [None]:
print(df_all["label"].unique())

print(df_binary["label"].unique())

[5 3 0 4 7 2 1 6]
[0 1]


In [None]:
print(df_all["label"].value_counts())

print(df_binary["label"].value_counts())

label
0    648
3    487
5    428
4    288
2      5
6      5
1      3
7      2
Name: count, dtype: int64
label
0    1209
1     656
Name: count, dtype: int64


In [None]:
# Разделим данные
X_train_subtype, X_test_subtype, y_train_subtype, y_test_subtype = train_test_split(
    df_all.drop(columns=['label']),  # все колонки кроме 'label'
    df_all['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    #stratify=df['label'],         # сбалансированная разбивка по классам
    random_state=42
)

X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    df_binary.drop(columns=['label']),  # все колонки кроме 'label'
    df_binary['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    stratify=df_binary['label'],         # сбалансированная разбивка по классам
    random_state=42
)

# Обучим модель
model_subtype = LogisticRegression(max_iter=1000)
model_subtype.fit(X_train_subtype, y_train_subtype)

model_binary = LogisticRegression(max_iter=1000)
model_binary.fit(X_train_binary, y_train_binary)

# Оценим качество
y_pred_subtype = model_subtype.predict(X_test_subtype)
print(f"Accuracy subtype: {accuracy_score(y_test_subtype, y_pred_subtype):.4f}")

y_pred_binary = model_binary.predict(X_test_binary)
print(f"Accuracy binary class: {accuracy_score(y_test_binary, y_pred_binary):.4f}")

Accuracy subtype: 0.4492
Accuracy binary class: 0.6917


In [None]:
def predict_dichotomy(image_path, model):
    landmarks = get_landmarks(image_path)
    
    X_input = pd.DataFrame([landmarks], columns=columns)

    prediction = model.predict(X_input)[0]
    proba = model.predict_proba(X_input)[0]

    return prediction, proba

In [None]:
joblib.dump(model_subtype, 'subtype_classifier.pkl')

joblib.dump(model_binary, 'logic_ethics_classifier.pkl')

subtypes = {
    0 : "Шизоидный",
    1 : "Параноидальный",
    2 : "Нарциссический",
    3 : "Психопатический",
    4 : "Компульсивный",
    5 : "Истерический",
    6 : "Депрессивный",
    7 : "Мазохистический"
}

In [None]:
model_subtype_file = joblib.load("subtype_classifier.pkl")  # путь к сохранённой модели
model_binary_file = joblib.load("logic_ethics_classifier.pkl")  # путь к сохранённой модели

In [None]:
# 2. Инициализация детектора и предиктора
result_subtype, confidence_subtype = predict_dichotomy("../all_data/12/16 (2).jpg", model_subtype_file)
print(f"Подтип личности: {subtypes[result_subtype]} (Уверенность: {max(confidence_subtype):.2f})")

result_binary, confidence_binary = predict_dichotomy("../all_data/12/16 (2).jpg", model_binary_file)
print(f"Дихотомия: {'Этика' if result_binary == 0 else 'Логика'} (Уверенность: {max(confidence_binary):.2f})")


Подтип личности: Истерический (Уверенность: 0.28)
Дихотомия: Этика (Уверенность: 0.73)


In [None]:
def normalize_landmarks(landmarks):
    """
    landmarks: np.array с shape (N, 2), координаты в пикселях
    Возвращает: нормализованные landmarks и параметры центра и масштаба
    """
    left_eye = landmarks[33]
    right_eye = landmarks[263]
    center = (left_eye + right_eye) / 2
    scale = np.linalg.norm(left_eye - right_eye)

    normalized = (landmarks - center) / scale
    return normalized, center, scale


def denormalize_landmarks(normalized, center, scale):
    """
    Восстанавливает нормализованные точки обратно в пиксельные координаты
    """
    return (normalized * scale) + center

def draw_normalized_landmarks(image_path):
    # 1. Загрузка изображения
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Не удалось загрузить изображение")
    h, w = image.shape[:2]

    # 2. Инициализация MediaPipe
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True)

    # 3. Обработка изображения
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(rgb)

    # 4. Проверка наличия лица
    if not results.multi_face_landmarks:
        print("Лицо не найдено")
        return

    # 5. Извлечение точек и нормализация
    face_landmarks = results.multi_face_landmarks[0]
    landmarks = np.array([
        [lm.x * w, lm.y * h]
        for lm in face_landmarks.landmark
    ])
    normalized, center, scale = normalize_landmarks(landmarks)
    restored = denormalize_landmarks(normalized, center, scale)

    # 6. Отображение денормализованных точек
    for point in restored.astype(int):
        x, y = point
        cv2.circle(image, (x, y), radius=1, color=(0, 255, 255), thickness=-1)

    # 7. Отображение изображения
    cv2.imshow("Normalized landmarks (restored)", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [None]:
draw_normalized_landmarks("../all_data/12/16 (2).jpg")