In [11]:
pack_identify = {
    "first": [10, 127, 356, 151, 9, 8],                # Вертикальный лоб (Рационал.) / Лоб сужается кверху (Иррационал.)
    "second": [65, 52, 55, 295, 282, 285, 159, 386],   # Высокие брови (Рационал.) / Низкие брови (Иррационал.)
    "third": [10, 152, 234, 454]                       # Голова вытянута по вертикали (Рационал.) / Голова вытянута по горизонтали (Иррационал.)
}


In [12]:
import cv2
from imutils import face_utils
import pandas as pd
import numpy as np
import mediapipe as mp

import os
import re

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import joblib
# Пути до ресурсов

DATASET_BINARY_DATA = "../binary_data/"
DATASET_ALL_DATA = "../all_data/"
#PREDICTOR_PATH = "../models/shape_predictor_68_face_landmarks.dat"

# Загружаем модель
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)

def numeric_key(name):
    return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', name)]
#Функция для нормализации изображения (Приведение к единому отображению)

def normalize_landmarks(points):
    points = points.astype(np.float32)
    #points = landmarks.reshape(-1, 2)
    left_eye = points[33]
    right_eye = points[263]

    center_x = (left_eye[0] + right_eye[0]) / 2
    center_y = (left_eye[1] + right_eye[1]) / 2

    points[:, 0] -= center_x
    points[:, 1] -= center_y

    eye_dist = np.linalg.norm(left_eye - right_eye)
    if eye_dist > 0:
        points = points / eye_dist
    return points

def get_landmarks(image_path, landmarks = [], all_labels = []):
    image_array = np.fromfile(image_path, dtype=np.uint8)
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Изображение не загружено!")
            
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    height, width = image_rgb.shape[:2]

    # Детектируем лицо
    faces = face_mesh.process(image_rgb)

    if not faces.multi_face_landmarks:
        return
        #raise ValueError("Лицо не найдено на изображении")

    shape = faces.multi_face_landmarks[0]

    landmarks = normalize_landmarks(np.array([(p.x * width, p.y * height) for p in shape.landmark]))
    landmarks = np.concatenate((np.concatenate((landmarks[pack_identify["first"]], landmarks[pack_identify["second"]]), axis=0), landmarks[pack_identify["third"]]), axis=0).flatten()

    return landmarks


def build_dataframe(dataset_dir, all_landmarks, all_labels, get_label_func = lambda k : k % 8): # получаем числовой код папки
    k = 0
    for label_dir in sorted(os.listdir(dataset_dir), key=numeric_key):
        label_path = os.path.join(dataset_dir, label_dir)
        if not os.path.isdir(label_path):
            continue
        
        label = get_label_func(k)
        #label = k % 8
        #label = 0 if label_dir == 'T' else 1  # T = 0, F = 1

        for filename in os.listdir(label_path):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_path = os.path.join(label_path, filename)

                landmarks = get_landmarks(image_path)
                if landmarks is None: continue
                
                all_landmarks.append(landmarks)
                all_labels.append(label)

        k += 1

In [13]:
all_landmarks = []
all_labels = []

build_dataframe(DATASET_ALL_DATA, all_landmarks, all_labels) # получаем числовой код 

all_landmarks_1 = []
all_labels_1 = []

build_dataframe(DATASET_BINARY_DATA, all_landmarks_1, all_labels_1) # получаем числовой код

In [14]:
# Собираем индексы, сохраняя порядок и дубликаты
all_indices = pack_identify["first"] + pack_identify["second"] + pack_identify["third"]

# Создаём колонки
columns = []
for idx in all_indices:
    columns.append(f"x{idx}")
    columns.append(f"y{idx}")

# Создаём DataFrame
df_all = pd.DataFrame(all_landmarks, columns=columns)
df_all['label'] = all_labels
df_all = df_all.sample(frac=1).reset_index(drop=True)

df_binary = pd.DataFrame(all_landmarks_1, columns=columns)
df_binary['label'] = all_labels_1
df_binary = df_binary.sample(frac=1).reset_index(drop=True)


In [15]:
df_all

Unnamed: 0,x10,y10,x127,y127,x356,y356,x151,y151,x9,y9,...,y386,x10.1,y10.1,x152,y152,x234,y234,x454,y454,label
0,-0.067131,-0.591900,-0.534609,-0.024784,1.013927,0.064618,-0.096543,-0.401281,-0.117178,-0.198028,...,-0.019128,-0.067131,-0.591900,-0.102047,1.260523,-0.509662,0.127965,1.005665,0.231312,0
1,0.030668,-0.556639,-0.820347,0.034196,0.647243,0.007904,0.038391,-0.400352,0.041365,-0.263018,...,-0.053178,0.030668,-0.556639,0.029015,1.072662,-0.807159,0.179597,0.625210,0.148259,5
2,0.086513,-0.518879,-0.831996,-0.048381,0.592784,-0.027187,0.089044,-0.345039,0.086011,-0.174371,...,-0.033157,0.086513,-0.518879,-0.013710,1.018535,-0.833950,0.084846,0.563891,0.097213,3
3,-0.025009,-0.605315,-0.732649,0.024751,0.892429,0.039884,-0.033404,-0.412786,-0.038928,-0.205591,...,-0.045818,-0.025009,-0.605315,-0.025651,1.271017,-0.721811,0.187262,0.889638,0.208106,4
4,-0.012369,-0.604163,-0.757539,0.082001,0.777499,0.062968,-0.009824,-0.432656,-0.007294,-0.257983,...,-0.064661,-0.012369,-0.604163,0.032209,1.227239,-0.747405,0.244669,0.772467,0.225704,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1861,-0.129152,-0.613797,-0.560662,0.063708,0.924527,-0.038382,-0.129112,-0.424369,-0.120786,-0.240399,...,-0.085925,-0.129152,-0.613797,0.040839,1.188648,-0.526469,0.214105,0.937405,0.126584,3
1862,-0.035176,-0.524547,-0.673068,-0.062429,0.801108,-0.041924,-0.038643,-0.335852,-0.039347,-0.140594,...,-0.042420,-0.035176,-0.524547,-0.008271,1.070624,-0.650913,0.069799,0.791036,0.094245,3
1863,-0.037806,-0.594213,-0.698317,0.004075,0.800846,-0.013164,-0.036498,-0.402333,-0.033680,-0.217135,...,-0.068454,-0.037806,-0.594213,0.013973,1.230597,-0.676995,0.158736,0.791873,0.145130,0
1864,-0.154424,-0.561486,-0.554989,0.004171,0.954888,-0.097955,-0.151665,-0.372190,-0.141967,-0.206888,...,-0.085686,-0.154424,-0.561486,0.060834,1.224809,-0.512960,0.145405,0.963992,0.058443,4


In [16]:
print(df_all["label"].unique())

print(df_binary["label"].unique())

[0 5 3 4 2 1 7 6]
[1 0]


In [17]:
print(df_all["label"].value_counts())

print(df_binary["label"].value_counts())

label
0    648
3    487
5    428
4    288
2      5
6      5
1      3
7      2
Name: count, dtype: int64
label
1    1849
0      15
Name: count, dtype: int64


In [18]:
# Разделим данные
X_train_subtype, X_test_subtype, y_train_subtype, y_test_subtype = train_test_split(
    df_all.drop(columns=['label']),  # все колонки кроме 'label'
    df_all['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    #stratify=df['label'],         # сбалансированная разбивка по классам
    random_state=42
)

X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    df_binary.drop(columns=['label']),  # все колонки кроме 'label'
    df_binary['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    stratify=df_binary['label'],         # сбалансированная разбивка по классам
    random_state=42
)

# Обучим модель
model_subtype = LogisticRegression(max_iter=1000)
model_subtype.fit(X_train_subtype, y_train_subtype)

model_binary = LogisticRegression(max_iter=1000)
model_binary.fit(X_train_binary, y_train_binary)

# Оценим качество
y_pred_subtype = model_subtype.predict(X_test_subtype)
print(f"Accuracy subtype: {accuracy_score(y_test_subtype, y_pred_subtype):.4f}")

y_pred_binary = model_binary.predict(X_test_binary)
print(f"Accuracy binary class: {accuracy_score(y_test_binary, y_pred_binary):.4f}")

Accuracy subtype: 0.4492
Accuracy binary class: 0.9920


In [19]:
def predict_dichotomy(image_path, model):
    landmarks = get_landmarks(image_path)
    
    X_input = pd.DataFrame([landmarks], columns=columns)

    prediction = model.predict(X_input)[0]
    proba = model.predict_proba(X_input)[0]

    return prediction, proba

In [20]:
joblib.dump(model_subtype, 'subtype_classifier.pkl')

joblib.dump(model_binary, 'logic_ethics_classifier.pkl')

subtypes = {
    0 : "Шизоидный",
    1 : "Параноидальный",
    2 : "Нарциссический",
    3 : "Психопатический",
    4 : "Компульсивный",
    5 : "Истерический",
    6 : "Депрессивный",
    7 : "Мазохистический"
}

In [21]:
model_subtype_file = joblib.load("subtype_classifier.pkl")  # путь к сохранённой модели
model_binary_file = joblib.load("logic_ethics_classifier.pkl")  # путь к сохранённой модели

In [22]:
# 2. Инициализация детектора и предиктора
result_subtype, confidence_subtype = predict_dichotomy("../all_data/12/16 (2).jpg", model_subtype_file)
print(f"Подтип личности: {subtypes[result_subtype]} (Уверенность: {max(confidence_subtype):.2f})")

result_binary, confidence_binary = predict_dichotomy("../all_data/12/16 (2).jpg", model_binary_file)
print(f"Дихотомия: {'Этика' if result_binary == 0 else 'Логика'} (Уверенность: {max(confidence_binary):.2f})")


Подтип личности: Истерический (Уверенность: 0.28)
Дихотомия: Логика (Уверенность: 0.99)


In [23]:
def normalize_landmarks(landmarks):
    """
    landmarks: np.array с shape (N, 2), координаты в пикселях
    Возвращает: нормализованные landmarks и параметры центра и масштаба
    """
    left_eye = landmarks[33]
    right_eye = landmarks[263]
    center = (left_eye + right_eye) / 2
    scale = np.linalg.norm(left_eye - right_eye)

    normalized = (landmarks - center) / scale
    return normalized, center, scale


def denormalize_landmarks(normalized, center, scale):
    """
    Восстанавливает нормализованные точки обратно в пиксельные координаты
    """
    return (normalized * scale) + center

def draw_normalized_landmarks(image_path):
    # 1. Загрузка изображения
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Не удалось загрузить изображение")
    h, w = image.shape[:2]

    # 2. Инициализация MediaPipe
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True)

    # 3. Обработка изображения
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(rgb)

    # 4. Проверка наличия лица
    if not results.multi_face_landmarks:
        print("Лицо не найдено")
        return

    # 5. Извлечение точек и нормализация
    face_landmarks = results.multi_face_landmarks[0]
    landmarks = np.array([
        [lm.x * w, lm.y * h]
        for lm in face_landmarks.landmark
    ])
    normalized, center, scale = normalize_landmarks(landmarks)
    restored = denormalize_landmarks(normalized, center, scale)

    # 6. Отображение денормализованных точек
    for point in restored.astype(int):
        x, y = point
        cv2.circle(image, (x, y), radius=1, color=(0, 255, 255), thickness=-1)

    # 7. Отображение изображения
    cv2.imshow("Normalized landmarks (restored)", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [24]:
draw_normalized_landmarks("../all_data/12/16 (2).jpg")