In [59]:
pack_identify = {
    "first" : range(16, 27),
    "second" : range(89, 96),
    "third" : list(range(0, 16)) + list(range(152, 171))
}




In [70]:
import cv2
from imutils import face_utils
import pandas as pd
import numpy as np
import mediapipe as mp

import os
import re

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import joblib
# Пути до ресурсов

DATASET_BINARY_DATA = "../binary_data/"
DATASET_ALL_DATA = "../all_data/"
#PREDICTOR_PATH = "../models/shape_predictor_68_face_landmarks.dat"

# Загружаем модель
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)

def numeric_key(name):
    return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', name)]
#Функция для нормализации изображения (Приведение к единому отображению)

def normalize_landmarks(points):
    points = points.astype(np.float32)
    #points = landmarks.reshape(-1, 2)
    left_eye = points[33]
    right_eye = points[263]

    center_x = (left_eye[0] + right_eye[0]) / 2
    center_y = (left_eye[1] + right_eye[1]) / 2

    points[:, 0] -= center_x
    points[:, 1] -= center_y

    eye_dist = np.linalg.norm(left_eye - right_eye)
    if eye_dist > 0:
        points = points / eye_dist
    return points

def get_landmarks(image_path, landmarks = [], all_labels = []):
    image_array = np.fromfile(image_path, dtype=np.uint8)
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Изображение не загружено!")
            
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    height, width = image_rgb.shape[:2]

    # Детектируем лицо
    faces = face_mesh.process(image_rgb)

    if not faces.multi_face_landmarks:
        return
        #raise ValueError("Лицо не найдено на изображении")

    shape = faces.multi_face_landmarks[0]

    landmarks = normalize_landmarks(np.array([(p.x * width, p.y * height) for p in shape.landmark]))
    landmarks = np.concatenate((np.concatenate((landmarks[pack_identify["first"]], landmarks[pack_identify["second"]]), axis=0), landmarks[pack_identify["third"]]), axis=0).flatten()

    return landmarks


def build_dataframe(dataset_dir, all_landmarks, all_labels, get_label_func = lambda k : k % 8): # получаем числовой код папки
    k = 0
    for label_dir in sorted(os.listdir(dataset_dir), key=numeric_key):
        label_path = os.path.join(dataset_dir, label_dir)
        if not os.path.isdir(label_path):
            continue
        
        label = get_label_func(k)
        #label = k % 8
        #label = 0 if label_dir == 'T' else 1  # T = 0, F = 1

        for filename in os.listdir(label_path):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_path = os.path.join(label_path, filename)

                landmarks = get_landmarks(image_path)
                if landmarks is None: continue
                
                all_landmarks.append(landmarks)
                all_labels.append(label)

        k += 1

In [71]:
all_landmarks = []
all_labels = []

build_dataframe(DATASET_ALL_DATA, all_landmarks, all_labels) # получаем числовой код 

all_landmarks_1 = []
all_labels_1 = []

build_dataframe(DATASET_BINARY_DATA, all_landmarks_1, all_labels_1) # получаем числовой код

In [72]:
columns = []
for idx in list(range(0, 27)) + list(range(89, 96)) + list(range(152, 171)):
    columns.append(f"x{idx}")
    columns.append(f"y{idx}")

df_all = pd.DataFrame(all_landmarks, columns=columns)
df_all['label'] = all_labels
df_all = df_all.sample(frac=1).reset_index(drop=True)

df_binary = pd.DataFrame(all_landmarks_1, columns=columns)
df_binary['label'] = all_labels_1
df_binary = df_binary.sample(frac=1).reset_index(drop=True)

df_all

df_binary

Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y166,x167,y167,x168,y168,x169,y169,x170,y170,label
0,0.046266,0.933800,0.046671,0.967788,0.045077,1.026492,0.054841,0.646825,-0.014084,0.625214,...,0.585232,-0.045930,0.708533,0.023516,0.015415,-0.421710,1.052723,-0.324736,1.132595,0
1,-0.118778,0.839132,-0.116386,0.881601,-0.100413,0.966871,-0.169383,0.497919,-0.204837,0.492951,...,0.460188,-0.214722,0.599911,-0.093300,-0.087532,-0.445820,1.032903,-0.382828,1.105150,0
2,0.055933,0.790068,0.055807,0.824939,0.053576,0.909152,0.062809,0.510190,-0.013916,0.497477,...,0.464860,-0.050578,0.587600,0.026354,-0.034305,-0.465571,1.003913,-0.355484,1.076769,0
3,-0.009923,0.834886,-0.007193,0.872715,0.000660,0.936842,-0.040344,0.462987,-0.085889,0.460045,...,0.440948,-0.106725,0.575125,-0.027068,-0.047950,-0.420009,0.977054,-0.335376,1.041828,0
4,-0.016074,0.855076,-0.011879,0.892938,0.000896,0.943568,-0.057886,0.507236,-0.103359,0.498581,...,0.473056,-0.119796,0.595847,-0.048357,-0.012535,-0.372332,0.940869,-0.297994,1.009975,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1860,0.139045,0.935751,0.133313,0.978252,0.108706,1.042265,0.193262,0.560447,0.100940,0.553672,...,0.524907,0.048780,0.661767,0.108473,-0.016441,-0.458782,1.098314,-0.342807,1.176165,1
1861,-0.037913,0.854590,-0.034524,0.888173,-0.022426,0.942613,-0.060605,0.541404,-0.109079,0.526171,...,0.494070,-0.129085,0.616089,-0.040141,-0.007478,-0.415882,0.947433,-0.338028,1.021811,0
1862,-0.025208,0.874670,-0.023035,0.912098,-0.015099,0.984541,-0.048830,0.543554,-0.103273,0.531074,...,0.497048,-0.128964,0.634099,-0.030705,-0.023786,-0.432053,1.029236,-0.348336,1.105139,0
1863,-0.063365,0.738255,-0.061938,0.774282,-0.049019,0.864577,-0.109036,0.477111,-0.155682,0.468771,...,0.436934,-0.168307,0.548578,-0.067603,-0.043903,-0.430152,0.959348,-0.356233,1.028535,1


In [73]:
df_all

Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y166,x167,y167,x168,y168,x169,y169,x170,y170,label
0,0.060507,0.909312,0.054020,0.945932,0.031529,0.988229,0.124322,0.586009,0.048133,0.574327,...,0.544995,-0.004329,0.657544,0.082996,-0.008622,-0.472903,0.999178,-0.369378,1.068182,3
1,-0.015541,0.747902,-0.015295,0.780379,-0.013410,0.829302,-0.007173,0.490625,-0.058120,0.474391,...,0.441090,-0.088661,0.533014,0.003117,-0.029514,-0.393293,0.815037,-0.312903,0.880125,1
2,0.087347,0.809892,0.083857,0.846168,0.069546,0.901099,0.132549,0.518017,0.054130,0.509609,...,0.481555,0.004614,0.581264,0.077278,-0.026640,-0.422940,0.941004,-0.316743,1.005241,2
3,-0.017644,0.914713,-0.014905,0.951380,-0.006358,0.998726,-0.040798,0.575509,-0.091284,0.560747,...,0.526892,-0.115025,0.653421,-0.027650,-0.016987,-0.382517,0.978927,-0.306554,1.053112,0
4,-0.389598,1.295044,-0.373821,1.356131,-0.297704,1.435526,-0.573914,0.705866,-0.546105,0.693342,...,0.646010,-0.492085,0.839857,-0.309773,-0.052610,-0.366010,1.412697,-0.360272,1.520794,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1861,0.272030,0.854804,0.270160,0.899076,0.243542,0.981746,0.318126,0.467898,0.213149,0.479996,...,0.465006,0.159618,0.588453,0.146964,-0.081710,-0.398920,1.155090,-0.248342,1.211671,1
1862,0.525425,1.262673,0.512142,1.323182,0.434407,1.415017,0.658354,0.702820,0.504858,0.735535,...,0.737484,0.420413,0.897798,0.331526,-0.075678,-0.412950,1.699048,-0.207897,1.759993,0
1863,0.057686,0.846262,0.056140,0.891622,0.048668,0.956731,0.070586,0.397069,0.006509,0.405228,...,0.393633,-0.029062,0.531571,0.033602,-0.083236,-0.441645,1.012764,-0.348070,1.077147,3
1864,0.024416,0.878421,0.023652,0.919058,0.019195,0.970210,0.031583,0.474934,-0.024205,0.469119,...,0.448908,-0.059214,0.569774,0.018229,-0.035939,-0.427547,0.981709,-0.337334,1.047864,3


In [74]:
print(df_all["label"].unique())

print(df_binary["label"].unique())

[3 1 2 0 6 4 7 5]
[0 1]


In [75]:
print(df_all["label"].value_counts())

print(df_binary["label"].value_counts())

label
0    651
1    490
3    427
2    292
4      3
6      1
7      1
5      1
Name: count, dtype: int64
label
0    1209
1     656
Name: count, dtype: int64


In [76]:
# Разделим данные
X_train_subtype, X_test_subtype, y_train_subtype, y_test_subtype = train_test_split(
    df_all.drop(columns=['label']),  # все колонки кроме 'label'
    df_all['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    #stratify=df['label'],         # сбалансированная разбивка по классам
    random_state=42
)

X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    df_binary.drop(columns=['label']),  # все колонки кроме 'label'
    df_binary['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    stratify=df_binary['label'],         # сбалансированная разбивка по классам
    random_state=42
)

# Обучим модель
model_subtype = LogisticRegression(max_iter=1000)
model_subtype.fit(X_train_subtype, y_train_subtype)

model_binary = LogisticRegression(max_iter=1000)
model_binary.fit(X_train_binary, y_train_binary)

# Оценим качество
y_pred_subtype = model_subtype.predict(X_test_subtype)
print(f"Accuracy subtype: {accuracy_score(y_test_subtype, y_pred_subtype):.4f}")

y_pred_binary = model_binary.predict(X_test_binary)
print(f"Accuracy binary class: {accuracy_score(y_test_binary, y_pred_binary):.4f}")

Accuracy subtype: 0.4011
Accuracy binary class: 0.6944


In [77]:
def predict_dichotomy(image_path, model):
    landmarks = get_landmarks(image_path)
    
    X_input = pd.DataFrame([landmarks], columns=columns)

    prediction = model.predict(X_input)[0]
    proba = model.predict_proba(X_input)[0]

    return prediction, proba

In [78]:
joblib.dump(model_subtype, 'subtype_classifier.pkl')

joblib.dump(model_binary, 'logic_ethics_classifier.pkl')

subtypes = {
    0 : "Шизоидный",
    1 : "Параноидальный",
    2 : "Нарциссический",
    3 : "Психопатический",
    4 : "Компульсивный",
    5 : "Истерический",
    6 : "Депрессивный",
    7 : "Мазохистический"
}

In [79]:
model_subtype_file = joblib.load("subtype_classifier.pkl")  # путь к сохранённой модели
model_binary_file = joblib.load("logic_ethics_classifier.pkl")  # путь к сохранённой модели

In [80]:
# 2. Инициализация детектора и предиктора
result_subtype, confidence_subtype = predict_dichotomy("../all_data/12/16 (2).jpg", model_subtype_file)
print(f"Подтип личности: {subtypes[result_subtype]} (Уверенность: {max(confidence_subtype):.2f})")

result_binary, confidence_binary = predict_dichotomy("../all_data/12/16 (2).jpg", model_binary_file)
print(f"Дихотомия: {'Этика' if result_binary == 0 else 'Логика'} (Уверенность: {max(confidence_binary):.2f})")


Подтип личности: Шизоидный (Уверенность: 0.27)
Дихотомия: Этика (Уверенность: 0.71)


In [81]:
def normalize_landmarks(landmarks):
    """
    landmarks: np.array с shape (N, 2), координаты в пикселях
    Возвращает: нормализованные landmarks и параметры центра и масштаба
    """
    left_eye = landmarks[33]
    right_eye = landmarks[263]
    center = (left_eye + right_eye) / 2
    scale = np.linalg.norm(left_eye - right_eye)

    normalized = (landmarks - center) / scale
    return normalized, center, scale


def denormalize_landmarks(normalized, center, scale):
    """
    Восстанавливает нормализованные точки обратно в пиксельные координаты
    """
    return (normalized * scale) + center

def draw_normalized_landmarks(image_path):
    # 1. Загрузка изображения
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Не удалось загрузить изображение")
    h, w = image.shape[:2]

    # 2. Инициализация MediaPipe
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True)

    # 3. Обработка изображения
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(rgb)

    # 4. Проверка наличия лица
    if not results.multi_face_landmarks:
        print("Лицо не найдено")
        return

    # 5. Извлечение точек и нормализация
    face_landmarks = results.multi_face_landmarks[0]
    landmarks = np.array([
        [lm.x * w, lm.y * h]
        for lm in face_landmarks.landmark
    ])
    normalized, center, scale = normalize_landmarks(landmarks)
    restored = denormalize_landmarks(normalized, center, scale)

    # 6. Отображение денормализованных точек
    for point in restored.astype(int):
        x, y = point
        cv2.circle(image, (x, y), radius=1, color=(0, 255, 255), thickness=-1)

    # 7. Отображение изображения
    cv2.imshow("Normalized landmarks (restored)", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [82]:
draw_normalized_landmarks("../all_data/12/16 (2).jpg")