In [19]:
import cv2
from imutils import face_utils
import pandas as pd
import numpy as np
import mediapipe as mp

import os
import re

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import joblib
# Пути до ресурсов

DATASET_BINARY_DATA = "../binary_data/"
DATASET_ALL_DATA = "../all_data/"
#PREDICTOR_PATH = "../models/shape_predictor_68_face_landmarks.dat"

# Загружаем модель
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)

def numeric_key(name):
    return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', name)]
#Функция для нормализации изображения (Приведение к единому отображению)

def normalize_landmarks(points):
    points = points.astype(np.float32)
    #points = landmarks.reshape(-1, 2)
    center_x = (points[36][0] + points[45][0]) / 2
    center_y = (points[36][1] + points[45][1]) / 2
    points[:, 0] -= center_x
    points[:, 1] -= center_y
    eye_dist = np.linalg.norm(points[36] - points[45])
    if eye_dist > 0:
        points = points / eye_dist
    return points

def get_landmarks(image_path, landmarks = [], all_labels = []):
    image_array = np.fromfile(image_path, dtype=np.uint8)
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Изображение не загружено!")
            
    # Преобразуем изображение в оттенки серого
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Детектируем лицо
    faces = face_mesh.process(image_rgb)

    if not faces.multi_face_landmarks:
        return
        #raise ValueError("Лицо не найдено на изображении")

    shape = faces.multi_face_landmarks[0]

    landmarks = normalize_landmarks(np.array([(p.x, p.y) for p in shape.landmark]))
    landmarks = np.concatenate((np.concatenate((landmarks[0:17], landmarks[17:27]), axis=0), landmarks[60:68]), axis=0).flatten()

    return landmarks


def build_dataframe(dataset_dir, all_landmarks, all_labels, get_label_func = lambda k : k % 8): # получаем числовой код папки
    k = 0
    for label_dir in sorted(os.listdir(dataset_dir), key=numeric_key):
        label_path = os.path.join(dataset_dir, label_dir)
        if not os.path.isdir(label_path):
            continue
        
        label = get_label_func(k)
        #label = k % 8
        #label = 0 if label_dir == 'T' else 1  # T = 0, F = 1

        for filename in os.listdir(label_path):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_path = os.path.join(label_path, filename)

                landmarks = get_landmarks(image_path)
                if landmarks is None: continue
                
                all_landmarks.append(landmarks)
                all_labels.append(label)

        k += 1

In [20]:
all_landmarks = []
all_labels = []

build_dataframe(DATASET_ALL_DATA, all_landmarks, all_labels) # получаем числовой код 

all_landmarks_1 = []
all_labels_1 = []

build_dataframe(DATASET_BINARY_DATA, all_landmarks_1, all_labels_1) # получаем числовой код

In [21]:
columns = []
for idx in list(range(0, 17)) + list(range(17, 27)) + list(range(60, 68)):
    columns.append(f"x{idx}")
    columns.append(f"y{idx}")

df_all = pd.DataFrame(all_landmarks, columns=columns)
df_all['label'] = all_labels
df_all = df_all.sample(frac=1).reset_index(drop=True)

df_binary = pd.DataFrame(all_landmarks_1, columns=columns)
df_binary['label'] = all_labels_1
df_binary = df_binary.sample(frac=1).reset_index(drop=True)

df_all

df_binary

Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y63,x64,y64,x65,y65,x66,y66,x67,y67,label
0,1.421645,2.020620,0.940160,0.510841,1.358261,0.884065,0.793236,-0.993507,0.887589,0.077204,...,-3.831996,0.035702,0.409348,-0.499493,-3.602033,-0.621950,-3.945374,-0.918763,-5.854126,1
1,0.639566,0.902554,0.693624,0.081237,0.640389,0.364867,0.462677,-0.542010,0.695520,-0.134053,...,-1.565076,-0.051534,0.245219,-0.414150,-1.527213,-0.453886,-1.701053,-0.739790,-2.517079,1
2,0.644039,0.804945,0.694599,0.193358,0.668538,0.400543,0.543952,-0.424248,0.709927,-0.005291,...,-1.691665,-0.026562,0.200032,-0.253162,-1.560308,-0.279928,-1.723315,-0.450250,-2.446910,1
3,0.776319,1.184775,0.747329,0.336128,0.766730,0.587388,0.524529,-0.680148,0.740493,0.034725,...,-2.572595,0.003106,0.291039,-0.483542,-2.447317,-0.537297,-2.684524,-0.798505,-3.827906,0
4,2.935879,3.087484,0.142877,0.699132,2.530264,1.527347,0.889909,-1.926883,-0.027956,-0.147670,...,-6.366861,0.495526,0.784492,0.583288,-6.085583,0.332862,-6.720677,0.612719,-10.163096,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1860,0.281830,0.720210,0.588413,0.323405,0.420283,0.380552,0.555937,-0.194379,0.658661,0.203589,...,-1.564714,-0.146492,0.112617,0.107223,-1.278719,0.130430,-1.393368,0.122717,-2.041628,0
1861,0.764573,0.899107,0.726759,0.252106,0.741012,0.455610,0.531740,-0.516211,0.724630,0.017445,...,-1.922736,0.003521,0.221262,-0.366055,-1.820922,-0.416724,-2.012563,-0.700212,-2.963850,0
1862,0.326869,0.606553,0.599449,0.145609,0.326469,0.306657,0.359857,-0.288660,0.626257,0.004849,...,-1.278908,-0.143425,0.156791,-0.257459,-1.133107,-0.253077,-1.249317,-0.537426,-1.833955,1
1863,0.642705,0.892885,0.697289,0.280373,0.653786,0.448726,0.483755,-0.500888,0.701096,0.055663,...,-2.101212,0.008374,0.216644,-0.454160,-1.896354,-0.497427,-2.100801,-0.786222,-3.182133,0


In [29]:
df_all

Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y63,x64,y64,x65,y65,x66,y66,x67,y67,label
0,0.438262,0.498523,0.637263,0.161150,0.455764,0.243451,0.409143,-0.289301,0.660257,0.037831,...,-1.262681,-0.083098,0.125583,-0.389864,-1.161053,-0.402244,-1.276348,-0.679077,-1.893087,1
1,0.393594,0.404858,0.629728,0.099607,0.418667,0.179168,0.389621,-0.221387,0.650664,0.005706,...,-0.842921,-0.104426,0.094733,-0.339867,-0.788231,-0.352925,-0.867268,-0.677771,-1.304594,0
2,0.530640,0.728212,0.657851,0.095616,0.502568,0.310689,0.391109,-0.427591,0.665344,-0.079998,...,-1.284351,-0.101115,0.195391,-0.386795,-1.247969,-0.412220,-1.384137,-0.769531,-2.095668,3
3,0.511096,0.835048,0.666520,0.182561,0.531075,0.374058,0.431787,-0.469290,0.680284,-0.016432,...,-1.680696,-0.070830,0.206141,-0.411055,-1.594702,-0.439530,-1.762806,-0.728094,-2.548570,3
4,0.897867,1.416518,0.788359,0.383817,0.859077,0.662750,0.562600,-0.764995,0.772572,0.047171,...,-2.647451,-0.045500,0.307065,-0.420902,-2.493189,-0.497886,-2.765803,-0.828291,-4.273260,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1861,0.617906,0.867003,0.696188,0.234612,0.624715,0.439063,0.458123,-0.526774,0.697250,-0.002022,...,-1.742163,-0.061193,0.215906,-0.376215,-1.704873,-0.420672,-1.887345,-0.752303,-2.773978,1
1862,0.583532,0.721534,0.668444,0.318531,0.611927,0.391860,0.476652,-0.380171,0.679141,0.143914,...,-2.016281,-0.014904,0.153886,-0.497285,-1.790243,-0.530896,-1.965752,-0.744831,-2.910423,1
1863,0.618773,0.865218,0.699783,0.233897,0.619998,0.417796,0.448529,-0.526299,0.701342,0.007297,...,-1.835119,-0.087847,0.216295,-0.447468,-1.771216,-0.492238,-1.958786,-0.788610,-2.794588,1
1864,0.714266,0.992115,0.719471,0.283097,0.698985,0.471965,0.507421,-0.550662,0.722213,0.039975,...,-1.994632,-0.043684,0.219901,-0.362429,-1.804182,-0.416120,-2.019103,-0.766191,-3.235187,0


In [22]:
print(df_all["label"].unique())

print(df_binary["label"].unique())

[1 0 3 2 4 6 7 5]
[1 0]


In [31]:
print(df_all["label"].value_counts())

print(df_binary["label"].value_counts())

label
0    651
1    490
3    427
2    292
4      3
6      1
7      1
5      1
Name: count, dtype: int64
label
0    1209
1     656
Name: count, dtype: int64


In [23]:
# Разделим данные
X_train_subtype, X_test_subtype, y_train_subtype, y_test_subtype = train_test_split(
    df_all.drop(columns=['label']),  # все колонки кроме 'label'
    df_all['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    #stratify=df['label'],         # сбалансированная разбивка по классам
    random_state=42
)

X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    df_binary.drop(columns=['label']),  # все колонки кроме 'label'
    df_binary['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    stratify=df_binary['label'],         # сбалансированная разбивка по классам
    random_state=42
)

# Обучим модель
model_subtype = LogisticRegression(max_iter=1000)
model_subtype.fit(X_train_subtype, y_train_subtype)

model_binary = LogisticRegression(max_iter=1000)
model_binary.fit(X_train_binary, y_train_binary)

# Оценим качество
y_pred_subtype = model_subtype.predict(X_test_subtype)
print(f"Accuracy subtype: {accuracy_score(y_test_subtype, y_pred_subtype):.4f}")

y_pred_binary = model_binary.predict(X_test_binary)
print(f"Accuracy binary class: {accuracy_score(y_test_binary, y_pred_binary):.4f}")

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy subtype: 0.4545
Accuracy binary class: 0.7480


In [24]:
def predict_dichotomy(image_path, model):
    landmarks = get_landmarks(image_path)
    
    X_input = pd.DataFrame([landmarks], columns=columns)

    prediction = model.predict(X_input)[0]
    proba = model.predict_proba(X_input)[0]

    return prediction, proba

In [25]:
joblib.dump(model_subtype, 'subtype_classifier.pkl')

joblib.dump(model_binary, 'logic_ethics_classifier.pkl')

subtypes = {
    0 : "Шизоидный",
    1 : "Параноидальный",
    2 : "Нарциссический",
    3 : "Психопатический",
    4 : "Компульсивный",
    5 : "Истерический",
    6 : "Депрессивный",
    7 : "Мазохистический"
}

In [26]:
model_subtype_file = joblib.load("subtype_classifier.pkl")  # путь к сохранённой модели
model_binary_file = joblib.load("logic_ethics_classifier.pkl")  # путь к сохранённой модели

In [32]:
# 2. Инициализация детектора и предиктора
result_subtype, confidence_subtype = predict_dichotomy("../all_data/12/16 (2).jpg", model_subtype_file)
print(f"Подтип личности: {subtypes[result_subtype]} (Уверенность: {max(confidence_subtype):.2f})")

result_binary, confidence_binary = predict_dichotomy("../all_data/12/16 (2).jpg", model_binary_file)
print(f"Дихотомия: {'Этика' if result_binary == 0 else 'Логика'} (Уверенность: {max(confidence_binary):.2f})")


Подтип личности: Параноидальный (Уверенность: 0.30)
Дихотомия: Этика (Уверенность: 0.82)
