In [1]:
import cv2
import dlib
from matplotlib import pyplot as plt
from imutils import face_utils
import pandas as pd
import numpy as np

import os
import re

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import joblib

In [2]:
# Пути до ресурсов

DATASET_BINARY_DATA = "../binary_data/"
DATASET_ALL_DATA = "../all_data/"
PREDICTOR_PATH = "../models/shape_predictor_68_face_landmarks.dat"

In [3]:
# Загружаем модель
predictor = dlib.shape_predictor(PREDICTOR_PATH)
detector = dlib.get_frontal_face_detector()

In [4]:
def numeric_key(name):
    return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', name)]

In [5]:
#Функция для нормализации изображения (Приведение к единому отображению)

def normalize_landmarks(points):
    points = points.astype(np.float32)
    #points = landmarks.reshape(-1, 2)
    center_x = (points[36][0] + points[45][0]) / 2
    center_y = (points[36][1] + points[45][1]) / 2
    points[:, 0] -= center_x
    points[:, 1] -= center_y
    eye_dist = np.linalg.norm(points[36] - points[45])
    if eye_dist > 0:
        points = points / eye_dist
    return points

In [6]:
def get_landmarks(image_path, landmarks = [], all_labels = []):
    image_array = np.fromfile(image_path, dtype=np.uint8)
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Изображение не загружено!")
            
    # Преобразуем изображение в оттенки серого
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Детектируем лицо
    faces = detector(gray)

    if len(faces) == 0:
        return
        #raise ValueError("Лицо не найдено на изображении")

    shape = predictor(gray, faces[0])

    landmarks = normalize_landmarks(np.array([(p.x, p.y) for p in shape.parts()]))
    landmarks = np.concatenate((np.concatenate((landmarks[0:17], landmarks[17:27]), axis=0), landmarks[60:68]), axis=0).flatten()

    return landmarks

In [7]:
def build_dataframe(dataset_dir, all_landmarks, all_labels, get_label_func = lambda k : k % 8): # получаем числовой код папки
    k = 0
    for label_dir in sorted(os.listdir(dataset_dir), key=numeric_key):
        label_path = os.path.join(dataset_dir, label_dir)
        if not os.path.isdir(label_path):
            continue
        
        label = get_label_func(k)
        #label = k % 8
        #label = 0 if label_dir == 'T' else 1  # T = 0, F = 1

        for filename in os.listdir(label_path):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_path = os.path.join(label_path, filename)

                landmarks = get_landmarks(image_path)
                if landmarks is None: continue
                
                all_landmarks.append(landmarks)
                all_labels.append(label)

        k += 1

In [None]:
# def get_label_for_binary_data(label_dir):
#     return 0 if label_dir == 'T' else 1



In [8]:
all_landmarks = []
all_labels = []

build_dataframe(DATASET_ALL_DATA, all_landmarks, all_labels) # получаем числовой код 

all_landmarks_1 = []
all_labels_1 = []

build_dataframe(DATASET_BINARY_DATA, all_landmarks_1, all_labels_1) # получаем числовой код 

In [9]:
print("OpenCV version:", cv2.__version__)
print("dlib version:", dlib.__version__)

# all_landmarks = []
# all_labels = []

# for label_dir in os.listdir(DATASET_DIR):
#     label_path = os.path.join(DATASET_DIR, label_dir)
#     if not os.path.isdir(label_path):
#         continue
    
#     label = 0 if label_dir == 'T' else 1  # T = 0, F = 1

#     for filename in os.listdir(label_path):
#         if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
#             image_path = os.path.join(label_path, filename)

#             landmarks = get_landmarks(image_path)
#             if landmarks is None: continue
            
#             all_landmarks.append(landmarks)
#             all_labels.append(label)


# Сохраняем в CSV
columns = []
for idx in list(range(0, 17)) + list(range(17, 27)) + list(range(60, 68)):
    columns.append(f"x{idx}")
    columns.append(f"y{idx}")

df_all = pd.DataFrame(all_landmarks, columns=columns)
df_all['label'] = all_labels
df_all = df_all.sample(frac=1).reset_index(drop=True)

df_binary = pd.DataFrame(all_landmarks_1, columns=columns)
df_binary['label'] = all_labels_1
df_binary = df_binary.sample(frac=1).reset_index(drop=True)

df_all

df_binary

OpenCV version: 4.11.0
dlib version: 19.22.99


Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y63,x64,y64,x65,y65,x66,y66,x67,y67,label
0,-0.706260,-0.039678,-0.690389,0.161355,-0.648067,0.357098,-0.589873,0.547550,-0.499937,0.727422,...,0.796196,0.261872,0.764454,0.055549,0.801486,-0.023807,0.806777,-0.097871,0.785615,1
1,-0.619517,0.322149,-0.652558,0.495614,-0.636038,0.702120,-0.578216,0.941667,-0.512134,1.164693,...,1.049049,0.156944,1.156433,-0.041301,1.098611,-0.132164,1.115131,-0.189985,1.123392,0
2,-0.687165,-0.026029,-0.707988,0.150968,-0.687165,0.348788,-0.655930,0.536197,-0.603872,0.734017,...,0.775663,0.239466,0.744428,0.010412,0.765251,-0.093704,0.765251,-0.176997,0.744428,0
3,-0.778351,-0.082474,-0.747423,0.123711,-0.706186,0.319588,-0.654639,0.515464,-0.572165,0.690722,...,0.701031,0.262887,0.670103,0.087629,0.721649,0.005155,0.742268,-0.067010,0.721649,0
4,-0.817590,0.017646,-0.805827,0.241160,-0.782299,0.464674,-0.747007,0.676424,-0.688188,0.899938,...,0.794063,0.311743,0.770535,0.135285,0.829354,0.052938,0.852882,-0.041174,0.841118,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1737,-1.203801,0.158549,-1.156824,0.416926,-1.098102,0.675303,-1.015891,0.921936,-0.839725,1.121591,...,0.710536,0.475648,0.722281,0.381693,0.745770,0.299482,0.757514,0.182038,0.757514,1
1738,-0.616581,0.024997,-0.633245,0.208304,-0.616581,0.391612,-0.549924,0.558256,-0.483266,0.741564,...,0.774892,0.233301,0.758228,0.016664,0.791557,-0.066657,0.791557,-0.133315,0.791557,0
1739,-0.668371,0.067399,-0.679604,0.258362,-0.657137,0.460558,-0.645904,0.662754,-0.600972,0.864950,...,0.797551,0.219046,0.797551,0.016850,0.797551,-0.061782,0.820018,-0.140414,0.808785,1
1740,-0.571267,0.107113,-0.618872,0.321338,-0.595069,0.559365,-0.547464,0.797393,-0.476056,1.059224,...,1.011618,0.142817,1.106829,-0.119014,1.154435,-0.238028,1.130632,-0.285633,1.106829,0


In [10]:
print(df_all["label"].unique())

print(df_binary["label"].unique())


[0 1 3 2 6 4 7 5]
[1 0]


In [19]:
# Разделим данные
X_train_subtype, X_test_subtype, y_train_subtype, y_test_subtype = train_test_split(
    df_all.drop(columns=['label']),  # все колонки кроме 'label'
    df_all['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    #stratify=df['label'],         # сбалансированная разбивка по классам
    random_state=42
)

X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
    df_binary.drop(columns=['label']),  # все колонки кроме 'label'
    df_binary['label'],                 # сами метки
    test_size=0.2,                # 20% на тест
    stratify=df_binary['label'],         # сбалансированная разбивка по классам
    random_state=42
)

# Обучим модель
model_subtype = LogisticRegression(max_iter=1000)
model_subtype.fit(X_train_subtype, y_train_subtype)

model_binary = LogisticRegression(max_iter=1000)
model_binary.fit(X_train_binary, y_train_binary)

# Оценим качество
y_pred_subtype = model_subtype.predict(X_test_subtype)
print(f"Accuracy subtype: {accuracy_score(y_test_subtype, y_pred_subtype):.4f}")

y_pred_binary = model_binary.predict(X_test_binary)
print(f"Accuracy binary class: {accuracy_score(y_test_binary, y_pred_binary):.4f}")

Accuracy subtype: 0.4986
Accuracy binary class: 0.7364


In [20]:
def predict_dichotomy(image_path, model):
    landmarks = get_landmarks(image_path)
    
    X_input = pd.DataFrame([landmarks], columns=columns)

    prediction = model.predict(X_input)[0]
    proba = model.predict_proba(X_input)[0]

    return prediction, proba

In [21]:
joblib.dump(model_subtype, 'subtype_classifier.pkl')

joblib.dump(model_binary, 'logic_ethics_classifier.pkl')

['logic_ethics_classifier.pkl']

In [25]:
subtypes = {
    0 : "Шизоидный",
    1 : "Параноидальный",
    2 : "Нарциссический",
    3 : "Психопатический",
    4 : "Компульсивный",
    5 : "Истерический",
    6 : "Депрессивный",
    7 : "Мазохистический"
}

In [26]:
model_subtype_file = joblib.load("subtype_classifier.pkl")  # путь к сохранённой модели
model_binary_file = joblib.load("logic_ethics_classifier.pkl")  # путь к сохранённой модели


# 2. Инициализация детектора и предиктора
result_subtype, confidence_subtype = predict_dichotomy("../all_data/17 Гю/Анастасия Семеренко (3).jpg", model_subtype_file)
print(f"Подтип личности: {subtypes[result_subtype]} (Уверенность: {max(confidence_subtype):.2f})")

result_binary, confidence_binary = predict_dichotomy("../all_data/17 Гю/Анастасия Семеренко (3).jpg", model_binary_file)
print(f"Дихотомия: {'Этика' if result_binary == 0 else 'Логика'} (Уверенность: {max(confidence_binary):.2f})")



Подтип личности: Параноидальный (Уверенность: 0.34)
Дихотомия: Этика (Уверенность: 0.72)
