In [2]:
import cv2
#from imutils import face_utils
import pandas as pd
import numpy as np
import mediapipe as mp

import os
import re

#from sklearn.model_selection import train_test_split
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import accuracy_score

#import joblib

In [1]:
# Resource locations, relative to the notebook's working directory.
# Forward slashes are used on purpose: they are accepted on both Windows and
# POSIX, whereas a backslash is a literal filename character outside Windows.
DATASET_BINARY_DATA = "../binary_data/"
DATASET_ALL_DATA = "../all_data/"
OUTPUT_LANDMARKS_FOLDER = "../output_landmarks/"

In [3]:
# Load the model: a MediaPipe Face Mesh detector configured for still images
# with at most one face per image.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    static_image_mode=True,
)

In [11]:
def write_output_landmarks_file(image_name, label, landmarks):
    """Save one image's label and landmark coordinates as a text file.

    The file is written inside OUTPUT_LANDMARKS_FOLDER (created on demand) and
    is named after ``image_name`` with its extension replaced by ``.txt``.
    The first line holds the label; every following line holds one landmark
    formatted as ``x y``.

    Args:
        image_name: File name of the source image; only the part before the
            extension is used for the output name.
        label: Value written on the first line of the file.
        landmarks: Iterable of ``(x, y)`` pairs.
    """
    stem, _extension = os.path.splitext(image_name)

    # Destination folder for the generated txt files
    target_dir = OUTPUT_LANDMARKS_FOLDER
    os.makedirs(target_dir, exist_ok=True)

    target_path = os.path.join(target_dir, f'{stem}.txt')
    with open(target_path, 'w') as handle:
        handle.write(f'{label}\n')
        handle.writelines(f'{x} {y}\n' for x, y in landmarks)


In [5]:
def normalize_landmarks(points):
    """Translate and scale landmarks relative to the two eye reference points.

    A float32 copy of ``points`` is shifted so that the midpoint between rows
    33 and 263 (the eye reference landmarks) becomes the origin, then divided
    by the distance between those two rows. When that distance is zero the
    shifted copy is returned unscaled. The input array is not modified.

    Args:
        points: Array of landmark coordinates with x in column 0 and y in
            column 1; it must contain rows 33 and 263.

    Returns:
        The normalized float32 array.
    """
    pts = points.astype(np.float32)

    # Midpoint between the eye reference landmarks, taken before shifting
    anchor = (pts[33] + pts[263]) / 2
    pts[:, 0] -= anchor[0]
    pts[:, 1] -= anchor[1]

    # Measured on the shifted rows; translation does not change the distance
    eye_gap = np.linalg.norm(pts[33] - pts[263])
    return pts / eye_gap if eye_gap > 0 else pts

def get_landmarks(image_path, landmarks=None):
    """Detect a face in an image file and return its mesh landmarks in pixels.

    The file is read with ``np.fromfile`` and decoded with ``cv2.imdecode``
    (presumably so that paths ``cv2.imread`` cannot open still work — confirm
    before simplifying). The decoded image is converted to RGB and passed to
    the module-level ``face_mesh`` detector; only the first detected face is
    used.

    Args:
        image_path: Path of the image file to analyse.
        landmarks: Unused. Kept only so existing calls that pass it keep
            working; the default is ``None`` rather than a shared mutable list.

    Returns:
        An array of shape ``(number_of_landmarks, 2)`` with ``(x, y)`` pixel
        coordinates, or ``None`` when no face is detected (a missing face is
        deliberately not treated as an error).

    Raises:
        ValueError: If the file is empty or cannot be decoded as an image.
    """
    raw_bytes = np.fromfile(image_path, dtype=np.uint8)
    # imdecode raises on an empty buffer instead of returning None, so report
    # an empty file the same way as an undecodable one.
    if raw_bytes.size == 0:
        raise ValueError("Изображение не загружено!")

    image = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Изображение не загружено!")

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width = image_rgb.shape[:2]

    # Run face detection
    faces = face_mesh.process(image_rgb)
    if not faces.multi_face_landmarks:
        return None

    shape = faces.multi_face_landmarks[0]

    # Landmark x/y are scaled by the image width/height to obtain pixel values
    return np.array([(p.x * width, p.y * height) for p in shape.landmark])

In [13]:
def numeric_key(name):
    """Build a sort key that orders embedded numbers by value ("2" before "10").

    The name is split on runs of decimal digits; digit runs become ints and
    everything else stays a string, so the resulting list always alternates
    str, int, str, ... and keys of different names compare without type errors.

    Args:
        name: The string (e.g. a folder name) to build a key for.

    Returns:
        A list of alternating string and integer pieces.
    """
    # isdecimal, not isdigit: isdigit also accepts characters such as
    # superscript digits, which the regex does not split on and int() rejects.
    return [int(part) if part.isdecimal() else part for part in re.split(r'(\d+)', name)]

def build_dataframe(dataset_dir, all_landmarks, all_labels, get_label_func = lambda k : k // 8):
    """Collect landmarks and labels for every image below ``dataset_dir``.

    Sub-folders are visited in ``numeric_key`` order and numbered 0, 1, 2, ...
    (entries that are not directories are skipped and do not consume a
    number). For each ``.png``/``.jpg``/``.jpeg`` file in a folder, landmarks
    are extracted with ``get_landmarks``; images for which it returns ``None``
    are skipped. The label of an image is ``get_label_func(folder_number) % 2``.

    Results are appended to the caller's lists in place, and each image's
    landmarks are also written to disk via ``write_output_landmarks_file``
    under the name ``"<folder>-<file>"``.

    Args:
        dataset_dir: Directory containing one sub-folder per class.
        all_landmarks: List that receives one flattened landmark array per image.
        all_labels: List that receives the matching label per image.
        get_label_func: Maps the folder number to a numeric code before the
            ``% 2`` reduction; by default the folder number integer-divided by 8.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    folder_number = 0

    for folder_name in sorted(os.listdir(dataset_dir), key=numeric_key):
        folder_path = os.path.join(dataset_dir, folder_name)
        if os.path.isdir(folder_path):
            for file_name in os.listdir(folder_path):
                if not file_name.lower().endswith(image_suffixes):
                    continue

                points = get_landmarks(os.path.join(folder_path, file_name))
                if points is None:
                    continue

                label = get_label_func(folder_number) % 2
                all_landmarks.append(points.flatten())
                all_labels.append(label)
                write_output_landmarks_file(f"{folder_name}-{file_name}", label, points)

            # Only real directories advance the folder number
            folder_number += 1

In [14]:
# Accumulators that build_dataframe fills in place: one flattened landmark
# vector and one label per processed image.
all_landmarks, all_labels = [], []

build_dataframe(DATASET_ALL_DATA, all_landmarks, all_labels)

In [15]:
# Each row of all_landmarks is a flattened (x, y) sequence, so the columns are
# named x0, y0, x1, y1, ..., one pair per landmark.
NUM_LANDMARKS = 468  # landmarks per face in the flattened vectors
SHUFFLE_SEED = 42    # fixed seed so the shuffled row order is reproducible

columns = [f"{axis}{idx}" for idx in range(NUM_LANDMARKS) for axis in ("x", "y")]

df = pd.DataFrame(all_landmarks, columns=columns)
df['label'] = all_labels
# Shuffle the rows (they were collected folder by folder) and renumber the index
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

# Quick sanity check of the label set and the class balance
print(df["label"].unique())
print(df["label"].value_counts())

df

[0 1]
label
0    37705
1    31426
Name: count, dtype: int64


Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y463,x464,y464,x465,y465,x466,y466,x467,y467,label
0,92.910856,266.621376,88.075735,246.063846,91.764092,250.598695,84.365122,219.137609,87.614059,238.904416,...,193.034128,110.306777,195.429042,107.203335,197.650697,151.514515,185.957915,155.120388,182.965952,0
1,111.758426,160.397383,108.462409,135.561007,110.553331,144.232113,103.213017,118.062435,108.011724,129.209579,...,106.345118,123.536552,107.808556,121.156057,108.413977,156.458467,104.057682,159.917933,101.389877,0
2,234.275858,293.036871,242.008876,255.076733,235.866432,266.409531,231.296272,219.434023,242.949504,244.021540,...,195.182419,247.929514,197.024403,246.965439,198.348713,292.541488,192.395239,296.739399,188.158951,0
3,103.939769,167.150181,102.293423,148.317600,104.261271,154.159883,98.921556,125.511217,102.209035,141.352381,...,109.175716,122.235682,110.469157,119.425097,111.418324,159.685938,105.290354,163.345581,102.844210,1
4,122.708656,168.968100,122.673849,156.550365,121.860239,158.327482,113.820602,133.356150,122.206509,150.678442,...,104.822012,128.478775,107.686390,127.104698,110.336597,161.764370,93.877850,164.052166,91.810913,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69126,103.130077,166.931284,98.045822,147.055048,102.122030,152.295475,95.300753,125.173212,97.543074,140.678467,...,106.955198,118.905445,108.576277,116.029118,109.937056,153.725134,104.601673,157.310823,101.788258,1
69127,105.120437,166.866302,103.717120,141.686193,105.070649,149.651356,98.810726,119.404762,103.419356,134.451288,...,102.427284,118.449347,104.305191,116.208454,105.390223,151.499516,99.428204,154.828016,97.148232,0
69128,112.566141,169.852272,112.043966,148.544695,112.012938,153.224174,106.795588,122.848740,111.970674,141.556484,...,98.721245,122.817537,101.206789,121.356195,103.299213,153.380680,94.774038,156.194176,92.971734,0
69129,115.556690,170.838436,117.845461,151.049265,115.829327,155.622681,110.524559,127.278547,117.895262,144.346949,...,105.530199,124.168877,107.634346,123.085955,109.372971,155.639250,100.499637,158.396229,98.798957,0


In [9]:
# Persist the landmark table as a semicolon-separated CSV without the index column
df.to_csv("data.csv", index=False, sep=';')

In [None]:
# Разделим данные
#

# X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
#     df_binary.drop(columns=['label']),  # все колонки кроме 'label'
#     df_binary['label'],                 # сами метки
#     test_size=0.2,                # 20% на тест
#     stratify=df_binary['label'],         # сбалансированная разбивка по классам
#     random_state=42
# )

# Обучим модель
# model_subtype = LogisticRegression(max_iter=1000)
# model_subtype.fit(X_train_subtype, y_train_subtype)

# model_binary = LogisticRegression(max_iter=1000)
# model_binary.fit(X_train_binary, y_train_binary)

# # Оценим качество
# # y_pred_subtype = model_subtype.predict(X_test_subtype)
# # print(f"Accuracy subtype: {accuracy_score(y_test_subtype, y_pred_subtype):.4f}")

# y_pred_binary = model_binary.predict(X_test_binary)
# print(f"Accuracy binary class: {accuracy_score(y_test_binary, y_pred_binary):.4f}")

In [None]:
# def predict_dichotomy(image_path, model):
#     landmarks = get_landmarks(image_path)
    
#     X_input = pd.DataFrame([landmarks], columns=columns)

#     prediction = model.predict(X_input)[0]
#     proba = model.predict_proba(X_input)[0]

#     return prediction, proba

In [None]:
# joblib.dump(model_subtype, 'subtype_classifier.pkl')

# joblib.dump(model_binary, 'logic_ethics_classifier.pkl')

# subtypes = {
#     0 : "Шизоидный",
#     1 : "Параноидальный",
#     2 : "Нарциссический",
#     3 : "Психопатический",
#     4 : "Компульсивный",
#     5 : "Истерический",
#     6 : "Депрессивный",
#     7 : "Мазохистический"
# }

In [None]:
# model_subtype_file = joblib.load("subtype_classifier.pkl")  # путь к сохранённой модели
# model_binary_file = joblib.load("logic_ethics_classifier.pkl")  # путь к сохранённой модели

In [None]:
# # 2. Инициализация детектора и предиктора
# result_subtype, confidence_subtype = predict_dichotomy("../all_data/12/16 (2).jpg", model_subtype_file)
# print(f"Подтип личности: {subtypes[result_subtype]} (Уверенность: {max(confidence_subtype):.2f})")

# result_binary, confidence_binary = predict_dichotomy("../all_data/12/16 (2).jpg", model_binary_file)
# print(f"Дихотомия: {'Этика' if result_binary == 0 else 'Логика'} (Уверенность: {max(confidence_binary):.2f})")
