In [2]:
import cv2
#from imutils import face_utils
import pandas as pd
import numpy as np
import mediapipe as mp

import os
import re

#from sklearn.model_selection import train_test_split
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import accuracy_score

#import joblib

In [3]:
# Resource locations, relative to the notebook's working directory.
# Forward slashes are used on purpose: Windows accepts them as separators,
# whereas a backslash is an ordinary filename character on other systems.
DATASET_BINARY_DATA = "../binary_data/"
DATASET_ALL_DATA = "../all_data/"
OUTPUT_LANDMARKS_FOLDER = "../output_landmarks/"

In [4]:
# Create the MediaPipe Face Mesh detector once at module level; the landmark
# extraction function reuses this single instance for every image.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
)

In [5]:
def write_output_landmarks_file(image_name, label, landmarks, output_dir=None):
    """Save one image's label and landmark coordinates to a text file.

    The file is named after ``image_name`` with its last extension replaced by
    ``.txt``. The first line holds the label; every following line holds one
    landmark formatted as ``x y``. An existing file with the same name is
    overwritten.

    Args:
        image_name: Image file name; only the part before the last extension
            is used for the output file name.
        label: Value written on the first line.
        landmarks: Iterable of ``(x, y)`` pairs.
        output_dir: Destination folder, created if it does not exist. ``None``
            (the default) selects the module-level ``OUTPUT_LANDMARKS_FOLDER``.
    """
    # Resolve the default at call time so the module constant can be changed
    # after this function has been defined.
    if output_dir is None:
        output_dir = OUTPUT_LANDMARKS_FOLDER
    os.makedirs(output_dir, exist_ok=True)

    stem = os.path.splitext(image_name)[0]
    output_path = os.path.join(output_dir, f'{stem}.txt')

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(f'{label}\n')
        f.writelines(f'{x} {y}\n' for x, y in landmarks)


In [6]:
def normalize_landmarks(points):
    """Center landmarks between two eye points and scale by their distance.

    Rows 33 and 263 of ``points`` serve as the two eye reference points. The
    input is copied to float32 first, so the caller's array is not modified.

    Args:
        points: Array of ``(x, y)`` rows containing at least 264 rows.

    Returns:
        float32 array in which the midpoint of the two reference rows has been
        subtracted from columns 0 and 1, divided by the distance between the
        reference rows. If that distance is not positive, the shifted points
        are returned without scaling.
    """
    pts = points.astype(np.float32)

    # Row views into ``pts``: they follow the in-place shifts below.
    eye_a = pts[33]
    eye_b = pts[263]

    # Shift x and y so the midpoint between the eyes becomes the origin.
    for axis in (0, 1):
        pts[:, axis] -= (eye_a[axis] + eye_b[axis]) / 2

    # Both eye rows moved by the same offset, so their distance is unchanged.
    span = np.linalg.norm(eye_a - eye_b)
    return pts / span if span > 0 else pts

def get_landmarks(image_path, landmarks=None):
    """Detect a face in an image file and return its landmarks in pixels.

    Args:
        image_path: Path of the image file to read.
        landmarks: Unused. Kept only so existing calls that pass it keep
            working; the default is ``None`` rather than a shared mutable list.

    Returns:
        Array with one ``(x, y)`` row per landmark of the first detected face,
        with x multiplied by the image width and y by the image height, or
        ``None`` when no face is detected.

    Raises:
        ValueError: If the file is empty or cannot be decoded as an image.
    """
    # The file is read as raw bytes and decoded in memory instead of using
    # cv2.imread -- presumably so paths that imread cannot open still load
    # (NOTE(review): confirm the original motivation).
    image_array = np.fromfile(image_path, dtype=np.uint8)

    # A zero-byte file can never decode; report it exactly like any other
    # unreadable image instead of handing the decoder an empty buffer.
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR) if image_array.size else None
    if image is None:
        raise ValueError("Изображение не загружено!")

    # OpenCV decodes to BGR channel order; the detector is given RGB.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width = image_rgb.shape[:2]

    # Run the module-level Face Mesh detector.
    faces = face_mesh.process(image_rgb)
    if not faces.multi_face_landmarks:
        return None

    # Only the first detected face is used.
    shape = faces.multi_face_landmarks[0]
    return np.array([(p.x * width, p.y * height) for p in shape.landmark])

In [7]:
def numeric_key(name):
    """Return a natural-sort key so embedded numbers compare by value.

    The name is split into alternating text and digit runs; digit runs become
    ``int`` so that, for example, ``"2"`` sorts before ``"10"``.

    Args:
        name: String to build the key for (e.g. a file or folder name).

    Returns:
        List alternating ``str`` and ``int`` items, always starting with a
        (possibly empty) ``str``.
    """
    # isdecimal() accepts exactly the characters that \d matches and int()
    # can parse. isdigit() is also true for characters such as '²', which \d
    # leaves in the text part and int() rejects with ValueError.
    return [int(part) if part.isdecimal() else part for part in re.split(r'(\d+)', name)]

def build_dataframe(dataset_dir, all_landmarks, all_labels, get_label_func=lambda k: k // 8):
    """Extract landmarks for every image under ``dataset_dir`` into the given lists.

    Sub-directories of ``dataset_dir`` are visited in natural (numeric-aware)
    order; entries that are not directories are ignored and do not consume an
    index. For the sub-directory at zero-based position ``k`` the label is
    ``get_label_func(k) % 2``. With the default function this gives label 0 to
    positions 0-7, label 1 to positions 8-15, then 0 again, and so on.

    For each ``.png``/``.jpg``/``.jpeg`` file (case-insensitive) in which a
    face is found, the flattened landmark array is appended to
    ``all_landmarks``, the label to ``all_labels``, and a text file named
    ``<folder>-<file>`` is written through ``write_output_landmarks_file``.
    Images without a detected face are skipped.

    Args:
        dataset_dir: Root folder containing one sub-folder per class.
        all_landmarks: List extended in place with flattened landmark arrays.
        all_labels: List extended in place with the matching labels.
        get_label_func: Maps the folder position ``k`` to a numeric code whose
            parity becomes the label.

    Returns:
        None. Results are delivered through the two list arguments.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # Only real directories take part in the numbering.
    class_dirs = (
        entry
        for entry in sorted(os.listdir(dataset_dir), key=numeric_key)
        if os.path.isdir(os.path.join(dataset_dir, entry))
    )

    for k, label_dir in enumerate(class_dirs):
        label_path = os.path.join(dataset_dir, label_dir)
        # The label depends only on the folder, so compute it once per folder.
        label = get_label_func(k) % 2

        # os.listdir order is arbitrary; sort so runs are reproducible.
        for filename in sorted(os.listdir(label_path)):
            if not filename.lower().endswith(image_extensions):
                continue

            landmarks = get_landmarks(os.path.join(label_path, filename))
            if landmarks is None:
                continue

            all_landmarks.append(landmarks.flatten())
            all_labels.append(label)
            write_output_landmarks_file(f"{label_dir}-{filename}", label, landmarks)

In [8]:
# Accumulators filled in place by build_dataframe: one flattened landmark
# vector and one label per processed image.
all_landmarks, all_labels = [], []

# Labels come from the default mapping of each class folder's position.
build_dataframe(
    DATASET_ALL_DATA,
    all_landmarks,
    all_labels,
)

In [9]:
# Number of landmarks per face; each one contributes an x and a y column.
NUM_LANDMARKS = 468
# Fixed seed so the shuffled row order (and the exported file) is reproducible.
SHUFFLE_SEED = 42

# Column names in the order of the flattened landmark arrays: x0, y0, x1, y1, ...
columns = [f"{axis}{idx}" for idx in range(NUM_LANDMARKS) for axis in ("x", "y")]

df = pd.DataFrame(all_landmarks, columns=columns)
df['label'] = all_labels
# Shuffle all rows, then renumber the index from 0.
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

# Quick sanity check of which labels are present and how balanced they are.
print(df["label"].unique())
print(df["label"].value_counts())

df

[0 1]
label
0    37705
1    31426
Name: count, dtype: int64


Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y463,x464,y464,x465,y465,x466,y466,x467,y467,label
0,82.496284,189.828593,82.437625,169.331224,82.501735,176.200645,77.326403,150.994408,82.392413,163.363002,...,138.681795,94.266122,139.727860,92.443223,140.264570,126.619704,134.553615,129.263014,133.001817,0
1,112.994268,167.404966,110.732281,142.377691,112.128254,151.656408,105.335488,122.046452,110.438082,135.329693,...,109.541105,124.576283,110.803157,122.330196,111.221945,157.314575,107.182814,160.430895,104.762091,0
2,268.596234,708.347188,256.474897,650.483825,267.897116,662.527197,246.023646,576.212983,254.365144,630.818251,...,498.739586,320.581175,506.156543,311.235193,512.929685,438.861181,477.050093,450.505108,466.036386,1
3,165.249800,259.360740,173.264166,243.017100,167.901039,247.210902,164.287026,212.829919,174.002350,234.661303,...,185.119973,177.969967,187.606475,177.121204,189.849444,214.486977,178.042334,217.828885,175.357257,0
4,117.984997,167.759567,121.981510,145.938213,118.029724,152.592945,113.508099,125.153666,122.256870,139.394766,...,109.488981,122.536850,110.981752,122.066372,111.940519,149.802574,106.457570,152.209038,105.074995,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69126,95.999153,161.403801,87.746441,140.599960,95.251766,146.273041,86.984941,118.435474,86.789844,134.077047,...,98.323619,112.676718,100.491138,109.343971,102.200430,147.566021,93.692204,151.285545,91.553591,0
69127,331.842839,567.044750,336.681487,501.124839,332.584748,518.742266,319.500231,434.842769,337.562141,481.554791,...,379.593048,366.530228,384.285761,361.939881,388.183244,462.476778,365.535549,470.453117,358.253900,0
69128,117.069929,165.522747,119.537062,149.163441,117.774925,153.255550,113.239241,125.248875,119.718374,142.460377,...,103.747228,127.199734,105.680289,126.051991,107.396670,158.329378,99.730293,161.231514,98.208483,1
69129,114.745523,169.863207,114.711504,150.262625,114.469269,154.403267,108.473511,125.403217,114.627123,143.494619,...,101.248672,127.188051,103.785206,125.272814,106.000837,161.810352,97.025605,164.632473,94.399130,0


In [10]:
# Export the shuffled table as a semicolon-separated file, omitting the index column.
df.to_csv(
    "data.csv",
    index=False,
    sep=';',
)

In [None]:
# Разделим данные
#

# X_train_binary, X_test_binary, y_train_binary, y_test_binary = train_test_split(
#     df_binary.drop(columns=['label']),  # все колонки кроме 'label'
#     df_binary['label'],                 # сами метки
#     test_size=0.2,                # 20% на тест
#     stratify=df_binary['label'],         # сбалансированная разбивка по классам
#     random_state=42
# )

# Обучим модель
# model_subtype = LogisticRegression(max_iter=1000)
# model_subtype.fit(X_train_subtype, y_train_subtype)

# model_binary = LogisticRegression(max_iter=1000)
# model_binary.fit(X_train_binary, y_train_binary)

# # Оценим качество
# # y_pred_subtype = model_subtype.predict(X_test_subtype)
# # print(f"Accuracy subtype: {accuracy_score(y_test_subtype, y_pred_subtype):.4f}")

# y_pred_binary = model_binary.predict(X_test_binary)
# print(f"Accuracy binary class: {accuracy_score(y_test_binary, y_pred_binary):.4f}")

In [12]:
# def predict_dichotomy(image_path, model):
#     landmarks = get_landmarks(image_path)
    
#     X_input = pd.DataFrame([landmarks], columns=columns)

#     prediction = model.predict(X_input)[0]
#     proba = model.predict_proba(X_input)[0]

#     return prediction, proba

In [13]:
# joblib.dump(model_subtype, 'subtype_classifier.pkl')

# joblib.dump(model_binary, 'logic_ethics_classifier.pkl')

# subtypes = {
#     0 : "Шизоидный",
#     1 : "Параноидальный",
#     2 : "Нарциссический",
#     3 : "Психопатический",
#     4 : "Компульсивный",
#     5 : "Истерический",
#     6 : "Депрессивный",
#     7 : "Мазохистический"
# }

In [14]:
# model_subtype_file = joblib.load("subtype_classifier.pkl")  # путь к сохранённой модели
# model_binary_file = joblib.load("logic_ethics_classifier.pkl")  # путь к сохранённой модели

In [15]:
# # 2. Инициализация детектора и предиктора
# result_subtype, confidence_subtype = predict_dichotomy("../all_data/12/16 (2).jpg", model_subtype_file)
# print(f"Подтип личности: {subtypes[result_subtype]} (Уверенность: {max(confidence_subtype):.2f})")

# result_binary, confidence_binary = predict_dichotomy("../all_data/12/16 (2).jpg", model_binary_file)
# print(f"Дихотомия: {'Этика' if result_binary == 0 else 'Логика'} (Уверенность: {max(confidence_binary):.2f})")
