In [7]:
import cv2
import dlib
from matplotlib import pyplot as plt
from imutils import face_utils
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import joblib

In [8]:
# Resource paths

# Root folder of the image dataset; the dataset-building cell scans its
# sub-directories and derives the class label from each sub-directory name.
DATASET_DIR = "../data/"
# File handed to dlib.shape_predictor when the landmark model is loaded.
PREDICTOR_PATH = "../models/shape_predictor_68_face_landmarks.dat"

In [9]:
# Load the dlib models
# Landmark shape predictor, read from PREDICTOR_PATH; called as predictor(gray, face).
predictor = dlib.shape_predictor(PREDICTOR_PATH)
# dlib's built-in frontal face detector; called as detector(gray).
detector = dlib.get_frontal_face_detector()

In [10]:
# Landmark normalization: brings every set of landmarks to a common origin and scale.

def normalize_landmarks(points):
    """Center landmarks between two anchor points and scale by their distance.

    A float32 copy of ``points`` is made, so the caller's array is not modified.
    The x/y columns are shifted so that the midpoint of rows 36 and 45 becomes
    the origin, then every coordinate is divided by the Euclidean distance
    between those two rows.

    Args:
        points: array with one row per landmark and x, y in the first two
            columns; must contain at least 46 rows.

    Returns:
        float32 array of the same shape. When the two anchor rows coincide
        (distance 0) the centered but unscaled coordinates are returned.
    """
    first_anchor, second_anchor = 36, 45
    coords = points.astype(np.float32)

    # Translate: put the midpoint of the two anchors at the origin.
    midpoint = (coords[first_anchor] + coords[second_anchor]) / 2
    coords[:, :2] -= midpoint[:2]

    # Scale: the anchor-to-anchor distance becomes the unit length.
    span = np.linalg.norm(coords[first_anchor] - coords[second_anchor])
    return coords / span if span > 0 else coords

In [18]:
def get_landmarks(image_path, landmarks=None, all_labels=None):
    """Extract a normalized landmark feature vector from a face image.

    The image is decoded, converted to grayscale and passed to the global dlib
    ``detector``; the global ``predictor`` is then run on the first detected
    face. The resulting points go through ``normalize_landmarks`` and a fixed
    subset of them (indices 0-26 and 60-67, i.e. 35 points) is flattened into
    ``[x0, y0, x1, y1, ...]``.
    NOTE(review): in the usual 68-point layout these indices presumably cover
    the jaw line, eyebrows and inner lip contour - confirm against the model.

    Args:
        image_path: path of the image file to read.
        landmarks: ignored; kept only so existing positional calls keep working.
        all_labels: ignored; kept only so existing positional calls keep working.

    Returns:
        1-D array of 70 normalized coordinates, or ``None`` when the detector
        finds no face in the image.

    Raises:
        ValueError: if the file is empty or cannot be decoded as an image.
    """
    # Read the raw bytes and decode them in memory instead of using a
    # path-based reader (presumably so that non-ASCII file names load).
    image_array = np.fromfile(image_path, dtype=np.uint8)

    # An empty file yields an empty buffer, which cv2.imdecode rejects with a
    # low-level cv2.error; report it through the same ValueError as any other
    # undecodable image.
    image = cv2.imdecode(image_array, cv2.IMREAD_COLOR) if image_array.size else None
    if image is None:
        raise ValueError("Изображение не загружено!")

    # Both dlib models are fed the grayscale version of the image.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    faces = detector(gray)
    if len(faces) == 0:
        # No face found: callers are expected to skip this image.
        return None

    # Only the first detection is used, even if several faces were found.
    shape = predictor(gray, faces[0])
    points = normalize_landmarks(np.array([(p.x, p.y) for p in shape.parts()]))

    # Keep landmarks 0-26 and 60-67, flattened row by row.
    return np.concatenate((points[:27], points[60:68]), axis=0).flatten()

In [20]:
print("OpenCV version:", cv2.__version__)
print("dlib version:", dlib.__version__)

# Explicit folder-name -> class-label mapping. Any other sub-directory
# (e.g. a stray .ipynb_checkpoints) is skipped instead of being silently
# labelled as class 1.
LABEL_BY_DIR = {'T': 0, 'F': 1}
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

all_landmarks = []
all_labels = []

# os.listdir returns entries in arbitrary order; sorting makes the collected
# rows (and therefore the seeded shuffle below) reproducible between runs.
for label_dir in sorted(os.listdir(DATASET_DIR)):
    label_path = os.path.join(DATASET_DIR, label_dir)
    if not os.path.isdir(label_path):
        continue
    if label_dir not in LABEL_BY_DIR:
        print(f"Skipping unrecognised directory: {label_dir}")
        continue

    label = LABEL_BY_DIR[label_dir]

    for filename in sorted(os.listdir(label_path)):
        if not filename.lower().endswith(IMAGE_EXTENSIONS):
            continue
        image_path = os.path.join(label_path, filename)

        landmarks = get_landmarks(image_path)
        if landmarks is None:
            # No face detected in this image - leave it out of the dataset.
            continue

        all_landmarks.append(landmarks)
        all_labels.append(label)


# Column names follow the order of the flattened feature vector returned by
# get_landmarks: x/y pairs for landmark indices 0-26 and 60-67.
columns = [
    f"{axis}{idx}"
    for idx in [*range(27), *range(60, 68)]
    for axis in ("x", "y")
]

# Assemble the feature table and shuffle it with a fixed seed so that the
# downstream split and metric can be reproduced.
df = pd.DataFrame(all_landmarks, columns=columns)
df['label'] = all_labels
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

df

OpenCV version: 4.11.0
dlib version: 19.22.99


Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y63,x64,y64,x65,y65,x66,y66,x67,y67,label
0,-0.666619,0.089279,-0.678523,0.279742,-0.654716,0.458301,-0.607100,0.648764,-0.547580,0.839226,...,0.839226,0.202367,0.839226,-0.023808,0.851130,-0.095231,0.851130,-0.154751,0.839226,1
1,-1.663361,-0.108061,-1.671080,0.254714,-1.671080,0.648364,-1.578457,0.995701,-1.377773,1.273571,...,1.072887,0.212262,1.119199,0.219981,1.211823,0.142794,1.234978,0.065608,1.211823,1
2,-0.741941,0.000000,-0.720893,0.199956,-0.678797,0.399911,-0.647225,0.599867,-0.594605,0.789299,...,0.841919,0.247314,0.831395,0.068406,0.852443,-0.015786,0.862967,-0.110502,0.852443,0
3,-0.762911,0.014671,-0.753130,0.229852,-0.714007,0.445032,-0.665102,0.650431,-0.567293,0.836268,...,0.767802,0.283647,0.738459,0.136933,0.767802,0.058685,0.787364,-0.019562,0.787364,1
4,-0.616581,0.024997,-0.633245,0.208304,-0.616581,0.391612,-0.549924,0.558256,-0.483266,0.741564,...,0.774892,0.233301,0.758228,0.016664,0.791557,-0.066657,0.791557,-0.133315,0.791557,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1737,-0.711794,-0.035337,-0.691602,0.166590,-0.641120,0.358421,-0.580542,0.550252,-0.489674,0.721891,...,0.802662,0.287747,0.762276,0.136301,0.792566,0.055530,0.802662,-0.005048,0.802662,0
1738,-1.127234,-0.009802,-1.107630,0.264655,-1.068422,0.519508,-0.990006,0.774361,-0.833173,0.990006,...,0.715549,0.303863,0.715549,0.205843,0.833173,0.107822,0.852777,0.009802,0.852777,0
1739,-0.571267,0.107113,-0.618872,0.321338,-0.595069,0.559365,-0.547464,0.797393,-0.476056,1.059224,...,1.011618,0.142817,1.106829,-0.119014,1.154435,-0.238028,1.130632,-0.285633,1.106829,1
1740,-0.788889,-0.055556,-0.777778,0.144444,-0.755556,0.355556,-0.722222,0.577778,-0.688889,0.788889,...,0.788889,0.222222,0.800000,0.044444,0.822222,-0.033333,0.833333,-0.111111,0.811111,1


In [21]:
# Hold out 20% of the rows for testing, keeping the class ratio equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns="label"),  # every column except the label
    df["label"],               # the labels themselves
    stratify=df["label"],
    test_size=0.2,
    random_state=42,
)

# Fit the classifier on the training part.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Report accuracy on the held-out part.
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")

Accuracy: 0.7622


In [31]:
def predict_dichotomy(image_path):
    """Classify a single face image with the global ``model``.

    The image is converted to a feature vector by ``get_landmarks`` and wrapped
    in a one-row DataFrame that uses the global ``columns`` names, so the input
    carries the same column layout as the training frame.

    Args:
        image_path: path of the image file to classify.

    Returns:
        ``(prediction, proba)``: the predicted label for the image and the
        array of class probabilities from ``model.predict_proba``.
        NOTE(review): probabilities are presumably ordered as in
        ``model.classes_`` - confirm before indexing them by label.

    Raises:
        ValueError: if no face is detected in the image, or if the image
            cannot be decoded (raised by ``get_landmarks``).
    """
    landmarks = get_landmarks(image_path)
    if landmarks is None:
        # get_landmarks signals "no face" with None; without this check the
        # DataFrame constructor below fails with an unrelated column-count error.
        raise ValueError("Лицо не найдено на изображении")

    X_input = pd.DataFrame([landmarks], columns=columns)

    prediction = model.predict(X_input)[0]
    proba = model.predict_proba(X_input)[0]

    return prediction, proba

In [23]:
# Persist the trained classifier to a file in the current working directory.
joblib.dump(model, 'logic_ethics_classifier.pkl')

['logic_ethics_classifier.pkl']

In [32]:
# Reload the classifier from disk; this rebinds the global `model` that
# predict_dichotomy reads.
# NOTE(review): joblib.load unpickles the file - only load files you trust.
model = joblib.load("logic_ethics_classifier.pkl")  # path to the saved model

# Classify one sample image and print the predicted class with the highest
# class probability.
# NOTE(review): the image sits under the dataset's T folder, so it may have been
# part of the training split - confirm before reading this as a held-out result.
result, confidence = predict_dichotomy("../data/T/александр фролов евраз1.jpg")
print(f"Дихотомия: {'Логика' if result == 0 else 'Этика'} (уверенность: {max(confidence):.2f})")



Дихотомия: Логика (уверенность: 0.59)
