# In [None]:
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder, Normalizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import face_recognition
import joblib
import cvlib as cv


# Dataset path configuration.
# For each gender there is a source directory plus the train/test directories
# that split_dataset(dataset_path, train_path, test_path) fills from it.
# The training code in this file loads male_train_path / male_test_path.
male_dataset_path = 'male_dataset'
male_train_path = 'male_train_dataset'
male_test_path = 'male_test_dataset'

# The female paths are only referenced by the commented-out setup calls.
female_dataset_path = 'female_dataset'
female_train_path = 'female_train_dataset'
female_test_path = 'female_test_dataset'


# Split a dataset into train and test directories (7:3 by default).
def split_dataset(dataset_path, train_path, test_path, test_size=0.3, random_state=42):
    """Split every class folder of ``dataset_path`` into train and test folders.

    Each sub-directory of ``dataset_path`` is treated as one class.  Its image
    files (``.png``, ``.jpg``, ``.jpeg``, case-insensitive) are divided with
    ``train_test_split`` and hard-linked into ``train_path/<class>`` and
    ``test_path/<class>``.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        train_path: Destination root for the training images.
        test_path: Destination root for the test images.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Notes:
        * File names are sorted before splitting so that the same
          ``random_state`` always yields the same split, independent of the
          order in which the OS lists the directory.
        * A class with fewer than two images cannot be split; its images (if
          any) all go to the training folder.
        * Destinations that already exist are left untouched, so the function
          can be re-run safely.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    os.makedirs(train_path, exist_ok=True)
    os.makedirs(test_path, exist_ok=True)

    for folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, folder)
        if not os.path.isdir(class_path):
            continue

        images = sorted(
            f for f in os.listdir(class_path) if f.lower().endswith(image_suffixes)
        )
        if len(images) < 2:
            # train_test_split raises ValueError when a side would be empty.
            train_images, test_images = images, []
        else:
            train_images, test_images = train_test_split(
                images, test_size=test_size, random_state=random_state
            )

        train_folder = os.path.join(train_path, folder)
        test_folder = os.path.join(test_path, folder)
        os.makedirs(train_folder, exist_ok=True)
        os.makedirs(test_folder, exist_ok=True)

        _link_images(class_path, train_folder, train_images)
        _link_images(class_path, test_folder, test_images)


def _link_images(src_dir, dst_dir, names):
    """Hard-link ``names`` from ``src_dir`` into ``dst_dir``, copying if linking fails."""
    import shutil  # local import: only needed for the copy fallback

    for name in names:
        src = os.path.join(src_dir, name)
        dst = os.path.join(dst_dir, name)
        if os.path.lexists(dst):
            # Already placed by a previous run.
            continue
        try:
            os.link(src, dst)
        except OSError:
            # Hard links are unavailable (e.g. different filesystem): copy instead.
            shutil.copy2(src, dst)

# Create and cache the face-embedding vectors of each class folder.
def create_embeddings(dataset_path):
    """Compute face embeddings for every class folder and save them to disk.

    For each sub-directory of ``dataset_path`` the image files (``.png``,
    ``.jpg``, ``.jpeg``, case-insensitive) are loaded with ``face_recognition``
    and the first face encoding found in each image is collected.  When at
    least one encoding was found, the collection is written to
    ``<class folder>/embeddings.npy``.

    Images in which no face is detected are skipped.  Files are processed in
    sorted order so the row order of ``embeddings.npy`` is reproducible.

    Args:
        dataset_path: Directory containing one sub-directory per class.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, folder)
        if not os.path.isdir(class_path):
            continue

        embeddings = []
        for img_name in sorted(os.listdir(class_path)):
            # Require the dot so names that merely end in "jpg"/"png" are ignored.
            if not img_name.lower().endswith(image_suffixes):
                continue
            img_path = os.path.join(class_path, img_name)
            image = face_recognition.load_image_file(img_path)
            face_encodings = face_recognition.face_encodings(image)
            if face_encodings:
                # Only the first detected face of an image is used.
                embeddings.append(face_encodings[0])

        if embeddings:
            np.save(os.path.join(class_path, 'embeddings.npy'), embeddings)

# Load the cached embeddings together with their class labels.
def load_data(dataset_path):
    """Load every ``embeddings.npy`` under ``dataset_path`` with matching labels.

    Each sub-directory of ``dataset_path`` that contains an ``embeddings.npy``
    file contributes its rows to ``X`` and its directory name, once per row,
    to ``y``.  Sub-directories without that file and plain files are ignored.
    Class folders are visited in sorted order so the returned arrays do not
    depend on the order in which the OS lists the directory.

    Args:
        dataset_path: Directory containing one sub-directory per class.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: the stacked embeddings and the
        corresponding folder names.  Both are empty arrays when nothing was
        found.
    """
    X, y = [], []
    for folder in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, folder)
        if not os.path.isdir(class_path):
            continue
        embeddings_path = os.path.join(class_path, 'embeddings.npy')
        if not os.path.exists(embeddings_path):
            continue
        embeddings = np.load(embeddings_path)
        X.extend(embeddings)
        y.extend([folder] * len(embeddings))
    return np.array(X), np.array(y)


# Prediction
def predict_top_similar_faces(image_path, top_k=None, prefix=''):
    """Rank the known identities by similarity to the face in ``image_path``.

    Loads the persisted classifier and encoders, embeds the first face found
    in the image, and prints one line per class ordered from most to least
    probable.

    Args:
        image_path: Path of the image to classify.
        top_k: Maximum number of ranks to report; ``None`` (default) reports
            every class.
        prefix: String prepended to the three artifact file names
            (``face_recognition_model.pkl``, ``in_encoder.pkl``,
            ``out_encoder.pkl``).  The training code in this file saves its
            artifacts with a ``male_`` prefix, so pass ``prefix='male_'`` to
            use them.

    Returns:
        A list of ``(name, probability)`` tuples in rank order, or an empty
        list when no face is detected in the image.
    """
    # NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
    model = joblib.load(f'{prefix}face_recognition_model.pkl')
    in_encoder = joblib.load(f'{prefix}in_encoder.pkl')
    out_encoder = joblib.load(f'{prefix}out_encoder.pkl')

    # Build the embedding vector from the image.
    image = face_recognition.load_image_file(image_path)
    face_encodings = face_recognition.face_encodings(image)
    if not face_encodings:
        print("No face found in the image.")
        return []

    # Only the first detected face is classified.
    embedding = in_encoder.transform(face_encodings[0].reshape(1, -1))

    probabilities = model.predict_proba(embedding)[0]
    ranked_columns = probabilities.argsort()[::-1][:top_k]
    # predict_proba columns follow model.classes_, so map columns to encoded
    # labels before decoding them back to names.
    names = out_encoder.inverse_transform(model.classes_[ranked_columns])

    results = []
    for rank, (predict_name, column) in enumerate(zip(names, ranked_columns), start=1):
        probability = probabilities[column]
        print(f"Rank {rank}: Predicted Name: {predict_name}, Probability: {probability * 100:.2f}%")
        results.append((predict_name, float(probability)))
    return results

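# --------------------------------------------------------
# One-time setup: run these if it has not been done yet.
# NOTE: the calls below use the female_* paths, while the training code
# further down loads male_train_path / male_test_path; use the paths of the
# dataset you are about to train on.
# Split the dataset 7:3 into train and test sets
# split_dataset(female_dataset_path, female_train_path, female_test_path)

# Create embedding vectors for the training and test datasets
# create_embeddings(female_train_path)
# create_embeddings(female_test_path)

# --------------------------------------------------------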


# Load the cached training and test embeddings
X_train, y_train = load_data(male_train_path)
X_test, y_test = load_data(male_test_path)
if len(X_train) == 0 or len(X_test) == 0:
    # Fail early with an actionable message instead of an opaque shape error.
    raise RuntimeError(
        'No embeddings found; run split_dataset() and create_embeddings() '
        'for the train and test directories first.'
    )

# L2-normalize the embedding vectors
in_encoder = Normalizer(norm='l2')
X_train = in_encoder.fit_transform(X_train)
X_test = in_encoder.transform(X_test)

# Encode the string labels as integers
out_encoder = LabelEncoder()
y_train = out_encoder.fit_transform(y_train)
y_test = out_encoder.transform(y_test)

# Train
model = SVC(kernel='linear', probability=True)
model.fit(X_train, y_train)

# Evaluate
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)

print(f'Training accuracy: {train_accuracy}')
print(f'Test accuracy: {test_accuracy}')

# Persist the classifier and both encoders (note the 'male_' file prefix)
joblib.dump(model, 'male_face_recognition_model.pkl')
joblib.dump(in_encoder, 'male_in_encoder.pkl')
joblib.dump(out_encoder, 'male_out_encoder.pkl')

# Predict the whole test set in one batch and print each result
predictions = model.predict(X_test)
probabilities = model.predict_proba(X_test)
# With probability=True the arg-max of predict_proba can differ from
# predict(), so report the probability of the class that was actually
# predicted rather than the row maximum.
predicted_columns = np.searchsorted(model.classes_, predictions)
predicted_probas = probabilities[np.arange(len(predictions)), predicted_columns]
predicted_names = out_encoder.inverse_transform(predictions)
true_names = out_encoder.inverse_transform(y_test)
for true_name, predict_name, proba in zip(true_names, predicted_names, predicted_probas):
    print(f'org:{true_name}, predict:{predict_name}, proba: {proba * 100:.2f}%')


# predict_top_similar_faces('C:/Users/1201q/python/test/1.jpg')
