In [5]:
import os
import numpy as np
import dlib
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from skimage import io
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import joblib

# Initialise dlib's face detector, landmark (shape) predictor and
# face-recognition model from the model files at the paths below.
_SHAPE_PREDICTOR_PATH = '/content/drive/MyDrive/Colab Notebooks/shape_predictor_68_face_landmarks.dat'
_FACE_RECOGNITION_PATH = '/content/drive/MyDrive/Colab Notebooks/dlib_face_recognition_resnet_model_v1.dat'

detector = dlib.get_frontal_face_detector()
sp = dlib.shape_predictor(_SHAPE_PREDICTOR_PATH)
facerec = dlib.face_recognition_model_v1(_FACE_RECOGNITION_PATH)

def find_landmarks(img_path):
    """Detect the faces in an image and return their 68 landmark points.

    Args:
        img_path: Path of the image file to load.

    Returns:
        An integer numpy array of shape ``(n_faces, 68, 2)`` holding the
        (x, y) coordinates of every landmark of every detected face. When
        no face is detected the array has shape ``(0, 68, 2)``, so
        ``len(result) == 0`` and iterating over it yields nothing.
    """
    num_points = 68

    img = io.imread(img_path)

    # The second argument is dlib's upsampling count for the detector.
    dets = detector(img, 1)

    # Allocating from len(dets) keeps the result shape consistent even when
    # no face was found (previously a 1-D empty array was returned).
    landmarks = np.zeros((len(dets), num_points, 2), dtype=int)
    for face_idx, face_rect in enumerate(dets):
        # Fit the landmark model inside the detected face rectangle.
        shape = sp(img, face_rect)
        landmarks[face_idx] = [
            (shape.part(i).x, shape.part(i).y) for i in range(num_points)
        ]

    return landmarks

def encode_faces(img_path, landmarks):
    """Compute one face descriptor per landmark set found in an image.

    Args:
        img_path: Path of the image the landmarks belong to; the image is
            loaded again here.
        landmarks: Iterable of per-face landmark sequences, each item being
            an indexable (x, y) pair.

    Returns:
        A numpy array built from one descriptor per entry of ``landmarks``
        (an empty array when ``landmarks`` is empty).
    """
    image = io.imread(img_path)

    def describe(points):
        # Rebuild a dlib.full_object_detection from the raw coordinates;
        # its bounding rectangle is simply the whole image.
        whole_image = dlib.rectangle(0, 0, image.shape[1], image.shape[0])
        parts = [dlib.point(xy[0], xy[1]) for xy in points]
        detection = dlib.full_object_detection(whole_image, parts)
        # Feature vector computed by the recognition model for this face.
        return np.array(facerec.compute_face_descriptor(image, detection))

    return np.array([describe(points) for points in landmarks])

def train_classifier(training_data, labels):
    """Train an SVC on PCA-projected face descriptors.

    Args:
        training_data: Iterable of ``(img_path, landmarks)`` pairs.
        labels: Iterable of class labels, one per entry of
            ``training_data``.

    Returns:
        A tuple ``(classifier, pca, representative_vector)``: the fitted
        SVC, the fitted PCA, and the mean of the PCA-projected training
        descriptors.

    Raises:
        ValueError: If ``training_data`` and ``labels`` differ in length,
            or if no face descriptor could be extracted.
    """
    training_data = list(training_data)
    labels = list(labels)
    if len(training_data) != len(labels):
        raise ValueError(
            "training_data and labels must have the same length."
        )

    # Collect descriptors and keep a label for every descriptor, so that an
    # entry producing zero or several descriptors cannot shift X against y.
    face_descriptors = []
    descriptor_labels = []
    for (img_path, landmarks), label in zip(training_data, labels):
        descriptors = encode_faces(img_path, landmarks)
        face_descriptors.extend(descriptors)
        descriptor_labels.extend([label] * len(descriptors))
    if not face_descriptors:
        raise ValueError("No valid face descriptors found in training data.")

    face_descriptors = np.array(face_descriptors)

    # PCA rejects n_components larger than min(n_samples, n_features), so a
    # training set with fewer than 128 descriptors must use a smaller value.
    n_components = min(128, *face_descriptors.shape)
    pca = PCA(n_components=n_components)
    reduced_face_descriptors = pca.fit_transform(face_descriptors)

    # NOTE: PCA centres the data, so this mean is ~0 in every component.
    # It is kept because callers unpack three return values.
    representative_vector = np.mean(reduced_face_descriptors, axis=0)

    classifier = SVC()
    classifier.fit(reduced_face_descriptors, descriptor_labels)

    return classifier, pca, representative_vector

# Walk <folder>/<team>/<person>/<image> and collect every image containing
# exactly one detected face, together with its label.
folder = '/content/drive/MyDrive/Colab Notebooks/linearAlgebra2_face_detection_datasets'
data = []
labels = []

for team_folder in os.listdir(folder):
    team_folder_path = os.path.join(folder, team_folder)
    if not os.path.isdir(team_folder_path):
        continue
    for person_folder in os.listdir(team_folder_path):
        person_folder_path = os.path.join(team_folder_path, person_folder)
        # Check for a directory *before* parsing the name, so stray files
        # without "_" in their name cannot raise IndexError.
        if not os.path.isdir(person_folder_path):
            continue
        # The label is the second "_"-separated token of the folder name.
        person_initial = person_folder.split("_")[1]
        for image_name in os.listdir(person_folder_path):
            image_path = os.path.join(person_folder_path, image_name)
            landmarks = find_landmarks(image_path)
            # Keep only images with exactly one detected face; images with
            # zero or several faces are skipped without a message.
            if len(landmarks) == 1:
                data.append((image_path, landmarks))
                labels.append(person_initial)

# Split the data into training and validation sets.
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=42)

# Train the classifier.
classifier, pca, representative_vector = train_classifier(train_data, train_labels)

# Persist the fitted models.
joblib.dump(classifier, 'classifier.joblib')
joblib.dump(pca, 'pca.joblib')

print("훈련 완료!")
print("대표 벡터:", representative_vector)

# Evaluate on the validation set.
true_labels = []
pred_labels = []

# Pair each sample with its label directly instead of searching test_data
# with list.index(), which is O(n) per sample and relies on comparing tuples
# that contain numpy arrays.
for (img_path, landmarks), true_label in zip(test_data, test_labels):
    descriptors = encode_faces(img_path, landmarks)
    if len(descriptors) > 0:  # predict only when a descriptor exists
        reduced_descriptors = pca.transform(descriptors)
        predictions = classifier.predict(reduced_descriptors)
        true_labels.extend([true_label] * len(descriptors))
        pred_labels.extend(predictions)

# Print the confusion matrix.
cm = confusion_matrix(true_labels, pred_labels)
print("Confusion Matrix:")
print(cm)

# Print the classification report.
report = classification_report(true_labels, pred_labels)
print("Classification Report:")
print(report)


훈련 완료!
대표 벡터: [-9.53890904e-19  1.69580605e-18 -1.69580605e-18  6.35927269e-19
 -3.70957574e-18 -5.40538179e-18  1.58981817e-18  3.49759998e-18
  4.23951513e-19 -1.13407030e-17  2.64969696e-18 -3.97454543e-18
 -1.12082181e-17  9.43292116e-18 -5.08741816e-18  6.94220603e-18
  7.04819390e-18 -4.66346664e-18 -4.91518785e-18  5.29939391e-18
 -5.29939391e-19 -1.03868121e-17  4.16002422e-18  6.09430300e-19
  6.47850906e-18  4.50448483e-18  1.16586666e-18  6.27978179e-18
  6.62424239e-19 -7.31316360e-18  4.84894543e-18  1.19236363e-18
  8.42603632e-18 -6.51825451e-18  4.87544240e-18  1.86803635e-18
  1.32484848e-18 -3.10014544e-18 -3.60358786e-18 -3.65658180e-18
 -2.39797575e-18 -1.96077575e-18  8.77049692e-18 -4.61047270e-18
  3.12664241e-18  3.07364847e-18 -5.14041209e-18  3.28562423e-18
 -1.39771514e-18 -1.58981817e-19 -3.97454543e-18 -6.67723633e-18
  6.83621815e-18 -4.50448483e-18 -1.41162605e-17 -1.40433939e-18
  5.47162421e-18  6.47850906e-18 -6.67723633e-18 -7.94909087e-18
  5.2463999