In [1]:
import cv2 as cv
import numpy as np
import os
from skimage.transform import resize
from skimage import io

In [2]:
# Haar-cascade frontal-face detector used by `preprocess`.
# The path is relative, so the XML file must be in the kernel's working directory.
CASCADE_PATH = 'haarcascade_frontalface_default.xml'
face_cascade = cv.CascadeClassifier(CASCADE_PATH)

# CascadeClassifier does not raise when the file cannot be loaded; it just
# stays empty and every later detectMultiScale call fails. Fail here instead,
# with a message that names the actual problem.
if face_cascade.empty():
    raise FileNotFoundError(
        "Could not load Haar cascade from %r (working directory: %s)"
        % (CASCADE_PATH, os.getcwd())
    )

def preprocess(image_path):
    """Load an image as grayscale, crop it to the detected face and scale to [0, 1].

    Parameters
    ----------
    image_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        2-D float array (pixel values divided by 255). If at least one face is
        detected the array is the crop of the largest detection; otherwise it
        is the whole image.

    Raises
    ------
    OSError
        If OpenCV cannot read/decode the file (``cv.imread`` returned ``None``).
    """
    img = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        # imread signals failure by returning None rather than raising.
        raise OSError("Could not read image: %r" % (image_path,))

    # Positional arguments are scaleFactor=1.3 and minNeighbors=5.
    faces = face_cascade.detectMultiScale(img, 1.3, 5)

    if len(faces):
        # Crop exactly once. Cropping inside a loop over all detections would
        # apply later boxes to an already-cropped image (wrong coordinates,
        # possibly an empty result), so keep only the largest detection.
        x, y, w, h = max(faces, key=lambda box: box[2] * box[3])
        img = img[y:y + h, x:x + w]

    return img / 255.0  # Normalize to [0, 1]

In [3]:
# Step 1: Prepare training data
# Face images are stored under `faces_dir`; each subdirectory is named with an
# integer id and holds the images of one person. That id is used as the label.
faces_dir = r'G:\datasets\faces\FaceData\FaceDataset'
faces_dirs = os.listdir(faces_dir)
faces, labels = [], []
MinImagesPerLabel = 40
NumSubjects = 800   # subject ids 0 .. NumSubjects-1 are considered

# Only visit subject ids that really exist on disk, so a gap in the numbering
# does not abort the whole (slow) preprocessing run.
available_subjects = set(faces_dirs)

for label in range(NumSubjects):
    if str(label) not in available_subjects:
        continue
    subject_dir_path = os.path.join(faces_dir, str(label))
    if not os.path.isdir(subject_dir_path):
        continue

    # Sorted so the order of the saved arrays is reproducible across runs.
    subject_images_names = sorted(os.listdir(subject_dir_path))
    labelfaces = []

    for image_name in subject_images_names:
        image_path = os.path.join(subject_dir_path, image_name)
        image = preprocess(image_path)
        # Skip degenerate (zero-height or zero-width) crops.
        if image.shape[0] and image.shape[1]:
            # do NOT use np.resize.. use either skimage or cv resize
            image = resize(image, (100, 100))
            labelfaces.append(image)

    # Keep a subject only if it has strictly more than MinImagesPerLabel usable images.
    if len(labelfaces) > MinImagesPerLabel:
        labels.extend([label] * len(labelfaces))
        faces.extend(labelfaces)

# Cache the prepared data so training can be re-run without redoing detection.
np.save('TrainingFacesArr', np.array(faces))
np.save('TrainingLabelsArr', np.array(labels))

In [7]:
# Number of face images currently held in `faces`.
len(faces)

567

In [8]:
# Step 2: Train the EigenFace Recognizer keeping only 50 PCA components
# Reload the cached arrays written by np.save in the data-preparation cell.
faces, labels = np.load('TrainingFacesArr.npy'), np.load('TrainingLabelsArr.npy')

face_recognizer = cv.face.EigenFaceRecognizer_create(num_components=50)
face_recognizer.train(faces, labels)

# Persist the trained model so it can be restored later without retraining.
face_recognizer.write('FaceRecogModel')

In [None]:
# Create a fresh EigenFace recognizer and restore its state from the
# 'FaceRecogModel' file (the name the training cell writes the model to).
face_recognizer = cv.face.EigenFaceRecognizer_create()
face_recognizer.read('FaceRecogModel')

In [None]:
# Visualise the first eigenface: the mean face plus the first eigenvector,
# laid out as a 100x100 image.
mean = face_recognizer.getMean()
comps = face_recognizer.getEigenVectors()   # expected 10000 x 50 (one column per component)
first_comp = (mean + comps[:, 0]).reshape((100, 100))
io.imshow(first_comp)


In [None]:
# Try the same analysis using the scikit-learn PCA class
# faces = np.load('./TrainingFacesArr.npy')
# labels = np.load('./TrainingLabelsArr.npy')
from sklearn.decomposition import PCA

# Flatten every 100x100 face into a 10000-element row vector.
X, y = np.array(faces).reshape((-1, 10000)), np.array(labels)

comp = PCA(50)
# `mean_` and `components_` only exist after fitting; without this call the
# lines below raise AttributeError on a fresh kernel.
comp.fit(X)

# Mean face plus the first principal component, shown as a 100x100 image.
pca_first_comp = comp.mean_.reshape(100, 100) \
    + comp.components_[0].reshape(100, 100)
io.imshow(pca_first_comp)


In [54]:

# Run one image through the same pipeline as the training data
# (face crop + normalisation, then resize to 100x100) and classify it.
test_image_path = r'G:\datasets\faces\ExtractedFaces\ExtractedFaces\117\29.jpg'
test_img = resize(preprocess(test_image_path), (100, 100))

# predict returns the predicted label and a second value that is unpacked as `confidence`.
label, confidence = face_recognizer.predict(test_img)
print(label, confidence)

117 3.532263644629077


In [49]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels; must have the same shape as ``y_true``.

    Returns
    -------
    float
        Share of positions where ``y_true`` and ``y_pred`` are equal, in
        [0, 1]. ``nan`` when the inputs are empty.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` have different shapes.
    """
    # Coerce to arrays first: comparing two plain Python lists with `==`
    # yields a single bool, not an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got %s and %s"
            % (y_true.shape, y_pred.shape)
        )
    if y_true.size == 0:
        # Accuracy is undefined without samples; avoid a 0/0 warning.
        return np.nan

    return np.mean(y_true == y_pred)

In [55]:
from sklearn.svm import SVC

# Fit the PCA on the training matrix and project it; then project the test
# image (flattened to a single 10000-element row) with the same fitted PCA.
Xt = comp.fit_transform(X)
test_img = test_img.reshape(-1, 10000)
transform_im = comp.transform(test_img)
n_labels = np.unique(y)

# Disabled alternative: nearest class-mean classifier in PCA space.
# mean_labels = np.zeros((len(n_labels), transform_im.shape[1]))
# for id, label in enumerate(n_labels):
#     mean_labels[id] = Xt[labels==label].mean(axis=0)
# norm = np.linalg.norm(transform_im - mean_labels, axis=1)
# prediction = np.argmin(norm)
# n_labels[prediction], norm[prediction]

# Train an SVC with default settings on the projected training data and
# classify the projected test image.
clf = SVC().fit(Xt, y)
y_pred = clf.predict(transform_im)
y_pred

array([117])