# Face Recognition - Generate Eigenface Classifiers

This notebook tests face detection and recognition from sample images

In [1]:
import cv2, cv2.face
import os, os.path
import numpy as np
from PIL import Image

In [2]:
# constants
CASCADE_PATH = './data/'
CASCADE_NAME = 'haarcascade_frontalface_default.xml'
SAMPLE_TRAINING = './data/sample_training/'
SAMPLE_TESTING = './data/sample_testing/'

In [3]:
#vSAMPLE_TRAINING = '/home/rico/datasets/lfw/raw/'

In [4]:
# Viola-Jones classifier for Haar feature extraction
cascader_file = os.path.join(CASCADE_PATH, CASCADE_NAME)
cascader = cv2.CascadeClassifier(cascader_file)
assert(not cascader.empty())

cascader_kwargs = { # setup detection args here
    'scaleFactor': 1.1,
    'minNeighbors': 2,
    'flags': 2
}

In [37]:
# function that gets images with respective labels from a given folder
def get_images_and_labels(folder,
                          detect_faces=False,
                          median_blur=3,
                          equalize_hist=True,
                          debug=False,
                          debug_faces=True,
                          debug_accuracy=False):
    
    labels = []
    images = []
    added = 0
    expected = 0
    
    for label in os.listdir(folder):
        
        if debug: print(label)
        path = os.path.join(folder, label)
        
        if os.path.isdir(path):
            
            for filename in os.listdir(path):
                
                ok = 0
                expected += 1

                try:
                    
                    image_path = os.path.join(path, filename)
                    
                    #image_pil = Image.open(image_path).convert('L')
                    #image = np.array(image_pil, 'uint8')
                    #cv2.equalizeHist(image, image)
                    
                    # get grayscale image
                    
                    raw_image = cv2.imread(image_path)
                    gray_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2GRAY)
                    image = gray_image
                    if equalize_hist:
                        image = cv2.equalizeHist(gray_image)
                    if median_blur:
                        image = cv2.medianBlur(image, median_blur)
                    
                    if detect_faces:
                        # get the face using Viola-Jones detector
                        faces = cascader.detectMultiScale(image, **cascader_kwargs)
                        for (x, y, w, h) in faces:
                            images.append(image[y:y+h, x:x+w])
                            labels.append(label)
                            if debug_faces:
                                cv2.imshow('Adding faces to training set...', image[y:y+h, x:x+w])
                                cv2.waitKey(50)
                        ok += len(faces)
                        if len(faces) > 0:
                            added += 1
                    else:
                        images.append(image)
                        labels.append(label)
                        ok = 1
                        added += 1

                except Exception as e:
                    print(e)
                    pass
                
                if debug: print('\t' + filename + (' [%d face]' % ok))
        
        if debug: print('[Added %d images]' % added)
    
    if debug_faces:
        cv2.destroyAllWindows()
    
    if debug_accuracy:
        print('Accuracy: %.5f%%' % (100 * added / max(1, expected)))
    
    return images, labels
    

In [38]:
%%time
# try getting labels from sample images folder
print('Collected %d images\n' % len(training_set[0]))

Accuracy: 100.00000%
Collected 75 images

CPU times: user 132 ms, sys: 0 ns, total: 132 ms
Wall time: 132 ms


In [51]:
# setup label dictionary and testing set
testing_set = get_images_and_labels(SAMPLE_TESTING, detect_faces=False, debug=False, debug_faces=True, debug_accuracy=True)
label_dict = {i: label for i, label in enumerate(training_set[1])}

Accuracy: 100.00000%


In [40]:
def perform_training(recognizer, **kwargs):
    training_set = get_images_and_labels(SAMPLE_TRAINING,
                                         detect_faces=True,
                                         debug=False,
                                         debug_faces=False,
                                         debug_accuracy=True)
    images, labels = training_set
    recognizer.train(images, np.array(range(len(images))))
    print('Trained %d images\n' % len(images))

In [41]:
def perform_testing(recognizer):
    %%time
    # Perform testing

    images, labels = testing_set
    zipped = zip(images, labels)
    print('Performing %d tests\n' % len(images))

    correct = 0
    total = 0

    for image, expected_label in zipped:
        label, confidence = recognizer.predict(image)
        if label in label_dict:
            actual_label = label_dict[label]
            if actual_label == expected_label:
                correct += 1
                print('%s is correctly recognized with confidence %.10f' % (actual_label, confidence))
            else:
                print('%s incorrect (recognized as %s with confidence %.10f)' % (actual_label, expected_label, confidence))
        else:
            print('No face found for %s' % label)
        total += 1

    print()
    print('Accuracy: %.2f%%' % (100 * (correct / max(1, total))))
    print('Correct:  %d' % correct)
    print('Wrong:    %d' % (total - correct))

# 1. EigenFaces

In [42]:
# face recognizer
recognizer = cv2.face.createEigenFaceRecognizer()

In [43]:
%%time
# Perform the training
perform_training(recognizer)

Trained 75 images

CPU times: user 456 ms, sys: 4 ms, total: 460 ms
Wall time: 460 ms


In [44]:
perform_testing(recognizer)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 6.91 µs
Performing 10 tests

Fernando_Gonzalez incorrect (recognized as Hosni_Mubarak with confidence 10687.3564137359)
Fernando_Gonzalez incorrect (recognized as Jeong_Se-hyun with confidence 11347.2819667894)
Jeong_Se-hyun incorrect (recognized as Aaron_Peirsol with confidence 9994.4947287886)
Heizo_Takenaka is correctly recognized with confidence 9199.6439138914
Colin_Farrell incorrect (recognized as Jesse_Jackson with confidence 11422.3797453893)
George_Clooney incorrect (recognized as Hugh_Grant with confidence 9472.7206803063)
Fernando_Gonzalez is correctly recognized with confidence 8045.6673116274
George_Clooney is correctly recognized with confidence 8544.8542757203
George_Clooney incorrect (recognized as Colin_Farrell with confidence 9793.1912058050)
George_Clooney incorrect (recognized as Charles_Taylor with confidence 10596.8797114477)

Accuracy: 30.00%
Correct:  3
Wrong:    7


# 2. FisherFaces

In [45]:
# face recognizer
recognizer = cv2.face.createFisherFaceRecognizer()

In [46]:
%%time
perform_training(recognizer)

Trained 75 images

CPU times: user 656 ms, sys: 8 ms, total: 664 ms
Wall time: 666 ms


In [47]:
%%time
perform_testing(recognizer)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 3.81 µs
Performing 10 tests

Fernando_Gonzalez incorrect (recognized as Hosni_Mubarak with confidence 10687.1966842647)
Fernando_Gonzalez incorrect (recognized as Jeong_Se-hyun with confidence 11346.3021815501)
Jeong_Se-hyun incorrect (recognized as Aaron_Peirsol with confidence 9979.5216437280)
Heizo_Takenaka is correctly recognized with confidence 9198.4237293494
Colin_Farrell incorrect (recognized as Jesse_Jackson with confidence 11404.3648418238)
George_Clooney incorrect (recognized as Hugh_Grant with confidence 9457.8092446267)
Fernando_Gonzalez is correctly recognized with confidence 8018.3304695702
George_Clooney is correctly recognized with confidence 8515.4131307016
George_Clooney incorrect (recognized as Colin_Farrell with confidence 9785.5626252579)
George_Clooney incorrect (recognized as Charles_Taylor with confidence 10549.5508920013)

Accuracy: 30.00%
Correct:  3
Wrong:    7
CPU times: user 44 ms, sys: 0 ns, total: 4

# 2. Local Binary Patterns

In [48]:
# face recognizer
recognizer = cv2.face.createLBPHFaceRecognizer()

In [49]:
%%time
perform_training(recognizer)

Trained 75 images

CPU times: user 176 ms, sys: 0 ns, total: 176 ms
Wall time: 175 ms


In [50]:
%%time
perform_testing(recognizer)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 17.6 µs
Performing 10 tests

Hosni_Mubarak is correctly recognized with confidence 70.6332804869
Jesse_Jackson incorrect (recognized as Jeong_Se-hyun with confidence 67.7481908087)
Colin_Farrell incorrect (recognized as Aaron_Peirsol with confidence 65.0345385776)
Heizo_Takenaka is correctly recognized with confidence 62.5162239504
Hugh_Grant incorrect (recognized as Jesse_Jackson with confidence 56.2324576027)
Hugh_Grant is correctly recognized with confidence 58.2146113385
Fernando_Gonzalez is correctly recognized with confidence 57.5177252210
Jeong_Se-hyun incorrect (recognized as George_Clooney with confidence 60.7873265369)
Colin_Farrell is correctly recognized with confidence 61.3683821506
Charles_Taylor is correctly recognized with confidence 67.7961789087

Accuracy: 60.00%
Correct:  6
Wrong:    4
CPU times: user 88 ms, sys: 0 ns, total: 88 ms
Wall time: 87.2 ms
