In [1]:
import face_recognition
import numpy as np
from PIL import Image, ImageDraw
from IPython.display import display
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import time
import cv2
import matplotlib.pyplot as plt
import math
from scipy import stats
import operator

## Setup

Follow the README at https://github.com/ageitgey/face_recognition to set up the `face_recognition` library locally, along with its associated dependencies.

## Get All Filepaths & Place in Dataframe

In [2]:
# Collect the training images: build two parallel lists, `paths` (image file path)
# and `names` (subject label), by walking the directory tree under `rootdir`.
# Files that sit directly in `rootdir` are ignored; only files inside
# sub-directories are used, and the sub-directory supplies the label.

rootdir = '/Users/lanzhang/Face-Recognition-Demo/makerspace'


def collect_labeled_images(root):
    """Walk `root` and return `(paths, names)` as two parallel lists.

    Parameters
    ----------
    root : str
        Directory whose sub-directories each hold the images of one subject.

    Returns
    -------
    tuple[list[str], list[str]]
        `paths[i]` is the full path of an image file and `names[i]` is the
        path of its containing directory relative to `root` (e.g. 'tyler').
        Files located directly in `root` and '.DS_Store' entries are skipped.
    """
    found_paths = []
    found_names = []
    for subdir, dirs, files in os.walk(root):
        # Files directly under the root have no subject directory to label them
        if subdir == root:
            continue
        # Derive the label from the directory structure itself instead of
        # splitting on a hard-coded "makerspace/" string, so this keeps working
        # if the root is renamed or uses a different path separator.
        label = os.path.relpath(subdir, root)
        for file in files:
            if file == '.DS_Store':
                continue
            found_paths.append(os.path.join(subdir, file))
            found_names.append(label)
    return found_paths, found_names


paths, names = collect_labeled_images(rootdir)

In [3]:
# Column-oriented mapping used to build the DataFrame: the 'names' column holds the
# subject name of each image and the 'filepath' column holds that image's path.
dataframe_dict = {
    'names': names,
    'filepath': paths,
}

In [4]:
# Each key of dataframe_dict becomes a column of the resulting frame
all_data_df = pd.DataFrame(dataframe_dict)

In [5]:
# Show the table of subject names and image file paths built above
all_data_df

Unnamed: 0,names,filepath
0,tyler,/Users/lanzhang/Face-Recognition-Demo/makerspa...
1,tyler,/Users/lanzhang/Face-Recognition-Demo/makerspa...
2,tyler,/Users/lanzhang/Face-Recognition-Demo/makerspa...
3,tyler,/Users/lanzhang/Face-Recognition-Demo/makerspa...
4,tyler,/Users/lanzhang/Face-Recognition-Demo/makerspa...
...,...,...
4549,alison,/Users/lanzhang/Face-Recognition-Demo/makerspa...
4550,alison,/Users/lanzhang/Face-Recognition-Demo/makerspa...
4551,alison,/Users/lanzhang/Face-Recognition-Demo/makerspa...
4552,alison,/Users/lanzhang/Face-Recognition-Demo/makerspa...


## Training

In [6]:
# list containing encodings for training faces
known_face_encodings = []

# list containing labels for training faces (parallel to known_face_encodings)
known_face_names = []

# indices into train_paths of images for which no face encoding was produced
not_recognized = []

# iterate through each training image, learn encoding, append true label to known_face_names
train_paths = all_data_df['filepath'].tolist()
train_names = all_data_df['names'].tolist()

start = time.time()
for image_num, (train_path, train_name) in enumerate(zip(train_paths, train_names)):
    image = face_recognition.load_image_file(train_path)
    boxes = face_recognition.face_locations(image, model="cnn")
    encodings = face_recognition.face_encodings(image, boxes)

    # An empty result means no face was found in this image. Check for that
    # explicitly rather than with a bare `except`, which would also hide real
    # errors and swallow a KeyboardInterrupt during this long-running loop.
    if len(encodings) == 0:
        not_recognized.append(image_num)
        continue

    # Only the first encoded face of each training image is used
    known_face_encodings.append(encodings[0])
    known_face_names.append(train_name)
end = time.time()

print('Learned encoding for', len(known_face_encodings), 'images.')
print('\nExecution time: ', (end - start))

Learned encoding for 4176 images.

Execution time:  1213.9975180625916


## Test Data

In [7]:
# Gather the path of every file found anywhere under the kinectaa_v2 directory,
# leaving out '.DS_Store' entries.

rootdir = '/Users/lanzhang/Face-Recognition-Demo/kinectaa_v2'

kinectaa_paths = []

for folder, _subfolders, filenames in os.walk(rootdir):
    kinectaa_paths.extend(
        os.path.join(folder, filename)
        for filename in filenames
        if filename != '.DS_Store'
    )

In [8]:
# Gather the path of every file found anywhere under the kinectbb_v2 directory,
# leaving out '.DS_Store' entries.
rootdir = '/Users/lanzhang/Face-Recognition-Demo/kinectbb_v2'

kinectbb_paths = []

for folder, _subfolders, filenames in os.walk(rootdir):
    kinectbb_paths.extend(
        os.path.join(folder, filename)
        for filename in filenames
        if filename != '.DS_Store'
    )

In [9]:
# Gather the path of every file found anywhere under the kinectcc_v2 directory,
# leaving out '.DS_Store' entries.
rootdir = '/Users/lanzhang/Face-Recognition-Demo/kinectcc_v2'

kinectcc_paths = []

for folder, _subfolders, filenames in os.walk(rootdir):
    kinectcc_paths.extend(
        os.path.join(folder, filename)
        for filename in filenames
        if filename != '.DS_Store'
    )

In [10]:
# Combine the file paths from all three kinect directories into one new list
# (kinectaa first, then kinectbb, then kinectcc)
video_filepaths = [*kinectaa_paths, *kinectbb_paths, *kinectcc_paths]

In [11]:
# Sort the combined file paths in place (string order), mutating video_filepaths
video_filepaths.sort()

In [12]:
# Ground-truth label list: one 'bertrand' entry per path in video_filepaths
true_names = ['bertrand' for _ in video_filepaths]

## Test Accuracy on Video Data

In [13]:
def face_distance_to_conf(face_distance, face_match_threshold=0.6):
    """
    Map the Euclidean distance between a known and an unknown face to a match score.
    (from: https://github.com/ageitgey/face_recognition/wiki/Calculating-Accuracy-as-a-Percentage)

    Distances above `face_match_threshold` are scored on a purely linear scale.
    Distances at or below it start from a linear score and receive an additional
    non-linear boost, so a distance of 0 maps to 1.0 and a distance equal to the
    threshold maps to 0.5.
    """
    # Non-match side: linear fall-off measured against the span above the threshold
    if face_distance > face_match_threshold:
        span = (1.0 - face_match_threshold)
        return (1.0 - face_distance) / (span * 2.0)

    # Match side: linear score in [0.5, 1.0], then boosted towards 1.0
    span = face_match_threshold
    linear_score = 1.0 - (face_distance / (span * 2.0))
    boost = (1.0 - linear_score) * math.pow((linear_score - 0.5) * 2, 0.2)
    return linear_score + boost

In [14]:
# list containing predicted names for test faces ("Unknown" when no known face is close enough)
predicted = []

# list containing true labels for test faces
true_labels = []

# list containing all images that a face was detected in, for prediction
used_filepaths = []

# list containing prediction confidence scores for each prediction, calculated from Euclidean distance
distances = []

# The four lists above are always appended to together (once per image in which a
# face was detected), so index i refers to the same image in every one of them.
# Code that pairs predicted[i] with distances[i] relies on this alignment.

# Largest Euclidean distance that still counts as a match. 0.6 is the default
# tolerance of face_recognition.compare_faces and the default threshold of
# face_distance_to_conf.
match_tolerance = 0.6

# iterate through each image in test set and generate predictions
start = time.time()
for image_path, true_name in zip(video_filepaths, true_names):
    unknown_image = face_recognition.load_image_file(image_path)
    face_locations = face_recognition.face_locations(unknown_image, model="cnn")
    face_encodings = face_recognition.face_encodings(unknown_image, face_locations)

    # Skip images with no detected face, and skip everything if there are no
    # known encodings to compare against. This is handled explicitly instead of
    # through a bare `except: pass`, which would also hide genuine errors.
    if len(face_encodings) == 0 or len(known_face_encodings) == 0:
        continue

    # Only the first detected face in each image is classified
    face_encoding = face_encodings[0]

    # Use the known face with the smallest distance to the new face
    face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
    best_match_index = np.argmin(face_distances)
    best_distance = face_distances[best_match_index]

    # The closest known face is accepted only if it is within the match tolerance
    if best_distance <= match_tolerance:
        name = known_face_names[best_match_index]
    else:
        name = "Unknown"

    # update predictions, true labels, used images and confidence scores together
    predicted.append(name)
    true_labels.append(true_name)
    used_filepaths.append(image_path)
    distances.append(face_distance_to_conf(best_distance, match_tolerance))
end = time.time()
print('Elapsed time: ', (end-start))

Elapsed time:  129.76945209503174


## Overall Test Accuracy

In [15]:
# Accuracy = 1 - error rate, where the error rate is the share of predictions
# that differ from the corresponding true label
predicted_arr = np.array(predicted)
true_arr = np.array(true_labels)
mismatch_count = sum(predicted_arr != true_arr)
test_accuracy = 1 - (mismatch_count/(len(predicted_arr)))
print("Test accuracy: ", test_accuracy)

Test accuracy:  0.3946360153256705


## Measure with Euclidean Distance

In [16]:
# Weighted voting: rather than taking the most frequent predicted label, every
# prediction contributes a vote whose weight is the confidence score stored at the
# same index in `distances`. The weights are summed per label across all images.

# Final prediction is the label with the highest vote score

all_labels = {}
for position, voted_label in enumerate(predicted):
    vote_weight = distances[position]
    if voted_label not in all_labels.keys():
        # first vote for this label starts its running total
        all_labels[voted_label] = vote_weight
    else:
        all_labels[voted_label] += vote_weight

In [17]:
# Show the accumulated vote score for each predicted label
all_labels

{'bertrand': 95.8172609586571,
 'adeeb': 37.44709657713603,
 'vivek': 42.187764967072404,
 'marc': 15.907130526140094,
 'iulian': 15.906371831362364,
 'mohamed': 3.718317324016341,
 'plum': 2.8094665219441293,
 'tajesh': 2.7623799739921306,
 'emily': 2.784336167297263,
 'hannes': 0.9146692371135874,
 'prasanth': 2.741634372755758,
 'khisai': 2.8276429812632173,
 'peri': 0.943942762616834,
 'jazib': 4.645471061525951,
 'suki': 4.673962702151087,
 'juliet': 2.7816133048279754,
 'emily_tf': 3.6740690606346247,
 'mitch': 0.9331013719750714}

In [18]:
# The winning label is the key of all_labels with the largest accumulated vote score
final_prediction = max(all_labels, key=all_labels.get)
print(final_prediction)

bertrand
