# Face Recognition for Identity #

## Load Packages ##

In [1]:
import os
import pickle

import face_recognition

import numpy as np
# Seed NumPy's global RNG up front so that np.random draws made later in the
# notebook (e.g. picking a random test photo) start from a fixed state.
np.random.seed(0)

## Load Training Data ##

In [2]:
# Root folder of the dataset. Override it by setting the
# FACE_RECOGNITION_DATA_DIR environment variable instead of editing the
# notebook; the default keeps the original author's location.
# os.path.join(..., "") guarantees a trailing separator, because the cells
# below build paths by plain string concatenation (data_dir + "...").
data_dir = os.path.join(
    os.environ.get(
        "FACE_RECOGNITION_DATA_DIR",
        "/home/lee/Documents/DatasetsForGitHub/face_recognition_for_identity/",
    ),
    "",
)

Our training dataset has high-resolution face images of 10 subjects, taken in a well-lit environment. 

In [3]:
# Recursively collect the path of every .jpg file below the training folder.
train_image_paths = []
training_root = data_dir + "MIT-CBCL-facerec-database/training-originals/"
for folder, _subfolders, filenames in os.walk(training_root):
    train_image_paths.extend(
        os.path.join(folder, filename)
        for filename in filenames
        if filename.endswith(".jpg")
    )

Encode the training images.

In [4]:
# The subject's name is the directory that directly contains the image,
# i.e. the second-to-last component of the image path.
result_names = [
    training_path.split(os.path.sep)[-2]
    for training_path in train_image_paths
]

In [5]:
def loop_over_images(image_paths):
    """Compute the face encodings of every image in ``image_paths``.

    Each file is read with ``face_recognition.load_image_file`` and the
    resulting array is handed to ``face_recognition.face_encodings``.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the image files to encode.

    Returns
    -------
    list
        One entry per input path, in input order. Each entry is the list
        returned by ``face_recognition.face_encodings`` for that image; as
        noted by the original author it normally holds a single encoding and
        is empty when no face is found in the image.
    """
    return [
        face_recognition.face_encodings(
            face_recognition.load_image_file(path)
        )
        for path in image_paths
    ]

In [6]:
known_names = result_names
del result_names

known_encodings = loop_over_images(train_image_paths)

# dump the facial encodings + names to disk
data = {"encodings": known_encodings, "names": known_names}
# Use a context manager so the file is flushed and closed as soon as the dump
# finishes (the bare open() call previously left the handle open).
with open(data_dir + "known_faces_encoded.pkl", "wb") as encodings_file:
    pickle.dump(data, encodings_file)

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))


In [5]:
# Drop the in-memory training artefacts; the encodings and names are read
# back from the pickle file before testing starts.
del train_image_paths
del known_encodings, known_names
del data

## Start Testing ##

The test set consists of 200 images per subject. The data provider varied the illumination, the pose (up to about 30 degrees of rotation in depth) and the background. The quality of the test photos is a realistic simulation of the working environment of a real-world face-recognition-based user identification system.

In [6]:
# load the known faces and embeddings
# NOTE: unpickling can execute arbitrary code, so only load a file that this
# notebook (or another trusted source) produced.
# The context manager closes the file handle once the data has been read.
with open(data_dir + "known_faces_encoded.pkl", "rb") as encodings_file:
    data = pickle.load(encodings_file)

In [7]:
# Recursively collect the path of every .pgm test photo.
test_image_paths = []
test_root = data_dir + "MIT-CBCL-facerec-database/test/"
for folder, _subfolders, filenames in os.walk(test_root):
    test_image_paths.extend(
        os.path.join(folder, filename)
        for filename in filenames
        if filename.endswith(".pgm")
    )

# The person name is taken from the first four characters of the file name.
test_truth_names = [
    test_path.split(os.path.sep)[-1][:4]
    for test_path in test_image_paths
]

# One list of face encodings per test photo, in the same order as the paths.
test_encodings = loop_over_images(test_image_paths)

### Individual Identification ###

Pick a random test photo. 

In [16]:
# Draw one index at random and look up the matching test photo.
random_index = np.random.randint(0, len(test_image_paths))
unknown_image = test_image_paths[random_index]

# The true name is the first four characters of the photo's file name.
unknown_file_name = unknown_image.split(os.path.sep)[-1]
unknown_image_truth = unknown_file_name[:4]

Output the name of the subject in the test photo. 

In [17]:
unknown_image_array = face_recognition.load_image_file(unknown_image)
unknown_face_encodings = face_recognition.face_encodings(unknown_image_array)

if unknown_face_encodings:
    # Use the first face found in the photo.
    unknown_encoding = unknown_face_encodings[0]

    # One distance per training image: the smallest distance between the
    # unknown face and any face encoded for that image. Training images in
    # which no face was found get an infinite distance so they can never win,
    # and the array always stays one-dimensional.
    distances = np.array([
        face_recognition.face_distance(encodings, unknown_encoding).min()
        if len(encodings) else np.inf
        for encodings in data['encodings']
    ])

    # results_indv is the index of the closest training image
    results_indv = distances.argmin()
    test_names_indv = data['names'][results_indv]
else:
    # No face was detected in the test photo: report it as unidentified
    # (same ' ' placeholder the overall-accuracy cell uses) instead of
    # failing with an IndexError.
    test_names_indv = ' '

print('True name of the test subject: {}'.format(unknown_image_truth))
print('Test subject identified as: {}'.format(test_names_indv))

True name of the test subject: 0008
Test subject identified as: 0008


  # This is added back by InteractiveShellApp.init_path()


### Overall Accuracy ###

Now we run the testing on all test photos to obtain an overall accuracy. 

In [59]:
tolerance = 0.6 # This parameter is subject to tuning in each application. 

test_predicted_names = []
for test_encoding in test_encodings:
    # ' ' marks a photo that could not be identified: either it does not
    # contain exactly one face, or its best match is farther than `tolerance`.
    test_names_indv = ' '

    if len(test_encoding) == 1:
        # verification and identification
        # One distance per training image: the smallest distance to any face
        # encoded for that image. Training images without a detected face get
        # an infinite distance, which keeps the array one-dimensional and
        # guarantees they are never selected.
        distances = np.array([
            face_recognition.face_distance(encodings, test_encoding[0]).min()
            if len(encodings) else np.inf
            for encodings in data['encodings']
        ])

        # index of the closest training image
        results_indv = distances.argmin()

        # accept the closest match only when it is close enough
        if distances[results_indv] <= tolerance:
            test_names_indv = data['names'][results_indv]

    test_predicted_names.append(test_names_indv)

  if __name__ == '__main__':


In [60]:
# show accuracy
test_truth_names = np.array(test_truth_names)
test_predicted_names = np.array(test_predicted_names)

# Fail loudly on a length mismatch. Previously `accuracy` was simply left
# unassigned in that case, so the print below either raised a NameError or
# silently reported a stale value from an earlier run.
if test_truth_names.shape != test_predicted_names.shape:
    raise ValueError(
        'Cannot compute accuracy: {} true names vs {} predicted names'.format(
            test_truth_names.shape, test_predicted_names.shape))

# Fraction of test photos whose predicted name equals the true name.
# An empty test set is reported as 1.0, matching the previous behaviour.
if len(test_truth_names) == 0:
    accuracy = 1.0
else:
    accuracy = np.count_nonzero(test_truth_names == test_predicted_names)/len(test_truth_names)
print('Overall accuracy: {0:0.4f}'.format(accuracy))

Overall accuracy: 0.9970
