In [3]:
# https://github.com/ageitgey/face_recognition
import face_recognition
from sklearn import svm
import os

In [24]:
# File suffixes accepted as images; matched against the lower-cased path,
# so entries here must be lower-case and include the leading dot.
valid_extensions = ('.jpg', '.jpeg', '.png', '.heic')
# Directory handed to get_named_encodings() to build the training set.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
named_dir = '/Users/CollinHeist/Documents/GitHub/Personal-Projects/Duplicate Picture Identifier/Named/'
# Directory scanned for the images whose faces are to be identified.
search_dir = '/Users/CollinHeist/Documents/GitHub/Personal-Projects/Duplicate Picture Identifier/Search/'

In [9]:
def get_image_list(path, ignore_list=[None], extensions=None):
    """Recursively collect the image files found beneath ``path``.

    Args:
        path: Directory to scan.
        ignore_list: Names (not full paths) of directories to skip entirely.
            The default list is only read, never mutated.
        extensions: Optional lower-case suffix, or tuple of suffixes, to keep.
            When omitted, the notebook-level ``valid_extensions`` is used.

    Returns:
        A list of full paths whose lower-cased name ends with one of the
        accepted suffixes. Entries follow ``os.listdir`` order, with each
        sub-directory expanded in place.
    """
    if extensions is None:
        extensions = valid_extensions
    elif isinstance(extensions, str):
        extensions = (extensions,)
    else:
        # str.endswith() only accepts a str or a tuple of str
        extensions = tuple(extensions)

    ignored = ignore_list or ()

    images = []
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            # An ignored directory is dropped outright. It must never be
            # treated as a file, even if its name looks like an image.
            if entry not in ignored:
                images.extend(get_image_list(full_path, ignore_list, extensions))
        elif entry.lower().endswith(extensions):
            images.append(full_path)

    return images

In [21]:
def get_named_encodings(named_dir, ignore_list=[None]):
    """Build a labelled training set from a directory tree of face images.

    Every image returned by ``get_image_list(named_dir, ignore_list)`` is
    loaded and scanned for faces. Only images in which exactly one face is
    detected contribute a sample; all others are skipped.

    The label of a sample is the first path component of the image relative
    to ``named_dir``, i.e. the name of the top-level sub-folder holding it.
    An image lying directly in ``named_dir`` is therefore labelled with its
    own file name.
    NOTE(review): that is probably unintended - confirm whether such images
    should be skipped instead.

    Args:
        named_dir: Root directory of the training images, with or without a
            trailing path separator.
        ignore_list: Directory names passed through to ``get_image_list``.

    Returns:
        A ``(names, encodings)`` tuple of equal-length lists, where
        ``encodings[k]`` is the face encoding labelled ``names[k]``.
    """
    names, encodings = [], []

    for file in get_image_list(named_dir, ignore_list):
        image = face_recognition.load_image_file(file)
        face_boxes = face_recognition.face_locations(image)
        if len(face_boxes) != 1:
            # No face or several faces: skip the image
            continue

        # Reuse the box found above so the detector is not run a second time
        face_enc = face_recognition.face_encodings(
            image, known_face_locations=face_boxes)[0]

        # relpath gives the same label whether or not named_dir ends with a
        # separator; slicing by len(named_dir) produced '' without one
        relative_path = os.path.relpath(file, named_dir)
        names.append(relative_path.split(os.sep)[0])
        encodings.append(face_enc)

    return names, encodings

## Fit the model to the known data

In [27]:
# Gather one (label, encoding) pair per usable image under named_dir
names, encodings = get_named_encodings(named_dir)

# Support-vector classifier: face encodings are the features, names the targets
clf = svm.SVC(gamma='scale')
# Kept as the cell's last expression so the fitted estimator is displayed
clf.fit(X=encodings, y=names)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

## Test each of the unknown images

In [28]:
# Classify every face found in each image beneath search_dir
for image in get_image_list(search_dir, ['Named']):
    # Load the test image with unknown faces into an array
    image_enc = face_recognition.load_image_file(image)

    # Find all the faces in the test image using default HOG-based model
    face_locations = face_recognition.face_locations(image_enc)
    num_faces = len(face_locations)
    print ("Found {} faces".format(num_faces))

    # Encode all detected faces once, reusing the boxes found above.
    # Calling face_encodings() inside the loop below would re-detect and
    # re-encode every face for each face in the image.
    face_encs = face_recognition.face_encodings(
        image_enc, known_face_locations=face_locations)

    print ("Found: ")
    for test_image_enc in face_encs:
        # predict() expects a 2-D batch, hence the one-element list
        name = clf.predict([test_image_enc])
        print(*name)

Found 1 faces
Found: 
AN


In [19]:
# Spot-check the classifier on one hand-picked image
# Load the test image with unknown faces into an array
test_image = face_recognition.load_image_file("/Users/CollinHeist/Documents/GitHub/Personal-Projects/Duplicate Picture Identifier/Named/IMG_0059.jpeg")

# Find all the faces in the test image using default HOG-based model
face_locations = face_recognition.face_locations(test_image)
num_faces = len(face_locations)
print ("Found {} faces".format(num_faces))

# Encode all detected faces once, reusing the boxes found above, instead of
# re-running detection and encoding for every face inside the loop
face_encs = face_recognition.face_encodings(
    test_image, known_face_locations=face_locations)

print ("Found: ")
for test_image_enc in face_encs:
    # predict() expects a 2-D batch, hence the one-element list
    name = clf.predict([test_image_enc])
    print(*name)

Found 1 faces
Found: 
Bella Simpson
