In [15]:
import sys
import os
import dlib
import glob
from skimage import io
from sklearn.metrics.pairwise import euclidean_distances
import pandas as pd
from tqdm import tqdm #Optional, for tracking progress of for loop
import time

# This code throws a few 'useless' warnings. This is to suppress those, or see them only once.
import warnings
#warnings.filterwarnings("ignore")
warnings.filterwarnings("once")

In [28]:
# Set-up of parameters
## Facial detection
detector = dlib.get_frontal_face_detector()

## Landmark extraction
predictor_path = "shape_predictor_68_face_landmarks.dat"
sp = dlib.shape_predictor(predictor_path)

## Facial recognition
face_rec_model_path = "dlib_face_recognition_resnet_model_v1.dat"
facerec = dlib.face_recognition_model_v1(face_rec_model_path)

## Filename directory
#file_pairings = pd.read_excel('Filenames.xlsx')
file_pairings = pd.read_excel('../../../demo_faces/demo_filename_pairings.xlsx')
#images_directory_path = '/images/'
images_directory_path = '../../../demo_faces/'

# Precision parameter (lower number = faster runtime, less accuracy; speed of 1 -> 99.13% accuracy, 100 -> 99.38%)
speed = 1

In [29]:
file_pairings

Unnamed: 0,Image 1,Image 2
0,kyi1.jpg,kyi2.jpg
1,kyi1.jpg,kyi3.jpg
2,kyi1.jpg,thida1.jpg
3,thida2.jpg,thida3.jpg
4,mandela+others.jpg,mandela2.jpg
5,mandela1.jpg,mandela2.jpg
6,mandela1.jpg,mandela3.jpg
7,mobama1.jpg,mobama2.jpg
8,bobama2.jpg,bobama5.jpg


In [35]:
# Process paired images, organized in two column sheet

## Track and store information on each image as it is processed, as well as score; set Match Score default as missing
file_pairings['Img1 Processed?'] = ''
file_pairings['Img2 Processed?'] = ''
file_pairings['Img1 Vector'] = ''
file_pairings['Img2 Vector'] = ''
file_pairings['Match Score'] = float(-222)

for row_index in tqdm(range(file_pairings.shape[0])):
    for col_index in range(2):
        
        # Check for missing image; skip if missing
        image_name = file_pairings.iloc[row_index, col_index]
        if image_name != image_name:
            if col_index == 0:
                file_pairings['Img1 Processed?'][row_index] = 0
            elif col_index == 1:
                file_pairings['Img2 Processed?'][row_index] = 0
            continue
        
        # Read image and detect faces
        image_path = images_directory_path + image_name
        try:
            read_in_image = io.imread(image_path)
            win = dlib.image_window()
            win.clear_overlay()
            win.set_image(read_in_image)
        except IOError:
            if col_index == 0:
                #file_pairings['Img1 Processed?'][row_index] = 0
                file_pairings['Img1 Processed?'][row_index] = 'File not found.'
            elif col_index == 1:
                #file_pairings['Img2 Processed?'][row_index] = 0
                file_pairings['Img2 Processed?'][row_index] = 'File not found.'
            continue
        faces = detector(read_in_image, 1)
        
        # If there is not exactly 1 face, skip
        if len(faces) != 1:
            if col_index == 0:
                #file_pairings['Img1 Processed?'][row_index] = 0
                file_pairings['Img1 Processed?'][row_index] = str(len(faces)) + ' faces found.'
            elif col_index == 1:
                #file_pairings['Img2 Processed?'][row_index] = 0
                file_pairings['Img2 Processed?'][row_index] = str(len(faces)) + ' faces found.'
            continue
  
        # Process the face and make vector
        shape = sp(read_in_image, faces[0])
        
        win.clear_overlay()
        win.add_overlay(faces[0])
        win.add_overlay(shape)
        time.sleep(2)
        
        face_descriptor = facerec.compute_face_descriptor(read_in_image, shape, speed)        
        
        # Log successful processing and 128D vector
        if col_index == 0:
            file_pairings['Img1 Processed?'][row_index] = 1
            file_pairings['Img1 Vector'][row_index] = face_descriptor
        elif col_index == 1:
            file_pairings['Img2 Processed?'][row_index] = 1
            file_pairings['Img2 Vector'][row_index] = face_descriptor

        # If pair of images successfully processed, compute Euclidean distance
        if file_pairings['Img1 Processed?'][row_index] == 1 and file_pairings['Img2 Processed?'][row_index] == 1:
            file_pairings['Match Score'][row_index] = euclidean_distances(file_pairings['Img1 Vector'][row_index], file_pairings['Img2 Vector'][row_index])[0][0]            

            if file_pairings['Match Score'][row_index] != -222 and file_pairings['Match Score'][row_index] < 0.5:
                win.set_title('SAME PERSON')
                time.sleep(5)
            elif file_pairings['Match Score'][row_index] != -222 and file_pairings['Match Score'][row_index] < 0.6:
                
            else:
                win.set_title('NOT SAME PERSON')
                time.sleep(5)
    # In general, if two face descriptor vectors have a Euclidean
    # distance between them less than 0.6 then they are from the same
    # person, otherwise they are from different people.  He we just print
    # the vector to the screen.

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
100%|██████████| 9/9 [00:55<00:00,  6.95s/it]


In [12]:
def categorize_scores(file_pairings, threshold=0.6, percentage=0.1):
    
    # Flagging calculated scores that exceed Euclidean distance threshold
    if threshold != 0: # pass threshold = 0 to skip test
        file_pairings['Same Person: Threshold Test'] = ''
        
        for row_index in range(len(file_pairings)):
            # Check for valid Match Score and Score less than set threshold
            if file_pairings['Match Score'][row_index] != -222 and file_pairings['Match Score'][row_index] < threshold:
                file_pairings['Same Person: Threshold Test'][row_index] = 1.0
            elif file_pairings['Match Score'][row_index] != -222 and file_pairings['Match Score'][row_index] > threshold:
                file_pairings['Same Person: Threshold Test'][row_index] = 0.0
            elif file_pairings['Match Score'][row_index] == -222:
                file_pairings['Same Person: Threshold Test'][row_index] = -222
    
    # Flagging a set percentage of calculated scores, by greatest distance
    if percentage != 0: # pass percentage = 0 to skip test
        file_pairings['Same Person: Percentage Test'] = -222
        
        # Consider only valid values
        num_valid_values = len(file_pairings[file_pairings['Match Score']!=-222])
        minimum_valid_score = min(file_pairings[file_pairings['Match Score']!=-222]['Match Score'])
        
        # Sort by match score, figure out valid index range
        file_pairings = file_pairings.sort_values('Match Score')
        file_pairings.index = range(len(file_pairings))
        lowest_valid_value_index = list(file_pairings['Match Score']).index(minimum_valid_score)
        
        # Identify index cut-off based on specified percentage
        percent_index_highest_valid = int(num_valid_values * (1-percentage)) + lowest_valid_value_index #int rounds down, but the 0 index also means that 1 extra is being counted anyway
        
        # Assign
        file_pairings['Same Person: Percentage Test'][:lowest_valid_value_index] = -222
        file_pairings['Same Person: Percentage Test'][lowest_valid_value_index:percent_index_highest_valid] = 1
        file_pairings['Same Person: Percentage Test'][percent_index_highest_valid:] = 0
    
    return file_pairings

In [27]:
categorize_scores(file_pairings)#.to_csv('final_result.csv')

# For all IPA deployment, probably don't output score, probably ask for threshold or percentage, not both, don't include vectors

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,kyi1.jpg,kyi2.jpg,Img1 Processed?,Img2 Processed?,Img1 Vector,Img2 Vector,Match Score,Same Person: Threshold Test,Same Person: Percentage Test
0,mandela+others.jpg,mandela2.jpg,2 faces found.,1,,"[-0.14226526022, 0.0516261681914, 0.1722920984...",-222.0,-222,-222
1,mobama1.jpg,mobama2.jpg,2 faces found.,1,,"[-0.119711630046, 0.0367495790124, 0.061876196...",-222.0,-222,-222
2,kyi1.jpg,kyi3.jpg,1,1,"[-0.131510674953, 0.0725523233414, 0.122026383...","[-0.120575323701, 0.0808594599366, 0.065408669...",0.351324,1,1
3,thida2.jpg,thida3.jpg,1,1,"[-0.0811650902033, 0.0799063518643, 0.07091969...","[-0.042507532984, 0.0727185606956, 0.050695080...",0.369943,1,1
4,mandela1.jpg,mandela3.jpg,1,1,"[-0.202306374907, 0.11107929796, 0.18166196346...","[-0.156554937363, 0.101945199072, 0.1673289537...",0.413029,1,1
5,bobama2.jpg,bobama5.jpg,1,1,"[-0.101030021906, 0.158050179482, 0.0574290379...","[-0.12924516201, 0.167289584875, 0.04478578269...",0.430007,1,1
6,mandela1.jpg,mandela2.jpg,1,1,"[-0.202306374907, 0.11107929796, 0.18166196346...","[-0.14226526022, 0.0516261681914, 0.1722920984...",0.436243,1,1
7,kyi1.jpg,thida1.jpg,1,1,"[-0.131510674953, 0.0725523233414, 0.122026383...","[-0.0689536333084, 0.0722238197923, 0.04564921...",0.690596,0,0
