# Confusion matrix
Import libraries and defines the backends.

In [1]:
from deepface import DeepFace
from sklearn import metrics
import pandas as pd
import numpy as np
import os

detectors_backends_dict = {
    'opencv' : 7,
    'ssd' : 0.7,
    'dlib' : 0.7,
    'mtcnn' : 0.7,
    'retinaface' :0.7,
    'mediapipe' : 0.7
}

classification_labels = ['FACE', 'NOT_FACE', 'MULTIPLE_FACES']

FACE = 0
NOT_FACE = 1
MULTIPLE_FACES = 2

Specifies the path of the two datasets.


In [2]:
dataset_dir = "dataset/full/"

analysis = dict()
actual = [] # actual classified images in the dataset

print("Dataset dimension: ", len(os.listdir(dataset_dir)))

Dataset dimension:  300


Extract the actual classification from the dataset.

In [4]:
for path in os.listdir(dataset_dir):
    actual.append(path.split('-')[1].split(".")[0])

print("Single faces: ", actual.count(classification_labels[FACE]))
print("Not face images: ", actual.count(classification_labels[NOT_FACE]))
print("Multiple faces: ", actual.count(classification_labels[MULTIPLE_FACES]))

print(actual)
print(os.listdir(dataset_dir))

Single faces:  200
Not face images:  200
Multiple faces:  200
['FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES', 'NOT_FACE', 'FACE', 'MULTIPLE_FACES',

Defines a function to analyze the performances of a backend. It produces the predicted classification.

In [4]:
def analyze_face_detector_predictions(backend, path, conf_limit):
    predictions = []

    for img_path in os.listdir(path):
            try:
                img_path_full = path + "/" + img_path
                face_obj = DeepFace.extract_faces(img_path_full, detector_backend=backend)

                # Extract the list of confidence score for each face detected
                # filtering by the input confidence limit
                conf_list = list(filter(
                    lambda conf: conf >= conf_limit,
                    [obj['confidence'] for obj in face_obj if 'confidence' in obj]))
                    
                # If the list is not empty, count the false positives (if len = 1 there will be no
                # false positives) and increase the success_counter
                if len(conf_list) == 0:
                    predictions.append(classification_labels[NOT_FACE])
                elif len(conf_list) == 1:
                    predictions.append(classification_labels[FACE]) 
                elif len(conf_list) > 1:
                    predictions.append(classification_labels[MULTIPLE_FACES])
            except:
                    predictions.append(classification_labels[NOT_FACE])
                    
    return predictions                                             

Generate a report for the classification for each backend

- Precision: percentage of true positives over all the positives predicted  (predicted value that matches the actual ones)
- Recall: percentage of true positives over all the actual positives cases (True positives and false negatives) 

The difference between the two metrics is the following: the first calculate the true positives over the predicted ones while the second calculate the true positives over the actual ones.

In [5]:
for backend in detectors_backends_dict:
    print("Running {} backend".format(backend))
    
    predicted = analyze_face_detector_predictions(backend, dataset_dir, detectors_backends_dict[backend])
    analysis[backend] =  metrics.classification_report(actual, predicted, labels=classification_labels, output_dict=True)

Running opencv backend
Running ssd backend
Running dlib backend
Running mtcnn backend
Running retinaface backend
Running mediapipe backend


Print result as pandas DataFrame

In [6]:
for backend in detectors_backends_dict:
    print(backend)
    display(pd.DataFrame(analysis[backend]))

opencv


Unnamed: 0,FACE,NOT_FACE,MULTIPLE_FACES,accuracy,macro avg,weighted avg
precision,0.561905,0.578947,1.0,0.606667,0.713617,0.713617
recall,0.59,0.99,0.24,0.606667,0.606667,0.606667
f1-score,0.57561,0.730627,0.387097,0.606667,0.564445,0.564445
support,100.0,100.0,100.0,0.606667,300.0,300.0


ssd


Unnamed: 0,FACE,NOT_FACE,MULTIPLE_FACES,accuracy,macro avg,weighted avg
precision,0.877551,0.94898,0.894231,0.906667,0.90692,0.90692
recall,0.86,0.93,0.93,0.906667,0.906667,0.906667
f1-score,0.868687,0.939394,0.911765,0.906667,0.906615,0.906615
support,100.0,100.0,100.0,0.906667,300.0,300.0


dlib


Unnamed: 0,FACE,NOT_FACE,MULTIPLE_FACES,accuracy,macro avg,weighted avg
precision,0.781513,0.933962,0.973333,0.883333,0.896269,0.896269
recall,0.93,0.99,0.73,0.883333,0.883333,0.883333
f1-score,0.849315,0.961165,0.834286,0.883333,0.881589,0.881589
support,100.0,100.0,100.0,0.883333,300.0,300.0


mtcnn


Unnamed: 0,FACE,NOT_FACE,MULTIPLE_FACES,accuracy,macro avg,weighted avg
precision,0.865385,1.0,0.869565,0.903333,0.91165,0.91165
recall,0.9,0.81,1.0,0.903333,0.903333,0.903333
f1-score,0.882353,0.895028,0.930233,0.903333,0.902538,0.902538
support,100.0,100.0,100.0,0.903333,300.0,300.0


retinaface


Unnamed: 0,FACE,NOT_FACE,MULTIPLE_FACES,accuracy,macro avg,weighted avg
precision,0.876289,0.956044,0.883929,0.903333,0.90542,0.90542
recall,0.85,0.87,0.99,0.903333,0.903333,0.903333
f1-score,0.862944,0.910995,0.933962,0.903333,0.902634,0.902634
support,100.0,100.0,100.0,0.903333,300.0,300.0


mediapipe


Unnamed: 0,FACE,NOT_FACE,MULTIPLE_FACES,accuracy,macro avg,weighted avg
precision,0.686957,0.642384,1.0,0.7,0.776447,0.776447
recall,0.79,0.97,0.34,0.7,0.7,0.7
f1-score,0.734884,0.772908,0.507463,0.7,0.671752,0.671752
support,100.0,100.0,100.0,0.7,300.0,300.0


In [7]:
import time
tic = time.time()
analyze_face_detector_predictions('ssd', dataset_dir, detectors_backends_dict['ssd'])
tac = time.time()

In [8]:
print(tac - tic)

418.5572648048401
