In [12]:
"""
This code is designed to compare as many facial detection algorithms as possible. The metrics for comparison are:
accuracy, total time to process, and time to process per image.

Links to algorithms, datasets, and other resources used:
OpenCV2: https://opencv.org/
Facenet-pytorch: https://github.com/timesler/facenet-pytorch
Deepface: https://github.com/serengil/deepface
InsightFace: https://github.com/deepinsight/insightface
dlib: http://dlib.net/
Retinaface (python ver.): https://github.com/heewinkim/retinaface and https://github.com/biubug6/Pytorch_Retinaface
DSFD: https://github.com/hukkelas/DSFD-Pytorch-Inference
Blazeface (mediapipe ver.): https://google.github.io/mediapipe/
yoloface: https://pypi.org/project/yoloface/
Caltech256 dataset: https://www.kaggle.com/jessicali9530/caltech256
FaceForensics++ dataset: https://github.com/ondyari/FaceForensics

@author Bryce Gernon
"""

"""
Stats recorded on an RTX2080Ti GPU, where applicable. 
Dataset used is 10000 original faces from the FaceForensics++ dataset and 10000 non-face objects from the Caltech 256 dataset.

OpenCV cpu (default haar cascade classifier): 68.16%, 204.81s, .010s/image
MTCNN (default pretrained): 94.465%, 287.34s, .014s/image
DeepFace OpenCV haar: 84.30%, 238.96s, .012s/image
DeepFace OpenCV ssd: 96.09%, 227.15s, .011s/image
dlib: 98.425%, 4143.69s, .207s/image
RetinaFace: 40.145%, 3081.26s, .154s/image
DeepFace retinaface: 93.85%, 1716.77s, .086s/image
DSFD (pytorch ver.): 94.295%, 693.93s, .035s/image 
BlazeFace (mediapipe): 96.22%, 144.60s, .007s/image
yoloface: 70.45%, 232.68s, .012s/image
"""

import cv2
from argparse import ArgumentParser
import dlib
import os
import pickle
from PIL import Image
from numpy import asarray
import time
import matplotlib.pyplot as plt
import torchvision as tv
import torch
import inspect
from facenet_pytorch import MTCNN, InceptionResnetV1
import random
import deepface
from deepface import DeepFace 
from retinaface import RetinaFace
from retinaface.pre_trained_models import get_model
import face_detection
from face_detection import build_detector
import tensorflow
import insightface_paddle
import logging
import easydict
import mediapipe as mp
from yoloface import face_analysis
import pkgutil
from mmdet import apis as mmdetstuff

# Detector backends passed one at a time to DeepFace.detectFace in compare().
backends = ['retinaface', 'opencv' , 'mtcnn']
# Detector names passed one at a time to face_detection.build_detector in compare().
face_detection_models = ['DSFDDetector']
PATH1 = "../../../data/datasets/FaceForensics++/Faces/DeepFakes/val/original" # This can be any directory containing (possibly nested in folders) jpg images of single faces.
PATH2 = "../../../data/datasets/Caltech256/256_ObjectCategories" # This can be any directory containing (possibly nested in folders) jpg images of non-face images.
# Number of non-face / face images to load. NOTE: the stats quoted in the header
# docstring describe a run with 10000 of each; the values here are 100.
NONFACE_NUM = 100
FACE_NUM = 100
# Only used in the label printed for the RetinaFace run; it is not passed to the model.
RETINA_QUALITY = "normal"

def _collect_images(root, is_face, limit):
    """Collect up to `limit` labelled jpg entries from the sub-folders of `root`.

    Walks exactly one level of sub-directories below `root` and records every
    file whose name ends with "jpg".

    Args:
        root: directory whose immediate sub-directories contain the images.
        is_face: label stored as the first element of every entry.
        limit: maximum number of entries to collect; values <= 0 yield [].

    Returns:
        A list of [is_face, image, path_to_image] entries. The list is shorter
        than `limit` when fewer matching images exist.

    NOTE: the image is opened inside a `with` block, so the stored PIL object
    has already been closed when it is returned. compare() in this file only
    reads the label and the path, so the handle is kept purely to preserve the
    entry layout (and to verify the file opens as an image).
    """
    entries = []
    if limit <= 0:
        return entries
    for directory in os.listdir(root):
        folder = root + "/" + directory
        # Skip stray files sitting next to the category folders.
        if not os.path.isdir(folder):
            continue
        for image in os.listdir(folder):
            if not image.endswith("jpg"):
                continue
            image_path = folder + "/" + image
            with Image.open(image_path) as im:
                entries.append([is_face, im, image_path])
            if len(entries) >= limit:
                return entries
    return entries


def get_nonface(path=None):
    """Obtain and label non-face images.

    Args:
        path: directory to scan; defaults to the module-level PATH2.

    Returns:
        A list of [False, image, path_to_image] entries holding at most
        NONFACE_NUM items. Unlike the previous implementation, a (possibly
        shorter) list is always returned instead of None when the directory
        contains fewer than NONFACE_NUM images.
    """
    root = PATH2 if path is None else path
    database = _collect_images(root, False, NONFACE_NUM)
    print("Non-faces stored in database: " + str(len(database)))
    return database


def make_dataset(p1, p2):
    """Build the combined evaluation dataset.

    Args:
        p1: directory containing (nested one level deep) jpg images of faces.
        p2: directory containing (nested one level deep) jpg non-face images.

    Returns:
        A list of [is_face, image, path_to_image] entries: the non-face
        entries first (label False), followed by the face entries (label
        True). The list is always returned, even when fewer than FACE_NUM
        face images are available.
    """
    # Non-face images first, labelled False.
    database = get_nonface(p2)

    # Then the face images, labelled True.
    faces = _collect_images(p1, True, FACE_NUM)
    print("Faces stored in database: " + str(len(faces)))
    database.extend(faces)
    return database

In [13]:
def _print_progress(count):
    """Print a progress line after every 1000 processed images."""
    if (count % 1000) == 0:
        print("Processed " + str(count) + " images.")


def _print_report(name, start, correct, total):
    """Print timing and accuracy for one detector run.

    Args:
        name: label printed after "Algorithm used: ".
        start: time.time() value taken when the run began.
        correct: number of images classified correctly.
        total: number of images evaluated; 0 is reported as 0.0 instead of
            raising ZeroDivisionError.
    """
    elapsed = time.time() - start
    per_image = elapsed / total if total else 0.0
    accuracy = (correct / total) * 100 if total else 0.0
    print("Algorithm used: " + name)
    print("Time taken: " + str(elapsed) + "s")
    print("Time taken per image: " + str(per_image) + "s")
    print("Number correct: " + str(correct))
    print("Accuracy: " + str(accuracy) + "%")
    print()


def compare(data):
    """Run every face detector over `data` and print accuracy and timing.

    Args:
        data: list of [is_face, image, path_to_image] entries. Only the label
            (index 0) and the path (index 2) are read. The list is shuffled
            in place.

    An image counts as correct when a face image yields exactly one detection
    (at least one for the OpenCV haar cascade) or a non-face image yields
    none. DeepFace is scored by inspecting the first pixel value of the
    returned image. Results are printed; nothing is returned.
    """
    print("Beginning algorithm comparisons...")
    total = len(data)
    #################################################################################################
    # Yoloface (timing includes construction of the analyser, as before).
    count = 0
    correct = 0
    start = time.time()
    print("Creating face analysis...")
    face = face_analysis()
    print("Analyzing batches...")
    random.shuffle(data)
    os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
    for batch in data:
        is_face = batch[0]
        img, box, conf = face.face_detection(image_path=batch[2], model='full')
        if (is_face and len(box) == 1) or (not is_face and len(box) == 0):
            correct += 1
        count += 1
        _print_progress(count)
    _print_report("Yoloface", start, correct, total)
    #################################################################################################
    # OpenCV haar cascade: any number of detections on a face image counts.
    start = time.time()
    correct = 0
    haar_cv2 = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    count = 0
    for batch in data:
        is_face = batch[0]
        image = cv2.cvtColor(cv2.imread(batch[2]), cv2.COLOR_BGR2RGB)
        faces = haar_cv2.detectMultiScale(image)
        if (is_face and len(faces) > 0) or (not is_face and len(faces) == 0):
            correct += 1
        count += 1
        _print_progress(count)
    _print_report("Opencv default haar cascade classifier", start, correct, total)
    #################################################################################################
    # MTCNN (facenet-pytorch); the model is built before the timer starts.
    mtcnn = MTCNN(device='cuda', keep_all=True, margin=20).eval()
    count = 0
    correct = 0
    start = time.time()
    for batch in data:
        is_face = batch[0]
        # detect() returns a (boxes, probabilities) pair; boxes is None when
        # nothing is found. The previous code measured the length of the pair
        # itself, which is never 1, so no face image could be scored correct.
        boxes, _probs = mtcnn.detect(cv2.imread(batch[2])[:, :, ::-1])
        if boxes is not None:
            if is_face and len(boxes) == 1:
                correct += 1
        elif not is_face:
            correct += 1
        count += 1
        _print_progress(count)
    _print_report("MTCNN", start, correct, total)
    #################################################################################################
    # DeepFace, once per configured backend.
    DeepFace.env = 'cuda'
    DeepFace.device = 'cuda'
    for backend in backends:
        count = 0
        correct = 0
        start = time.time()
        for batch in data:
            is_face = batch[0]
            faces = DeepFace.detectFace(img_path=batch[2], detector_backend=backend, enforce_detection=False)
            # Heuristic kept from the original: a first pixel value of 0.0 is
            # treated as "no face detected".
            first_value = faces[0][0][0]
            if (is_face and first_value != 0.0) or (not is_face and first_value == 0.0):
                correct += 1
            count += 1
            _print_progress(count)
        _print_report("DeepFace " + backend, start, correct, total)
    #################################################################################################
    # dlib CNN detector (timing includes loading the model file).
    count = 0
    correct = 0
    start = time.time()
    cnn_face_detector = dlib.cnn_face_detection_model_v1('mmod_human_face_detector.dat')
    for batch in data:
        is_face = batch[0]
        img = dlib.load_rgb_image(batch[2])
        faces = cnn_face_detector(img, 1)
        if (is_face and len(faces) == 1) or (not is_face and len(faces) == 0):
            correct += 1
        count += 1
        _print_progress(count)
    _print_report("Dlib", start, correct, total)
    #################################################################################################
    # RetinaFace (timing includes loading the model).
    count = 0
    correct = 0
    start = time.time()
    model = get_model("resnet50_2020-07-20", max_size=2048, device="cuda")
    model.eval()
    for batch in data:
        is_face = batch[0]
        image = cv2.imread(batch[2])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        faces = model.predict_jsons(image)
        # The code relies on the first result having an empty 'bbox' when
        # nothing was detected.
        if (is_face and len(faces) == 1 and len(faces[0]['bbox']) != 0) or (not is_face and len(faces[0]['bbox']) == 0):
            correct += 1
        count += 1
        _print_progress(count)
    _print_report("Retinaface, quality=" + RETINA_QUALITY, start, correct, total)
    #################################################################################################
    # Detectors from the face_detection package (timing includes build_detector).
    for model_type in face_detection_models:
        count = 0
        correct = 0
        start = time.time()
        detector = build_detector(model_type, confidence_threshold=.5, nms_iou_threshold=.3, device="cuda")
        for batch in data:
            is_face = batch[0]
            image = cv2.imread(batch[2])[:, :, ::-1]
            faces = detector.detect(image)
            if (is_face and len(faces) == 1) or (not is_face and len(faces) == 0):
                correct += 1
            count += 1
            _print_progress(count)
        _print_report(model_type, start, correct, total)
    #################################################################################################
    # MediaPipe BlazeFace. The detector is bound to a name that does not
    # shadow the imported `face_detection` module.
    count = 0
    correct = 0
    start = time.time()
    mp_face_detection = mp.solutions.face_detection
    with mp_face_detection.FaceDetection(min_detection_confidence=0.67) as blaze_detector:
        for batch in data:
            is_face = batch[0]
            image = cv2.cvtColor(cv2.imread(batch[2]), cv2.COLOR_BGR2RGB)
            faces = blaze_detector.process(image)
            if faces.detections is not None:
                if is_face and len(faces.detections) == 1:
                    correct += 1
            elif not is_face:
                correct += 1
            count += 1
            _print_progress(count)
    _print_report("Mediapipe Blazeface", start, correct, total)
    #################################################################################################
    

def main():
    """Build the dataset from PATH1/PATH2 and run the detector comparison.

    Raises:
        RuntimeError: if no dataset could be built (make_dataset returned
            None or an empty list), instead of failing later with an opaque
            TypeError from len(None).
    """
    dataset = make_dataset(PATH1, PATH2)
    if not dataset:
        raise RuntimeError("No images were loaded; check PATH1, PATH2, FACE_NUM and NONFACE_NUM.")
    print("Total dataset length: " + str(len(dataset)))
    compare(dataset)

# Run the benchmark when this code is executed directly (script or notebook cell).
if __name__ == '__main__':
    main()

Non-faces stored in database: 100
Faces stored in database: 100
Total dataset length: 200
Beginning algorithm comparisons...
Creating face analysis...
yolov3-tiny_face.weights:: status : file already exists
yolov3_tiny_face.cfg:: status : file already exists
face_detection.weights:: status : file already exists
face_detection.cfg:: status : file already exists
Analyzing batches...
Algorithm used: Yoloface
Time taken: 1.0430898666381836s
Time taken per image: 0.005215504169464111s
Number correct: 0
Accuracy: 0.0%

Algorithm used: Opencv default haar cascade classifier
Time taken: 0.012259960174560547s
Time taken per image: 6.134986877441406e-05s
Number correct: 0
Accuracy: 0.0%

Algorithm used: MTCNN
Time taken: 3.266334533691406e-05s
Time taken per image: 2.300739288330078e-07s
Number correct: 0
Accuracy: 0.0%

Algorithm used: Dlib
Time taken: 0.0025742053985595703s
Time taken per image: 1.293182373046875e-05s
Number correct: 0
Accuracy: 0.0%

Algorithm used: Retinaface, quality=normal