In [1]:
import os
import threading

import cv2 as cv
import dlib
import numpy as np
from deepface import DeepFace
from sklearn.metrics import accuracy_score

2024-04-20 04:04:31.015930: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-20 04:04:31.015995: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-20 04:04:31.069078: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-20 04:04:31.180953: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Candidate values for DeepFace's `model_name`; the notebook selects models[1].
models = [
    "VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace",
    "DeepID", "ArcFace", "Dlib", "SFace", "GhostFaceNet",
]

# Candidate values for `distance_metric`; the notebook selects metrics[0].
metrics = ["cosine", "euclidean", "euclidean_l2"]

# Candidate values for `detector_backend`; the notebook selects backends[10].
backends = [
    "opencv", "ssd", "dlib", "mtcnn", "fastmtcnn", "retinaface",
    "mediapipe", "yolov8", "yunet", "centerface", "skip",
]

In [3]:
def convert_bb(rect):
    """Translate a dlib rectangle into an OpenCV-style ``(x, y, w, h)`` tuple.

    ``rect`` must expose ``left()``, ``top()``, ``right()`` and ``bottom()``.
    The width and height are the plain differences ``right - left`` and
    ``bottom - top``; note that width comes before height in the result.
    """
    left, top = rect.left(), rect.top()
    return (left, top, rect.right() - left, rect.bottom() - top)

In [4]:
detector = dlib.get_frontal_face_detector()  # shared dlib frontal-face detector; later cells call it on grayscale images

In [45]:
def basic_face_recognition(img, db_path="200classes_data/train"):
    """Detect every face in ``img`` and print the best-matching identity for each.

    Faces are located with the module-level dlib ``detector``; each crop is
    looked up with ``DeepFace.find`` in the gallery under ``db_path``. The
    printed name is the majority label among the three nearest gallery images,
    where a label is the first path component below ``db_path``. When the
    lookup returns no match, ``"Stranger"`` is printed instead.

    Args:
        img: BGR image array (as returned by ``cv.imread``).
        db_path: Root folder of the gallery, one sub-folder per person.
    """
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    boxes = [convert_bb(r) for r in detector(gray)]
    # convert_bb yields (x, y, w, h): width comes before height.
    for x, y, w, h in boxes:
        # A box that starts outside the image has negative coordinates, which
        # NumPy slicing would interpret as "from the end"; clip them to zero.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = max(x + w, 0), max(y + h, 0)
        crop = img[y0:y1, x0:x1]
        if crop.size == 0:
            continue  # nothing of this box lies inside the image

        results = DeepFace.find(
            img_path=crop,
            db_path=db_path,
            model_name=models[1],
            distance_metric=metrics[0],
            detector_backend=backends[10],
            enforce_detection=False,
            normalization="Facenet2018",
            align=True,
            silent=True,
        )
        matches = results[0] if results else None
        if matches is None or matches.empty:
            # No gallery image matched: an expected outcome, handled
            # explicitly instead of by swallowing every exception.
            person = "Stranger"
        else:
            person = (
                matches.identity.head(3)
                .apply(lambda p: os.path.relpath(p, db_path).split(os.sep)[0])
                .value_counts()
                .idxmax()
            )
        print(person)

In [44]:
img = cv.imread("test.jpg")
# cv.imread reports an unreadable or missing file by returning None instead of
# raising, so fail here with a clear message rather than later inside cvtColor.
if img is None:
    raise FileNotFoundError("test.jpg is missing or is not a readable image")

In [46]:
basic_face_recognition(img)  # prints one predicted name per face detected in img

Colin_Powell


In [47]:
def accuracy(base_path, db_path):
    """Measure identification accuracy of the test images against a gallery.

    ``base_path`` is expected to contain one sub-folder per person; the folder
    name is the ground-truth label for every image inside it. Each face found
    by the module-level dlib ``detector`` is looked up with ``DeepFace.find``
    in ``db_path`` and labelled by majority vote over the three nearest
    gallery images (label = first path component below ``db_path``), or
    ``"Stranger"`` when nothing matches.

    Every detected face is one sample: images in which no face is detected
    contribute no sample, and an image with several detected faces contributes
    several samples that all carry the folder's label.

    Args:
        base_path: Root folder of the labelled test images.
        db_path: Root folder of the gallery searched by ``DeepFace.find``.

    Returns:
        The value of ``accuracy_score`` over all collected samples.
    """
    predictions = []
    expected = []
    for folder_name in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder_name)
        if not os.path.isdir(folder_path):
            continue  # a stray file next to the class folders carries no label
        for file in os.listdir(folder_path):
            img = cv.imread(os.path.join(folder_path, file))
            if img is None:
                continue  # cv.imread returns None for files it cannot decode
            gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            # convert_bb yields (x, y, w, h): width comes before height.
            for x, y, w, h in (convert_bb(r) for r in detector(gray)):
                # Negative coordinates would wrap around in NumPy slicing.
                x0, y0 = max(x, 0), max(y, 0)
                x1, y1 = max(x + w, 0), max(y + h, 0)
                crop = img[y0:y1, x0:x1]
                if crop.size == 0:
                    continue  # nothing of this box lies inside the image

                results = DeepFace.find(
                    img_path=crop,
                    db_path=db_path,
                    model_name=models[1],
                    distance_metric=metrics[0],
                    detector_backend=backends[10],
                    enforce_detection=False,
                    normalization="Facenet2018",
                    align=True,
                    silent=True,
                )
                matches = results[0] if results else None
                if matches is None or matches.empty:
                    # "No match" is an expected outcome; handle it explicitly
                    # rather than hiding every exception behind "Stranger".
                    person = "Stranger"
                else:
                    # Derive the label relative to db_path so the function
                    # works for gallery roots of any depth.
                    person = (
                        matches.identity.head(3)
                        .apply(lambda p: os.path.relpath(p, db_path).split(os.sep)[0])
                        .value_counts()
                        .idxmax()
                    )
                predictions.append(person)
                expected.append(folder_name)
    print(f"Done {base_path}")
    # accuracy_score takes (y_true, y_pred); the value is the same either way.
    return accuracy_score(expected, predictions)

In [7]:
# Evaluate each class-count split against its own train gallery. The calls are
# kept as separate statements so each finished result is bound immediately;
# later cells read the four names individually.
accuracy_200 = accuracy("200classes_data/test", "200classes_data/train")
accuracy_150 = accuracy("150classes_data/test", "150classes_data/train")
accuracy_100 = accuracy("100classes_data/test", "100classes_data/train")
accuracy_50 = accuracy("50classes_data/test", "50classes_data/train")

Done 200classes_data/test
Done 150classes_data/test
Done 100classes_data/test
Done 50classes_data/test


In [8]:
accuracy_200  # 200-class split; "facenet2018" presumably = normalization="Facenet2018" as used in accuracy() — confirm

0.8560975609756097

In [146]:
accuracy_200  # 200-class split; "facenet" presumably marks a run with a different normalization setting — confirm (executed out of order: In [146])

0.8146341463414634

In [141]:
accuracy_150  # 150-class split; "facenet" presumably marks a run with a different normalization setting — confirm (executed out of order: In [141])

0.8306709265175719

In [9]:
accuracy_150  # 150-class split; "facenet2018" presumably = normalization="Facenet2018" as used in accuracy() — confirm

0.8722044728434505

In [142]:
accuracy_100  # 100-class split; "facenet" presumably marks a run with a different normalization setting — confirm (executed out of order: In [142])

0.8734177215189873

In [10]:
accuracy_100  # 100-class split; "facenet2018" presumably = normalization="Facenet2018" as used in accuracy() — confirm

0.9082278481012658

In [143]:
accuracy_50  # 50-class split; "facenet" presumably marks a run with a different normalization setting — confirm (executed out of order: In [143])

0.9128787878787878

In [12]:
accuracy_50  # 50-class split; "facenet2018" presumably = normalization="Facenet2018" as used in accuracy() — confirm

0.9128787878787878

Точность модели на 200 классов: 85.61%   
Точность модели на 150 классов: 87.22%   
Точность модели на 100 классов: 90.82%   
Точность модели на 50 классов: 91.29%   


In [75]:
cap = cv.VideoCapture("video.mp4")  # stream consumed by face_tracking_and_recognition(cap) below

cap2 = cv.VideoCapture("video2.mp4")  # second clip; not referenced by any other visible cell

In [53]:
# Capture geometry and frame rate, intended for the recorded output video.
frame_size = (
    int(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)),
)
frame_width, frame_height = frame_size
fps = int(cap.get(cv.CAP_PROP_FPS))

In [78]:
def FaceRecognition(faceNames, faceID, frame, db_path="200classes_data/train"):
    """Identify the face in ``frame`` and store the result in ``faceNames``.

    Designed to be used as a thread target: the outcome is written to
    ``faceNames[faceID]`` rather than returned. The name is the majority label
    among the three nearest gallery images reported by ``DeepFace.find``,
    where a label is the first path component below ``db_path``. ``None`` is
    stored when the crop is empty or the lookup yields no match.

    Args:
        faceNames: Dict shared with the caller, mapping face id -> name.
        faceID: Key under which the result is stored.
        frame: Image array holding the cropped face.
        db_path: Root folder of the gallery, one sub-folder per person.
    """
    print("Calling FaceRecognition")
    person = None
    # An empty crop (e.g. a box lying outside the frame) has nothing to
    # identify; skip the lookup so the entry is still written below.
    if frame is not None and frame.size > 0:
        faces = DeepFace.find(
            img_path=frame,
            db_path=db_path,
            model_name=models[1],
            distance_metric=metrics[0],
            detector_backend=backends[10],
            enforce_detection=False,
            normalization="Facenet2018",
            silent=True,
        )
        matches = faces[0] if faces else None
        # "No match" is an expected outcome; test for it explicitly instead
        # of hiding every exception behind a None result.
        if matches is not None and not matches.empty:
            person = (
                matches.identity.head(3)
                .apply(lambda p: os.path.relpath(p, db_path).split(os.sep)[0])
                .value_counts()
                .idxmax()
            )
    faceNames[faceID] = person
    print(faceNames, faceID)

In [79]:
def _tracker_box(tracker):
    """Return a dlib tracker's current position as integer ``(x, y, w, h)``."""
    pos = tracker.get_position()
    return int(pos.left()), int(pos.top()), int(pos.width()), int(pos.height())


def _clipped_crop(frame, x, y, w, h):
    """Cut the ``(x, y, w, h)`` box out of ``frame``, clipping negative coordinates to 0."""
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = max(x + w, 0), max(y + h, 0)
    return frame[y0:y1, x0:x1]


def _start_recognition(faceNames, faceID, crop):
    """Run ``FaceRecognition`` on ``crop`` in a daemon thread; empty crops are skipped."""
    if crop.size == 0:
        return
    threading.Thread(
        target=FaceRecognition,
        args=(faceNames, faceID, crop),
        daemon=True,
    ).start()


def face_tracking_and_recognition(cap):
    """Track faces in a video stream and label them in a preview window.

    Frames are read from ``cap`` until the stream ends or ``q`` is pressed
    (in which case ``cap`` is released here). Every 10th frame the dlib
    ``detector`` is run; a detection that does not coincide with an existing
    tracker gets a new ``dlib.correlation_tracker`` and a background
    ``FaceRecognition`` call. Every 300th frame all tracked faces are
    re-identified. Trackers whose update quality drops below 7 are discarded.
    Each frame is shown with a box per tracker and the recognised name, or
    ``"Detecting"`` while no result has been stored yet.

    Args:
        cap: Object providing ``read()`` and ``release()`` like
            ``cv.VideoCapture``.
    """
    #writer = cv.VideoWriter("result2_200classes.mp4", cv.VideoWriter_fourcc(*'DIVX'), 60, frame_size)
    MIN_TRACK_QUALITY = 7
    DETECT_EVERY = 10
    # Checked only on detection frames, so this must stay a multiple of DETECT_EVERY.
    RECOGNIZE_EVERY = 300

    frameCounter = 0
    currentFaceID = 0
    faceTrackers = {}
    faceNames = {}

    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            break  # end of stream or read failure: there is no frame to process

        resultFrame = frame.copy()
        frameCounter += 1

        # Update every tracker and drop the ones that lost their target.
        lostFaceIDs = [
            faceID
            for faceID, tracker in faceTrackers.items()
            if tracker.update(frame) < MIN_TRACK_QUALITY
        ]
        for faceID in lostFaceIDs:
            faceTrackers.pop(faceID, None)

        if frameCounter % DETECT_EVERY == 0:  # start scanning for faces
            gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

            # convert_bb yields (x, y, w, h): width comes before height.
            for x, y, w, h in [convert_bb(r) for r in detector(gray)]:
                x_bar = x + 0.5 * w
                y_bar = y + 0.5 * h
                matchedFaceID = None

                for faceID, tracker in faceTrackers.items():
                    t_x, t_y, t_w, t_h = _tracker_box(tracker)
                    t_x_bar = t_x + 0.5 * t_w
                    t_y_bar = t_y + 0.5 * t_h

                    # A detection belongs to a tracker when each box
                    # contains the centre of the other one.
                    if (
                        (t_x <= x_bar <= (t_x + t_w))
                        and (t_y <= y_bar <= (t_y + t_h))
                        and (x <= t_x_bar <= (x + w))
                        and (y <= t_y_bar <= (y + h))
                    ):
                        matchedFaceID = faceID

                if matchedFaceID is None:
                    currentFaceID += 1
                    newTracker = dlib.correlation_tracker()
                    # Track a slightly enlarged box around the detection.
                    newTracker.start_track(
                        frame,
                        dlib.rectangle(x - 10, y - 20, x + w + 10, y + h + 20),
                    )
                    faceTrackers[currentFaceID] = newTracker
                    _start_recognition(
                        faceNames, currentFaceID, _clipped_crop(frame, x, y, w, h)
                    )

            if frameCounter % RECOGNIZE_EVERY == 0:  # start face recognition periodically
                for faceID, tracker in faceTrackers.items():
                    t_x, t_y, t_w, t_h = _tracker_box(tracker)
                    # Rows span the height and columns the width.
                    _start_recognition(
                        faceNames, faceID, _clipped_crop(frame, t_x, t_y, t_w, t_h)
                    )

        for faceID, tracker in faceTrackers.items():
            t_x, t_y, t_w, t_h = _tracker_box(tracker)

            cv.rectangle(
                resultFrame, (t_x, t_y), (t_x + t_w, t_y + t_h), (0, 255, 0), 2
            )
            # Show the stored name, or a placeholder until a result exists.
            cv.putText(
                resultFrame,
                faceNames.get(faceID, "Detecting"),
                (int(t_x + t_w / 2), int(t_y)),
                cv.FONT_HERSHEY_SIMPLEX,
                0.5,
                (255, 255, 255),
                2,
            )

        #writer.write(resultFrame)
        cv.imshow("result", resultFrame)
        if cv.waitKey(1) & 0xFF == ord("q"):
            cap.release()
            break

In [80]:
try:
    face_tracking_and_recognition(cap)
finally:
    # Runs even when the loop raises or the kernel is interrupted, so the
    # capture handle and the preview window are not left open.
    cap.release()
    cv.destroyAllWindows()

Calling FaceRecognition
{1: None} 1
Calling FaceRecognition
{1: 'George_W_Bush'} 1
Calling FaceRecognition
{1: 'George_W_Bush'} 1


In [22]:
cv.destroyAllWindows()  # close any OpenCV windows that are still open