In [8]:
from mtcnn_cv2 import MTCNN
import cv2
import os
import glob
import numpy as np
import time
from face_recognition.face_recognition import Face_recognition
from PIL import Image

In [4]:
test_img_pth = './test'
detected_img_pth = './detected_img'
detector = MTCNN()
face_model = Face_recognition()

In [5]:
def affineMatrix(lmks, scale=4.5):
    nose = np.array(lmks['nose'], dtype=np.float32)
    left_eye = np.array(lmks['left_eye'], dtype=np.float32)
    right_eye = np.array(lmks['right_eye'], dtype=np.float32)
    eye_width = right_eye - left_eye
    angle = np.arctan2(eye_width[1], eye_width[0])
    center = nose
    alpha = np.cos(angle)
    beta = np.sin(angle)
    w = np.sqrt(np.sum(eye_width**2)) * scale
    m = [[alpha, beta, -alpha * center[0] - beta * center[1] + w * 0.5],
        [-beta, alpha, beta * center[0] - alpha * center[1] + w * 0.5]]
    return np.array(m), (int(w), int(w))

# mtcnn-cv2

In [6]:
target = 'nba_many_players.jpeg'

img = cv2.imread(os.path.join(test_img_pth, target))
detected_faces = detector.detect_faces(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

# open cv2_window
cv2.namedWindow("image", cv2.WINDOW_NORMAL) 

for cnt, face in enumerate(detected_faces):
    if face['confidence'] < 0.85: continue    
    mat, size = affineMatrix(face['keypoints'])
    
    # get cropped image to recognition
    result = cv2.warpAffine(img, mat, size)
    result = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
    predict = face_model.predict(result, is_cropped=True)
    if predict == None: continue
    pred_label, _, score = predict
    print(f'predict: {pred_label}, score: {score}')
    
    # save cropped face image to ./detected_img
    cv2.imwrite(os.path.join(detected_img_pth, str(cnt) + '_' +target), cv2.warpAffine(img, mat, size))
    
    # draw face border
    face_pos = face['box']
    cv2.rectangle(img, (face_pos[0], face_pos[1]), (face_pos[0]+face_pos[2],
                                                    face_pos[1]+face_pos[3]), (0, 255, 0), 3)
    
    # output prediction on image
    cv2.putText(img, f'{pred_label}, score:{int(score)}', (face_pos[0], face_pos[1]), 
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)


cv2.imshow('image', img)
cv2.waitKey(0)
cv2.destroyWindow('image')
# cv2.destroyAllWindows()



predict: daoko, score: 47.39771792382905
predict: daoko, score: 48.45799785671812
predict: khaby, score: 45.85431344581374
predict: rock, score: 43.61400243007776
predict: daoko, score: 43.31076109048092
predict: trump, score: 44.379658410043426
predict: billie, score: 44.11426060127489
predict: khaby, score: 46.21105988820394
predict: daoko, score: 53.55472239581022
predict: musk, score: 40.74860847357547


In [7]:
for face in detected_faces:
    print(face['box'])
    print(face['confidence'])
    print(face['keypoints'])

[1588, 1, 67, 85]
0.9999768733978271
{'left_eye': (1606, 36), 'right_eye': (1639, 36), 'nose': (1620, 53), 'mouth_left': (1607, 68), 'mouth_right': (1635, 69)}
[1863, 22, 73, 82]
0.9999547004699707
{'left_eye': (1880, 53), 'right_eye': (1912, 52), 'nose': (1891, 74), 'mouth_left': (1881, 87), 'mouth_right': (1907, 85)}
[762, 391, 86, 106]
0.9998884201049805
{'left_eye': (807, 430), 'right_eye': (838, 432), 'nose': (830, 456), 'mouth_left': (804, 475), 'mouth_right': (832, 475)}
[825, 333, 61, 85]
0.9998205304145813
{'left_eye': (842, 365), 'right_eye': (870, 364), 'nose': (856, 379), 'mouth_left': (844, 398), 'mouth_right': (865, 398)}
[937, 107, 87, 113]
0.9998030066490173
{'left_eye': (964, 156), 'right_eye': (1002, 157), 'nose': (983, 184), 'mouth_left': (966, 196), 'mouth_right': (997, 197)}
[192, 64, 67, 93]
0.9997523427009583
{'left_eye': (212, 98), 'right_eye': (241, 107), 'nose': (216, 119), 'mouth_left': (202, 133), 'mouth_right': (226, 140)}
[1149, 0, 72, 72]
0.99971920251846

# Video detection and recognition

In [9]:
target = 'khaby.mp4'
cap = cv2.VideoCapture(os.path.join(test_img_pth, target))
frame_rate = 90
process_rate = 30
base_detection_confidence = 0.95
base_recognition_confidence = 55
prev = 0
process_prev = 0
face_model.test_mode = False

# start video
while cap.isOpened():
    ret, frame = cap.read()
    
    # video end, exiting
    if not ret:
        break 
    
    time_elapsed = time.time() - prev
    process_time_epls = time.time() - process_prev
    
    # set frame process rate
    if process_time_epls > 1./process_rate:
        process_prev = time.time()
    
        # frame process
        detected_faces = detector.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        for cnt, face in enumerate(detected_faces):
            if len(detected_faces) == 0: break
            
            if face['confidence'] < base_detection_confidence: continue    
            mat, size = affineMatrix(face['keypoints'])

            # get cropped image to recognition
            result = cv2.warpAffine(frame, mat, size)
            result = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
            predict = face_model.predict(result, is_cropped=True)
            
            if predict == None: continue
                
            pred_label, _, score = predict
            if score < base_recognition_confidence:
                pred_label = 'Unknown'
                score = 0
            
            # draw face border
            face_pos = face['box']
            cv2.rectangle(frame, (face_pos[0], face_pos[1]), (face_pos[0]+face_pos[2],
                                                            face_pos[1]+face_pos[3]), (0, 255, 0), 3)                
            
            # output prediction on image
            text = f'{pred_label}'
            if pred_label != 'Unknown': text += f', score:{int(score)}'
            
            cv2.putText(frame, text, (face_pos[0], face_pos[1]), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
    
    # set video frame rate and show frame
    if time_elapsed > 1./frame_rate:
        prev = time.time()
        cv2.imshow('frame', frame)
        
    if cv2.waitKey(1) == ord('q'):
        break
        
cap.release()
cv2.destroyAllWindows()