In [7]:
import cv2
import dlib
import mediapipe as mp
import numpy as np
import math
import sys

In [8]:
# Three useful functions for head-pose estimation
def Ref3DModel():
    """Return the fixed 3D reference points of the generic face model.

    Returns:
        A (6, 3) float64 array. Row i is paired by index with row i of
        Ref2dImagePoints when both are handed to cv2.solvePnP.
    """
    return np.array(
        (
            (0.0, 0.0, 0.0),
            (0.0, -330.0, -65.0),
            (-225.0, 170.0, -135.0),
            (225.0, 170.0, -135.0),
            (-150.0, -150.0, -125.0),
            (150.0, -150.0, -125.0),
        ),
        dtype=np.float64,
    )


def Ref2dImagePoints(landmarks):
    """Collect the six 2D landmark positions used for pose estimation.

    Args:
        landmarks: object exposing ``part(i)`` whose result has ``x`` and
            ``y`` attributes.

    Returns:
        A (6, 2) float64 array of (x, y) rows, in this order:
        30 nose tip, 8 chin, 36 left eye left corner,
        45 right eye right corner, 48 left mouth corner,
        54 right mouth corner.
    """
    landmark_ids = (30, 8, 36, 45, 48, 54)

    coords = []
    for idx in landmark_ids:
        point = landmarks.part(idx)
        coords.append([point.x, point.y])

    return np.array(coords, dtype=np.float64)


def CameraMatrix(fl, center):
    """Build the 3x3 pinhole camera intrinsic matrix.

    Args:
        fl: focal length, used for both the x and y axes.
        center: principal point; ``center[0]`` goes into row 0 (x) and
            ``center[1]`` into row 1 (y).

    Returns:
        A (3, 3) float64 array ``[[fl, 0, cx], [0, fl, cy], [0, 0, 1]]``.
    """
    # The skew term (row 0, column 1) is 0 for an ideal pinhole camera.
    cameraMatrix = [[fl, 0, center[0]],
                    [0, fl, center[1]],
                    [0, 0, 1]]
    # np.float64 instead of the np.float alias, which newer NumPy no longer provides.
    return np.array(cameraMatrix, dtype=np.float64)

In [9]:
def ROIGenerator(img,area_rate=0.8):
    """Compute a centred region of interest covering ``area_rate`` of the image.

    Both sides are scaled by ``sqrt(area_rate)`` so the ROI area is roughly
    ``area_rate`` times the image area (margins are truncated to integers).

    Args:
        img: object with a ``shape`` whose first two entries are
            (height, width); any further entries are ignored, so 2-D and
            multi-channel images are both accepted.
        area_rate: fraction of the image area the ROI should cover.

    Returns:
        ``[roi_bottom, roi_right, roi_top, roi_left]`` as integers.
    """
    # Only the first two dimensions matter; this also handles 2-D images.
    height, width = img.shape[:2]

    side_rate = math.sqrt(area_rate)
    margin_x = int((width - side_rate * width) / 2)
    margin_y = int((height - side_rate * height) / 2)

    roi_left = margin_x
    roi_top = margin_y
    roi_right = width - margin_x
    roi_bottom = height - margin_y

    return [roi_bottom, roi_right, roi_top, roi_left]

def CalOcpyofROI(boxA, boxB): # boxA: face; boxB: ROI
    """Return the fraction of boxB's area that is covered by boxA.

    The ratio is intersection area divided by the area of boxB (the ROI),
    not a true intersection-over-union.

    Each box is a 4-element sequence. Indices 0 and 1 are treated as the
    lower corner and indices 2 and 3 as the upper corner of each axis; a box
    given with the larger coordinate first produces an overlap of 0.

    Returns:
        ``0`` when the boxes do not overlap, otherwise a float ratio.
    """
    # Overlap extent along each axis, clamped at zero when the boxes are disjoint.
    overlap_first = max((min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]), 0))
    overlap_second = max((min(boxA[3], boxB[3]) - max(boxA[1], boxB[1])), 0)

    overlap_area = abs(overlap_first * overlap_second)
    if overlap_area == 0:
        return 0

    # Normalise by the ROI area only.
    roi_area = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))
    return overlap_area / float(roi_area)

In [10]:
def CheckinROI(ROI_bbx,face,area_rate=0.8):
    """Report whether the face covers at least 10% of the ROI.

    Args:
        ROI_bbx: ROI box, forwarded unchanged to CalOcpyofROI.
        face: object exposing ``bottom()``, ``right()``, ``top()`` and
            ``left()``.
        area_rate: accepted but not used by this function.

    Returns:
        True when CalOcpyofROI reports an occupancy of at least 0.1.

    NOTE(review): the face box is built as [bottom, right, top, left].
    CalOcpyofROI takes max() of indices 0/1 and min() of indices 2/3, so if
    bottom >= top and right >= left (expected for image coordinates, please
    confirm) the overlap clamps to 0 and this always returns False.
    """
    face_corners = [face.bottom(), face.right(), face.top(), face.left()]
    occupancy = CalOcpyofROI(face_corners, ROI_bbx)
    return occupancy >= 0.1

In [11]:
def CalDistance(img,face):
    """Return the Euclidean distance from the face centre to the image centre.

    Args:
        img: object with a ``shape`` whose first two entries are
            (height, width); 2-D and multi-channel images are both accepted.
        face: object exposing ``left()``, ``right()``, ``top()`` and
            ``bottom()``.

    Returns:
        The distance as a float, computed from integer-truncated centres.
    """
    # Only the first two dimensions matter; this also handles 2-D images.
    height, width = img.shape[:2]

    # Both centres are expressed as (x, y) so the components line up.
    img_center_x = int(width / 2)
    img_center_y = int(height / 2)
    face_center_x = int((face.left() + face.right()) / 2)
    face_center_y = int((face.top() + face.bottom()) / 2)

    dx = face_center_x - img_center_x
    dy = face_center_y - img_center_y
    return math.sqrt(dx * dx + dy * dy)

In [12]:
def HeadPoseDetector(img,landmarks):
    """Estimate head pose and return a 2D line segment showing its direction.

    The pose is solved with cv2.solvePnP from the reference 3D model and the
    matching 2D landmarks, then the model-space point (0, 0, 500) is projected
    back into the image to obtain the end of the direction line.

    Args:
        img: image whose ``shape`` starts with (height, width).
        landmarks: landmark object accepted by Ref2dImagePoints.

    Returns:
        ``(p1, p2)``: integer (x, y) tuples. ``p1`` is the first reference
        image point (the nose tip) and ``p2`` is the projected end point.
    """
    height, width = img.shape[:2]
    # The focal length is approximated by the image width.
    focal_length = 1 * width

    # CameraMatrix expects the principal point as (cx, cy) = (width/2, height/2).
    camera_matrix = CameraMatrix(focal_length, (width / 2, height / 2))
    face_3d_model = Ref3DModel()
    ref_img_pts = Ref2dImagePoints(landmarks)
    mdists = np.zeros((4, 1), dtype=np.float64)  # zero distortion coefficients

    # Rotation and translation of the face model relative to the camera.
    _success, rotation_vector, translation_vector = cv2.solvePnP(face_3d_model,
                                        ref_img_pts,
                                        camera_matrix,
                                        mdists)

    # Project a point 500 units along the model's z axis to get the line end.
    nose_end_point, _ = cv2.projectPoints(np.array([(0.0, 0.0, 500.0)]),
                                        rotation_vector,
                                        translation_vector,
                                        camera_matrix,
                                        mdists)

    p1 = (int(ref_img_pts[0][0]), int(ref_img_pts[0][1]))  # start: nose tip
    p2 = (int(nose_end_point[0][0][0]), int(nose_end_point[0][0][1]))  # end

    return p1, p2

In [14]:
# Webcam loop: detect faces, highlight the one nearest the image centre and
# draw landmarks plus the head-pose line for faces that lie inside the ROI.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("landmarks/shape_predictor_68_face_landmarks.dat")

cap = cv2.VideoCapture(0)

Colors = [(0,255,0),(0,0,255)] #0:green = focused; 1:red = unfocused
ROI_AREA_RATE = 0.8        # ROI area = ROI_AREA_RATE * image area
MIN_ROI_OCCUPANCY = 0.1    # minimum share of the ROI a face must cover

try:
    while cap.isOpened():
        grabbed, frame = cap.read()
        if not grabbed:
            # No frame was delivered (camera unplugged or stream ended).
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)

        # Generate and draw the ROI.
        roi = ROIGenerator(frame, area_rate=ROI_AREA_RATE)
        roi_bottom, roi_right, roi_top, roi_left = roi
        cv2.rectangle(frame, (roi_left, roi_top), (roi_right, roi_bottom), (0,255,255), 1)

        # Every face starts as "unfocused"; the first face nearest to the
        # image centre is then promoted to "focused".
        color_flag = [Colors[1] for _ in faces]
        distance_list = [CalDistance(frame, face) for face in faces]
        if distance_list:
            color_flag[distance_list.index(min(distance_list))] = Colors[0]

        # Boxes are handed to CalOcpyofROI min-corner first
        # (left, top, right, bottom). CheckinROI orders corners as
        # (bottom, right, top, left), so the ratio is computed here instead.
        roi_box = (roi_left, roi_top, roi_right, roi_bottom)

        for face, color in zip(faces, color_flag):
            # Skip faces outside the ROI, but keep processing the others.
            face_box = (face.left(), face.top(), face.right(), face.bottom())
            if CalOcpyofROI(face_box, roi_box) < MIN_ROI_OCCUPANCY:
                continue

            # Detect the landmarks
            landmarks = predictor(gray, face)

            # Estimate head pose
            pose_start, pose_end = HeadPoseDetector(frame, landmarks)

            # Draw the face detection box
            cv2.rectangle(frame, (face.left(), face.top()), (face.right(), face.bottom()), color, 3)

            # Draw the 68 landmarks
            for n in range(68):
                x = landmarks.part(n).x
                y = landmarks.part(n).y
                cv2.circle(frame, (x, y), 2, color, -1)

            # Draw the head pose
            cv2.line(frame, pose_start, pose_end, (255,0,0), 2)

        # Show every frame once, even when no face was drawn.
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()