## install libraries

In [None]:
# !pip install opencv-python
# !pip install mediapipe

In [50]:
# !pip install imutils

Collecting imutils
  Using cached imutils-0.5.4-py3-none-any.whl
Installing collected packages: imutils
Successfully installed imutils-0.5.4


## import libraries

In [1]:
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Haar cascade model files used to confirm that the eyes and the mouth are
# visible. Forward slashes are used because they resolve on both Windows and
# POSIX systems, whereas a hard-coded backslash only works on Windows.
EYE_CASCADE_PATH = 'haarcascade/eye.xml'
MOUTH_CASCADE_PATH = 'haarcascade/smile.xml'

# Class of mediapipe face mesh

## MediaPipe

## functions

Before extracting landmarks with MediaPipe, we first check that both eyes and the mouth are visible in the face (MediaPipe face detection locates the face, and Haar cascades classify the eyes and mouth).

constant variables

In [3]:

# MediaPipe face-mesh landmark indices (rows of the landmark matrix).
RIGHT_EYE_POINTS = [130, 243, 23, 27, 33]
LEFT_EYE_POINTS = [463, 257, 359, 253, 263]
MOUTH_POINTS = [
    57, 287, 12, 13, 16, 17,
    314, 268, 315, 61, 291,
]
UP_FACE = 337
DOWN_FACE = 396      # compared against the image height for the chin check
NOSE_POINT = 19      # its z value is used for the camera-distance check
FOREHEAD_TH = 338    # fallback landmark for the forehead-visibility check

# Eye and mouth landmarks combined; used when checking forehead and chin.
HELP_POINTS = [*MOUTH_POINTS, *RIGHT_EYE_POINTS, *LEFT_EYE_POINTS]

# Landmarks fed to the head-pose estimation (nose, chin, mouth, eyes).
NOSE_POINT_POSE = [1]
CHIN_POINT_POSE = [199]
MOUTH_POINT_POSE = [61, 291]
EYE_POINT_POSE = [33, 263]
HELP_POINTS_POSE = [
    *NOSE_POINT_POSE,
    *CHIN_POINT_POSE,
    *MOUTH_POINT_POSE,
    *EYE_POINT_POSE,
]

FORHEAD_RATIO = 0.3

# Landmarks that delimit the cheek/forehead regions in shadow detection.
RIGHT_DOWN_EYE = 23
LEFT_DOWN_EYE = 253
LEFT_MOUTH = 291
RIGHT_MOUTH = 61
EYE_UP = 336
UP_RIGHT = 108
UP_LEFT = 333


## define model of landmark extraction

In [8]:
class FaceMeshModel():
    """Validate a face image and extract MediaPipe face-mesh landmarks.

    A frame is accepted only when exactly one face is detected, both eyes and
    the mouth are found by the Haar cascades, the forehead and chin are inside
    the frame, and the distance, head-pose and shadow checks all pass.
    Landmarks are stored as an (N, 3) ``np.matrix`` whose columns are
    x (pixels), y (pixels) and the z value reported by MediaPipe.
    """

    def __init__(self, image, min_detection_confidence=0.5, model_selection=0):
        """Create the MediaPipe face-detection and face-mesh models.

        Args:
            image: Array whose first two dimensions give the initial frame
                height and width.
            min_detection_confidence: Forwarded to MediaPipe ``FaceDetection``.
            model_selection: Forwarded to MediaPipe ``FaceDetection``.
        """
        self.min_detection_confidence = min_detection_confidence
        self.model_selection = model_selection

        # face detection (the constructor arguments are forwarded instead of
        # being silently replaced by hard-coded defaults)
        self.mp_face_detection = mp.solutions.face_detection
        self.face_detection = self.mp_face_detection.FaceDetection(
            min_detection_confidence=min_detection_confidence,
            model_selection=model_selection)

        # face mesh
        self.face_mesh_lib = mp.solutions.face_mesh
        self.face_mesh = self.face_mesh_lib.FaceMesh()

        self.current_landmarks = None

        self.height, self.width = image.shape[:2]
        self.results = None

    def _load_cascade(self, path):
        """Return the Haar cascade stored at *path*, loading it only once."""
        cache = self.__dict__.setdefault('_cascades', {})
        if path not in cache:
            cache[path] = cv2.CascadeClassifier(path)
        return cache[path]

    @staticmethod
    def _camera_matrix(width, height):
        """Build the pinhole camera matrix used for head-pose estimation.

        The focal length is approximated by the image width and the principal
        point is the image centre: cx = width / 2, cy = height / 2.
        """
        focal_len = 1 * width
        return np.array([[focal_len, 0, width / 2],
                         [0, focal_len, height / 2],
                         [0, 0, 1]], dtype=np.float64)

    @staticmethod
    def _mask_region(mask, x1, y1, x2, y2):
        """Return ``mask[y1:y2, x1:x2]`` with negative bounds clamped to 0.

        Clamping prevents a landmark lying outside the image from being
        interpreted as a wrap-around (negative) slice index.
        """
        x1, y1, x2, y2 = (max(0, int(v)) for v in (x1, y1, x2, y2))
        return mask[y1:y2, x1:x2]

    def show_landmark(self, frame, v_stream=False):
        """Draw the current landmarks on *frame* and display it.

        Args:
            frame: BGR image to draw on (modified in place).
            v_stream: When true wait 1 ms for a key, otherwise block until a
                key is pressed.
        """
        if self.current_landmarks is not None:
            # Landmarks are already stored in pixel coordinates, so they must
            # not be scaled by the frame size a second time.
            for x, y in np.asarray(self.current_landmarks)[:, :2]:
                cv2.circle(frame, (int(x), int(y)), radius=4,
                           color=(0, 0, 200), thickness=-1)
        cv2.imshow('landmarks', cv2.resize(frame, (512, 512)))
        cv2.waitKey(1 if v_stream else 0)

    # retrieve landmarks by mediapipe
    def get_landmarks(self, start, frame):
        """Run the face mesh on *frame* and return its landmarks.

        Args:
            start: When false nothing is processed and ``None`` is returned.
            frame: BGR image.

        Returns:
            An (N, 3) ``np.matrix`` of [x_px, y_px, z] rows for the first
            face, or ``None`` when *start* is false or no face mesh is found.
        """
        if not start:
            return None
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # remember the size of the frame the landmarks belong to
        self.height, self.width = frame.shape[:2]
        height, width = self.height, self.width
        mesh = self.face_mesh.process(rgb_frame)
        # multi_face_landmarks is None when the mesh model finds no face
        if not mesh.multi_face_landmarks:
            return None
        points = mesh.multi_face_landmarks[0].landmark
        # np.matrix is kept so that the returned type stays unchanged
        return np.matrix([[int(p.x * width), int(p.y * height), p.z]
                          for p in points])

    # Task one
    def check_sensivity(self, im, show):
        """Check that eyes, mouth, forehead and chin are visible.

        Updates ``self.current_landmarks`` from *im*.

        Args:
            im: BGR image.
            show: Unused; kept for interface compatibility.

        Returns:
            Dict with keys 'eye', 'mouth' (bool) and 'forehead', 'chin'
            (bool, or ``None`` when eyes/mouth were not both found).
        """
        out = {'eye': False, 'mouth': False, 'forehead': None, 'chin': None}

        self.current_landmarks = self.get_landmarks(True, im)
        if self.current_landmarks is None:
            # no face mesh: report eyes and mouth as not visible
            return out

        landmarks = np.asarray(self.current_landmarks).astype(np.int64)
        x_start, y_start = int(landmarks[:, 0].min()), int(landmarks[:, 1].min())
        x_end, y_end = int(landmarks[:, 0].max()), int(landmarks[:, 1].max())

        height, width = im.shape[:2]
        # threshold for box of face
        # NOTE(review): a 0.1 px threshold is truncated by int(), so it only
        # moves the top-left corner by one pixel; confirm the intended margin.
        th = 0.1
        x_start, y_start = int(max(0, x_start) - th), int(max(0, y_start) - th)
        x_end, y_end = int(min(x_end, width) + th), int(min(y_end, height) + th)

        # gray level of image to input haarcascade classifier (the frame is
        # BGR, matching the conversions used elsewhere in this class)
        gr_im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        face_roi = gr_im[y_start:y_end, x_start:x_end]
        if face_roi.size == 0:
            return out

        # haar cascade detector for eye
        eye_rects = self._load_cascade(EYE_CASCADE_PATH).detectMultiScale(
            face_roi, scaleFactor=1.1, minNeighbors=10,
            minSize=(15, 15), flags=cv2.CASCADE_SCALE_IMAGE)

        # haar cascade detector for mouth
        mouth_rects = self._load_cascade(MOUTH_CASCADE_PATH).detectMultiScale(
            face_roi, scaleFactor=1.1, minNeighbors=10,
            minSize=(15, 15), flags=cv2.CASCADE_SCALE_IMAGE)

        out['eye'] = len(eye_rects) >= 2
        out['mouth'] = len(mouth_rects) >= 1
        if not (out['eye'] and out['mouth']):
            return out

        # detect forehead and chin
        y_face_start = int(landmarks[HELP_POINTS, 1].min())
        right_eye_inner = int(landmarks[RIGHT_EYE_POINTS, 0].max())
        left_eye_inner = int(landmarks[LEFT_EYE_POINTS, 0].min())
        # forehead height is estimated as 4/3 of the distance between the eyes
        h_forehead = int(4 * (left_eye_inner - right_eye_inner) / 3)
        y_forehead_start = y_face_start - h_forehead
        # the forehead counts as visible if either the estimated top or the
        # fallback forehead landmark lies inside the image
        detect_forehead = bool(y_forehead_start >= 0
                               or landmarks[FOREHEAD_TH, 1] >= 0)

        # chin detection
        detect_chin = bool(height >= landmarks[DOWN_FACE, 1])

        out['forehead'], out['chin'] = detect_forehead, detect_chin
        return out

    def estimate_head_pose(self, img):
        """Estimate head rotation from six landmarks with ``cv2.solvePnP``.

        Args:
            img: Unused; kept for interface compatibility.

        Returns:
            Tuple (x, y, z) of the decomposed rotation angles, each multiplied
            by 360.
        """
        height, width = self.height, self.width

        # fancy indexing copies, so the truncation below does not touch
        # self.current_landmarks
        face3d = np.asarray(self.current_landmarks,
                            dtype=np.float64)[HELP_POINTS_POSE, :]
        face3d[:, :2] = np.trunc(face3d[:, :2])
        face3d = np.ascontiguousarray(face3d)
        face2d = np.ascontiguousarray(face3d[:, :2])

        # principal point: cx from the width, cy from the height
        cam_parameter = self._camera_matrix(width, height)
        dist_coff = np.zeros((4, 1), dtype=np.float64)

        success, rot_vec, trans_vec = cv2.solvePnP(
            face3d, face2d, cam_parameter, dist_coff)

        rmat, jac = cv2.Rodrigues(rot_vec)

        angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)

        # NOTE(review): the factor 360 is kept from the original code; the
        # thresholds in check_correct_condition depend on this scaling.
        x = angles[0] * 360
        y = angles[1] * 360
        z = angles[2] * 360

        return x, y, z

    # Task 2: check appropriate condition
    def check_correct_condition(self, im):
        """Check camera distance, frontal pose and absence of shadow.

        Args:
            im: BGR image the current landmarks were extracted from.

        Returns:
            Dict with boolean keys 'near', 'forward' and 'no shadow'. When no
            landmarks are available the result is False, False, True.
        """
        distance_flag = False
        forward_flag = False
        no_shadow_flag = True
        if self.current_landmarks is not None:
            # estimate distance to camera from the z value of the nose point
            z_dist = self.current_landmarks[NOSE_POINT, 2]
            if -0.15 <= z_dist <= -0.12:
                distance_flag = True

            # pose estimation: all three angles must be within +/- 6
            roll, pitch, yaw = self.estimate_head_pose(im)
            if -6 < roll < 6 and -6 < pitch < 6 and -6 < yaw < 6:
                forward_flag = True

            # shadow existence
            no_shadow_flag = self.detect_shadow(im)

        return {'near': distance_flag, 'forward': forward_flag,
                'no shadow': no_shadow_flag}

    # Task 3: check shadow on face
    def detect_shadow(self, im):
        """Return False when the skin regions look unevenly illuminated.

        The face box is binarised (inverse Otsu threshold) and the share of
        white and black pixels inside the two cheeks and the forehead is
        measured; if both shares reach 20 % a shadow is reported.

        Args:
            im: BGR image the current landmarks were extracted from.

        Returns:
            True when no shadow is detected or the regions cannot be
            evaluated, False otherwise.
        """
        no_shadow_flag = True
        if self.current_landmarks is None:
            return no_shadow_flag

        img_h, img_w = im.shape[:2]
        points = np.asarray(self.current_landmarks)
        xs = points[:, 0].astype(np.int64)
        ys = points[:, 1].astype(np.int64)

        # boundary of the face, clipped to the image
        x_min = int(np.clip(xs.min(), 0, img_w))
        x_max = int(np.clip(xs.max(), 0, img_w))
        y_min = int(np.clip(ys.min(), 0, img_h))
        y_max = int(np.clip(ys.max(), 0, img_h))

        face = im[y_min:y_max, x_min:x_max]
        # .size also catches a crop with rows but zero columns
        if face.size == 0:
            return no_shadow_flag

        gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (7, 7), 0)
        # with THRESH_OTSU the threshold is chosen automatically
        _, thresh_inv = cv2.threshold(
            blurred, 150, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

        # paste the binary face back so landmark coordinates index it directly
        mask = np.zeros((img_h, img_w))
        mask[y_min:y_max, x_min:x_max] = thresh_inv

        # right cheek: from landmark 132 to the right mouth corner
        right_cheek = self._mask_region(
            mask,
            points[132, 0], points[RIGHT_DOWN_EYE, 1],
            points[RIGHT_MOUTH, 0], points[RIGHT_MOUTH, 1])

        # left cheek: from the left mouth corner to landmark 352
        left_cheek = self._mask_region(
            mask,
            points[LEFT_MOUTH, 0], points[LEFT_DOWN_EYE, 1],
            points[352, 0], points[LEFT_MOUTH, 1])

        # forehead: from the top of the face box down to the EYE_UP landmark
        forehead = self._mask_region(
            mask, xs.min(), ys.min(), xs.max(), points[EYE_UP, 1])

        skin = np.concatenate(
            [forehead.ravel(), right_cheek.ravel(), left_cheek.ravel()])
        area = skin.size
        if area == 0:
            # nothing to measure; avoid dividing by zero
            return no_shadow_flag

        white = 100 * np.count_nonzero(skin == 255) / area
        black = 100 * np.count_nonzero(skin == 0) / area

        # Check uniform distribution of illumination
        if min(white, black) >= 20:
            no_shadow_flag = False

        return no_shadow_flag

    def extract_landmarks(self, frame, show=False, v_stream=False):
        """Return the landmarks of *frame* if it passes every check.

        Args:
            frame: BGR image.
            show: When true display the frame (with landmarks if accepted).
            v_stream: Passed to the display code; true means do not block on a
                key press.

        Returns:
            The landmark matrix when exactly one face is detected and all
            conditions hold, otherwise ``None``.
        """
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # extract faces
        self.results = self.face_detection.process(rgb_frame).detections
        if not self.results:
            # no face
            return None

        accepted = False
        if len(self.results) == 1:
            sensivity_cond = self.check_sensivity(frame, show)
            print(sensivity_cond)
            correct_cond = self.check_correct_condition(frame)
            print(correct_cond)
            accepted = (False not in sensivity_cond.values()
                        and False not in correct_cond.values())

        if accepted:
            if show:
                self.show_landmark(frame, v_stream)
            return self.current_landmarks

        if show:
            cv2.imshow('frame', frame)
            cv2.waitKey(1 if v_stream else 0)

        # multiple faces or a failed condition
        return None

# Test

## load image

In [10]:
# Forward slashes resolve on both Windows and POSIX systems.
IMAGE_PATH = 'images/test2.jpg'

# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than inside the model constructor.
i = cv2.imread(IMAGE_PATH)
if i is None:
    raise FileNotFoundError(f'could not read image: {IMAGE_PATH}')
extractor_landmark = FaceMeshModel(i)

extractor_landmark.extract_landmarks(i, show=True)

{'eye': True, 'mouth': True, 'forehead': True, 'chin': True}
{'near': False, 'forward': True, 'no shadow': True}


## start video stream

In [None]:
# Run the landmark extractor on the default camera until 'q' is pressed.
cap = cv2.VideoCapture(0)
v_stream = True
extractor_landmark = None

try:
    while cap.isOpened():
        ret, f = cap.read()
        if not ret:
            continue
        # Build the MediaPipe models once and reuse them for every frame;
        # the model refreshes its frame size whenever landmarks are extracted.
        if extractor_landmark is None:
            extractor_landmark = FaceMeshModel(f)
        extractor_landmark.extract_landmarks(f, show=True, v_stream=v_stream)
        # also services the GUI event loop when nothing was displayed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always free the camera and close the preview windows
    cap.release()
    cv2.destroyAllWindows()