# Background 
The eye-aspect-ratio (EAR) is a simple way to determine whether a person is blinking or not: https://vision.fe.uni-lj.si/cvww2016/proceedings/papers/05.pdf 
 
This code takes the frames of the people from the webgazer dataset, extract their facemesh, and 

A lot of the code is from: https://learnopencv.com/driver-drowsiness-detection-using-mediapipe-in-python/ 

The blink threshold was chosen from https://www.preprints.org/manuscript/202203.0200/v1 which recommends a threshold of 0.2 for blink vs non-blinks 

# Creating a conda env 
- conda create -n dl_webgazer python=3.7.9 - mediapipe does not run on versions above this
- conda install jupyter
- conda install opencv
- conda install matplotlib

In [111]:
import cv2 
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp # CONDA_SUBDIR=osx-64 conda create -n Env37 python=3.7 gotta downgrade to use mediapipe
import os

In [2]:
# TODO: this does it for a single file directory
directory = '/Users/jc/Desktop/research/WebGazer/www/data/FramesDataset/WebGazerETRA2018Dataset_Release20180420/P_01/1491423217564_3_-study-dot_test.webm_frames/'

In [74]:
# The chosen 12 points:   P1,  P2,  P3,  P4,  P5,  P6
EAR_LEFT_EYE_IDXS  = [362, 385, 387, 263, 373, 380]
EAR_RIGHT_EYE_IDXS = [33,  160, 158, 133, 153, 144]

In [3]:
def image_to_array(image_file_path, showImagePlot=False): 
    image = cv2.imread(image_file_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB
    image = np.ascontiguousarray(image)
    
    if showImagePlot: 
        plt.imshow(image)
    
    return image

In [12]:
def extract_face_mesh(image): 
    mp_facemesh = mp.solutions.face_mesh
    
    with mp_facemesh.FaceMesh(
    static_image_mode=True,         # Default=False
    max_num_faces=1,                # Default=1
    refine_landmarks=False,         # Default=False
    min_detection_confidence=0.5,   # Default=0.5
    min_tracking_confidence= 0.5,   # Default=0.5
) as face_mesh:
        return face_mesh.process(image)

In [72]:
def get_pixel_coords(results, image, eyeType):
    imgH, imgW, _ = image.shape         
    
    chosen_landmark_idxs = []
    if eyeType == "left":
        chosen_landmark_idxs = EAR_LEFT_EYE_IDXS
    if eyeType == "right":
        chosen_landmark_idxs = EAR_RIGHT_EYE_IDXS

    mp_drawing  = mp.solutions.drawing_utils
    denormalize_coordinates = mp_drawing._normalized_to_pixel_coordinates
    
    # if no face was detected 
    if results.multi_face_landmarks is None: 
        return {}
    else: 
        pred_coords = {}
        for landmark_idx, landmark in enumerate(results.multi_face_landmarks[0].landmark):
            # For plotting chosen eye landmarks
            if landmark_idx in chosen_landmark_idxs:
                pred_coord = denormalize_coordinates(landmark.x, 
                                                    landmark.y, 
                                                    imgW, imgH)
                pred_coords[landmark_idx] = pred_coord
        return pred_coords

In [84]:
def l2_norm(point_1, point_2):
    dist = sum([(i - j) ** 2 for i, j in zip(point_1, point_2)]) ** 0.5
    return dist

In [103]:
def calculate_ear(coords_points, eyeType):
    if len(coords_points) == 0: #no face detected
        return -1.0 
    
    eye_landmark_idxs = []
    if eyeType == "left":
        eye_landmark_idxs = EAR_LEFT_EYE_IDXS
    if eyeType == "right":
        eye_landmark_idxs = EAR_RIGHT_EYE_IDXS

    P2_P6 = l2_norm(coords_points[eye_landmark_idxs[1]], coords_points[eye_landmark_idxs[5]])
    P3_P5 = l2_norm(coords_points[eye_landmark_idxs[2]], coords_points[eye_landmark_idxs[4]])
    P1_P4 = l2_norm(coords_points[eye_landmark_idxs[0]], coords_points[eye_landmark_idxs[3]])
 
    # Compute the eye aspect ratio
    ear = (P2_P6 + P3_P5) / (2.0 * P1_P4)
    
    return ear

In [109]:
def extract_blink(directory): 
    blinks_by_frame = []
    for image_filename in os.listdir(directory):
        img_file = os.path.join(directory, image_filename)
        if img_file.endswith(".png"): #this naively checks if it is a png or frame 
            image = image_to_array(img_file)
            mp_extraction = extract_face_mesh(image)
            left_eye_pixel_coords = get_pixel_coords(mp_extraction, image, "left")
            right_eye_pixel_coords = get_pixel_coords(mp_extraction, image, "right")
            left_ear = calculate_ear(left_eye_pixel_coords, "left")
            right_ear = calculate_ear(right_eye_pixel_coords, "right")
            avg_ear = (left_ear+right_ear)/2
            
            if avg_ear == -1: # face was not detected in the frame
                blinks_by_frame.append([image_filename, None])
            if avg_ear > 0.20: # there was no blink 
                blinks_by_frame.append([image_filename, False])
            if avg_ear <= 0.20: # indicates a blink
                blinks_by_frame.append([image_filename, True])
    
    return blinks_by_frame

In [110]:
 extract_blink(directory)

[['frame_00000541_00018060.png', False],
 ['frame_00000500_00016739.png', False],
 ['frame_00001031_00034439.png', False],
 ['frame_00000241_00008099.png', False],
 ['frame_00000937_00031320.png', False],
 ['frame_00000927_00030959.png', True],
 ['frame_00000106_00003540.png', False],
 ['frame_00000808_00026999.png', False],
 ['frame_00000831_00027780.png', False],
 ['frame_00000598_00019979.png', False],
 ['frame_00000374_00012540.png', False],
 ['frame_00000931_00031079.png', False],
 ['frame_00000969_00032339.png', False],
 ['frame_00000515_00017220.png', False],
 ['frame_00000829_00027719.png', False],
 ['frame_00000278_00009299.png', False],
 ['frame_00000644_00021539.png', False],
 ['frame_00000998_00033302.png', False],
 ['frame_00000096_00003240.png', False],
 ['frame_00000002_00000119.png', False],
 ['frame_00000769_00025680.png', False],
 ['frame_00000368_00012300.png', False],
 ['frame_00000910_00030419.png', False],
 ['frame_00000454_00015179.png', False],
 ['frame_00000718