In [11]:
!pip install torch



In [12]:
import cv2 
import mediapipe as mp
import math
import numpy as np 
import os 
import time
import torch




In [13]:
def distance(p1, p2):
    ''' Calculate distance between two points
    :param p1: First Point 
    :param p2: Second Point
    :return: Euclidean distance between the points. (Using only the x and y coordinates).
    '''
    return (((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** 0.5)


In [14]:
def eye_aspect_ratio(landmarks, eye):
    ''' Calculate the ratio of the eye length to eye width. 
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :param eye: List containing positions which correspond to the eye
    :return: Eye aspect ratio value
    '''
    N1 = distance(landmarks[eye[1][0]], landmarks[eye[1][1]])
    N2 = distance(landmarks[eye[2][0]], landmarks[eye[2][1]])
    N3 = distance(landmarks[eye[3][0]], landmarks[eye[3][1]])
    D = distance(landmarks[eye[0][0]], landmarks[eye[0][1]])
    return (N1 + N2 + N3) / (3 * D)


In [15]:
def eye_feature(landmarks, eye):
    ''' Calculate the eye feature as the average of the eye aspect ratio for the two eyes
    :param landmarks: Face landmarks returned from FaceMesh MediaPipe model
    :param eye: List containing positions which correspond to the eye
    :return: Eye feature value
    '''
    return (eye_aspect_ratio(landmarks, eye) + \
            eye_aspect_ratio(landmarks, eye))/2


In [16]:
def mouth_feature(landmarks, mouth):
    ''' Calculate the mouth feature
    :param landmarks: Face landmarks returned from FaceMesh MediaPipe model
    :param mouth: List containing positions which correspond to the mouth
    :return: Mouth feature value
    '''
    # Your implementation of mouth feature calculation here


In [17]:
def pupil_feature(landmarks):
    ''' Calculate the pupil feature as the average of the pupil circularity for the two eyes
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :return: Pupil feature value
    '''
    return (pupil_circularity(landmarks, left_eye) + \
        pupil_circularity(landmarks, right_eye))/2

In [18]:
def pupil_circularity(landmarks, eye):
    ''' Calculate pupil circularity feature.
    :param landmarks: Face Landmarks returned from FaceMesh MediaPipe model
    :param eye: List containing positions which correspond to the eye
    :return: Pupil circularity for the eye coordinates
    '''
    perimeter = distance(landmarks[eye[0][0]], landmarks[eye[1][0]]) + \
            distance(landmarks[eye[1][0]], landmarks[eye[2][0]]) + \
            distance(landmarks[eye[2][0]], landmarks[eye[3][0]]) + \
            distance(landmarks[eye[3][0]], landmarks[eye[0][1]]) + \
            distance(landmarks[eye[0][1]], landmarks[eye[3][1]]) + \
            distance(landmarks[eye[3][1]], landmarks[eye[2][1]]) + \
            distance(landmarks[eye[2][1]], landmarks[eye[1][1]]) + \
            distance(landmarks[eye[1][1]], landmarks[eye[0][0]])
    area = math.pi * ((distance(landmarks[eye[1][0]], landmarks[eye[3][1]]) * 0.5) ** 2)
    return (4*math.pi*area)/(perimeter**2)


In [19]:
def run_face_mp(image):
    with mp_face_mesh.FaceMesh(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:
        
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(image_rgb)
        
        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                mp_drawing.draw_landmarks(
                    image=image,
                    landmark_list=face_landmarks,
                    connections=None,
                    landmark_drawing_spec=drawing_spec,
                    connection_drawing_spec=drawing_spec)

                landmarks_positions = []
                for landmark in face_landmarks.landmark:
                    landmarks_positions.append((landmark.x, landmark.y, landmark.z))
                
            ear, mar, puc, moe = None, None, None, None  # Initialize variables to None

           # Calculate features
            ear = eye_feature(landmarks_positions, left_eye)
            mar = mouth_feature(landmarks_positions, mouth)
            puc = pupil_feature(landmarks_positions)

            if ear is not None and mar is not None and ear != 0:  # Ensure ear is not None or 0
                moe = mar / ear
            else:
                moe = None  # Set moe to None if ear is None or 0

            return ear, mar, puc, moe, image
            
    return -1000, -1000, -1000, -1000, image


In [20]:
import numpy as np

def calibrate(calib_frame_count=100):
    ears = []
    mars = []
    pucs = []
    moes = []

    cap = cv2.VideoCapture(0)
    while len(ears) < calib_frame_count:
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            continue

        ear, mar, puc, moe, image = run_face_mp(image)
        if ear != -1000:
            ears.append(ear)
            mars.append(mar)
            pucs.append(puc)
            moes.append(moe)

        cv2.imshow('Calibration', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()

    # Filter out None values and compute mean and standard deviation
    ears_filtered = [value for value in ears if value is not None]
    mars_filtered = [value for value in mars if value is not None]
    pucs_filtered = [value for value in pucs if value is not None]
    moes_filtered = [value for value in moes if value is not None]

    # Calculate mean and standard deviation
    ears_norm = [np.mean(ears_filtered), np.std(ears_filtered)]
    mars_norm = [np.mean(mars_filtered), np.std(mars_filtered)]
    pucs_norm = [np.mean(pucs_filtered), np.std(pucs_filtered)]
    moes_norm = [np.mean(moes_filtered), np.std(moes_filtered)]

    return ears_norm, mars_norm, pucs_norm, moes_norm


In [21]:
def get_classification(input_data):
    ''' Perform classification over the facial  features.
    :param input_data: List of facial features for 20 frames
    :return: Alert / Drowsy state prediction
    '''
    model_input = []
    model_input.append(input_data[:5])
    model_input.append(input_data[3:8])
    model_input.append(input_data[6:11])
    model_input.append(input_data[9:14])
    model_input.append(input_data[12:17])
    model_input.append(input_data[15:])
    model_input = torch.FloatTensor(np.array(model_input))
    preds = torch.sigmoid(model(model_input)).gt(0.5).int().data.numpy()
    return int(preds.sum() >= 5)

In [None]:
def infer(ears_norm, mars_norm, pucs_norm, moes_norm):
    ''' Perform inference.
    :param ears_norm: Normalization values for eye feature
    :param mars_norm: Normalization values for mouth feature
    :param pucs_norm: Normalization values for pupil feature
    :param moes_norm: Normalization values for mouth over eye feature. 
    '''
    ear_main = 0
    mar_main = 0
    puc_main = 0
    moe_main = 0
    decay = 0.9 # use decay to smoothen the noise in feature values

    label = None

    input_data = []
    frame_before_run = 0

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            continue

        ear, mar, puc, moe, image = run_face_mp(image)
        if ear != -1000:
            ear = (ear - ears_norm[0])/ears_norm[1] 
            if mar is not None:  # Add this check
                mar = (mar - mars_norm[0])/mars_norm[1]
            else:
                default_mar_value = 0  # You can set any default value you prefer
                mar = default_mar_value 
            puc = (puc - pucs_norm[0])/pucs_norm[1]
            if moe is not None:  # Add this check
                moe = (moe - moes_norm[0])/moes_norm[1]
            else:
                default_moe_value = 0
                moe = default_moe_value
            if ear_main == -1000:
                ear_main = ear
                mar_main = mar
                puc_main = puc
                moe_main = moe
            else:
                ear_main = ear_main*decay + (1-decay)*ear
                mar_main = mar_main*decay + (1-decay)*mar
                puc_main = puc_main*decay + (1-decay)*puc
                moe_main = moe_main*decay + (1-decay)*moe
        else:
            ear_main = -1000
            mar_main = -1000
            puc_main = -1000
            moe_main = -1000
        
        if len(input_data) == 20:
            input_data.pop(0)
        input_data.append([ear_main, mar_main, puc_main, moe_main])

        frame_before_run += 1
        if frame_before_run >= 15 and len(input_data) == 20:
            frame_before_run = 0
            label = get_classification(input_data)
            print ('got label ', label)
        
        cv2.putText(image, "EAR: %.2f" %(ear_main), (int(0.02*image.shape[1]), int(0.07*image.shape[0])),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
        cv2.putText(image, "MAR: %.2f" %(mar_main), (int(0.27*image.shape[1]), int(0.07*image.shape[0])),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
        cv2.putText(image, "PUC: %.2f" %(puc_main), (int(0.52*image.shape[1]), int(0.07*image.shape[0])),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
        cv2.putText(image, "MOE: %.2f" %(moe_main), (int(0.77*image.shape[1]), int(0.07*image.shape[0])),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
        if label is not None:
            if label == 0:
                color = (0, 255, 0)
            else:
                color = (0, 0, 255)
            cv2.putText(image, "%s" %(states[label]), (int(0.02*image.shape[1]), int(0.2*image.shape[0])),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 2)

        cv2.imshow('MediaPipe FaceMesh', image)
        if cv2.waitKey(5) & 0xFF == ord("q"):
            break
    
    cv2.destroyAllWindows()
    cap.release()

    
right_eye = [[33, 133], [160, 144], [159, 145], [158, 153]] # right eye landmark positions
left_eye = [[263, 362], [387, 373], [386, 374], [385, 380]] # left eye landmark positions
mouth = [[61, 291], [39, 181], [0, 17], [269, 405]] # mouth landmark coordinates
states = ['alert', 'drowsy']

# Declaring FaceMesh model
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    min_detection_confidence=0.3, min_tracking_confidence=0.8)
mp_drawing = mp.solutions.drawing_utils 
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

model_lstm_path = "C:\\Users\\bhavy\\cnn\\clf_lstm_jit6 (3).pth"
model = torch.jit.load(model_lstm_path)
model.eval()

print ('Starting calibration. Please be in neutral state')
time.sleep(5)
ears_norm, mars_norm, pucs_norm, moes_norm = calibrate()

print ('Starting main application')
time.sleep(1)
infer(ears_norm, mars_norm, pucs_norm, moes_norm)

face_mesh.close() 


Starting calibration. Please be in neutral state


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Starting main application
got label  0
got label  0
got label  0
got label  0
got label  0
got label  0
got label  0
got label  1
got label  0
got label  0
got label  1
got label  1
got label  0
got label  0
got label  0
got label  0
