#### Step 1: Project inspired by the following articles:
https://learnopencv.com/driver-drowsiness-detection-using-mediapipe-in-python/
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10384496/#:~:text=The%20overall%20performance%20of%20the,%25%20for%20left%2Dsided%20falling.

database: 
https://www.kaggle.com/code/scratchpad/notebook18b582f770/edit

Note that you can choose which landmark coordinates you wish to analyze with OpenCV.

In [75]:
# initial imports:
import cv2
import mediapipe as mp

import numpy as np
from numpy import asarray
# import pandas as pd
# import tensorflow as tf
import os
import csv
# from tensorflow.keras.losses import SparseCategoricalCrossentropy
# from tensorflow.keras.optimizers import Adam
import time
# import streamlit as st
from matplotlib import pyplot as plt
from PIL import Image

import json
import requests
 


### first extract the necessary landmarks for the face and yawning


In [76]:
def setup_file(csv_file, num_dimension=3, num_coords=33 + 21 + 21):
    """Create (or overwrite) ``csv_file`` with a single landmark header row.

    The header has one column per axis per landmark, numbered from 1, e.g.
    ``x1, y1, z1, x2, y2, z2, ...``.

    Args:
        csv_file: Path of the CSV file to create. Existing content is lost.
        num_dimension: 2 writes ``x, y`` columns, 3 writes ``x, y, z``; any
            other value writes all four columns ``x, y, z, v``.
        num_coords: Number of landmarks. The default is 33 pose landmarks
            plus 21 landmarks for each hand.
    """
    if num_dimension == 2:
        axes = ("x", "y")
    elif num_dimension == 3:
        axes = ("x", "y", "z")
    else:
        # Any other value is taken to mean "all coordinates".
        axes = ("x", "y", "z", "v")

    landmarks = [
        f"{axis}{val}"
        for val in range(1, num_coords + 1)
        for axis in axes
    ]

    print("Initialized an empty landmarks of size:", len(landmarks))

    with open(csv_file, mode='w', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(landmarks)
    

### Plotting the landmarks chosen and writing only those out

In [77]:
def plot(
    *,
    img_dt,
    img_eye_lmks=None,
    img_eye_lmks_chosen=None,
    face_landmarks=None,
    ts_thickness=1,
    ts_circle_radius=2,
    lmk_circle_radius=3,
    name="1",
):
    """Show three side-by-side views of one face image.

    The panels are: the face mesh tessellation, every eye landmark
    (``all_idxs``) and only the chosen eye landmarks (``all_chosen_idxs``).

    Args:
        img_dt: Image the tessellation is drawn on (modified in place). Its
            height and width are used to convert normalized landmark
            coordinates into pixels.
        img_eye_lmks: Image for the "all eye landmarks" panel; defaults to a
            copy of ``img_dt`` taken before anything is drawn.
        img_eye_lmks_chosen: Image for the "chosen landmarks" panel; same
            default as above.
        face_landmarks: Landmark list for one face, i.e. one element of
            ``results.multi_face_landmarks``. When ``None`` the three images
            are shown without any overlay.
        ts_thickness: Line thickness of the tessellation.
        ts_circle_radius: Circle radius passed to the tessellation spec.
        lmk_circle_radius: Radius of the circles drawn for eye landmarks.
        name: Currently unused.
    """
    # For plotting Face Tessellation
    image_drawing_tool = img_dt

    # Copies are taken before drawing so they do not contain the tessellation.
    image_eye_lmks = img_dt.copy() if img_eye_lmks is None else img_eye_lmks
    img_eye_lmks_chosen = (
        img_dt.copy() if img_eye_lmks_chosen is None else img_eye_lmks_chosen
    )

    # Pixel dimensions come from the image itself rather than from globals.
    img_h, img_w = img_dt.shape[:2]

    # Initializing drawing utilities for plotting face mesh tessellation
    connections_drawing_spec = mp_drawing.DrawingSpec(
        thickness=ts_thickness,
        circle_radius=ts_circle_radius,
        color=(255, 255, 255),
    )

    # Initialize a matplotlib figure.
    fig = plt.figure(figsize=(20, 15))
    fig.set_facecolor("white")

    if face_landmarks is not None:
        # Draw landmarks on face using the drawing utilities.
        mp_drawing.draw_landmarks(
            image=image_drawing_tool,
            landmark_list=face_landmarks,
            connections=mp_facemesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=connections_drawing_spec,
        )
        # Object holding the x, y, and z coordinates for each landmark
        landmarks = face_landmarks.landmark
    else:
        landmarks = ()

    white = (255, 255, 255)
    for landmark_idx, landmark in enumerate(landmarks):
        in_all = landmark_idx in all_idxs
        in_chosen = landmark_idx in all_chosen_idxs
        if not (in_all or in_chosen):
            continue

        pred_cord = denormalize_coordinates(landmark.x, landmark.y, img_w, img_h)
        # NOTE(review): the MediaPipe helper appears to return None for
        # landmarks outside the image; cv2.circle cannot draw those.
        if pred_cord is None:
            continue

        if in_all:
            cv2.circle(image_eye_lmks, pred_cord, lmk_circle_radius, white, -1)
        if in_chosen:
            cv2.circle(img_eye_lmks_chosen, pred_cord, lmk_circle_radius, white, -1)

    # Plot post-processed images
    plt.subplot(1, 3, 1)
    plt.title("Face Mesh Tessellation", fontsize=18)
    plt.imshow(image_drawing_tool)
    plt.axis("off")

    plt.subplot(1, 3, 2)
    plt.title("All eye landmarks", fontsize=18)
    plt.imshow(image_eye_lmks)
    plt.axis("off")

    plt.subplot(1, 3, 3)
    plt.imshow(img_eye_lmks_chosen)
    plt.title("Chosen landmarks", fontsize=18)
    plt.axis("off")
    plt.show()
    plt.close()
    return

### Capture function for each part of the face

In [78]:
def capture_nose(image, face_landmarks, csv_file):
    """Write the latest nose-tip pixel position to ``csv_file`` as JSON.

    Only the most recent value is kept: the file is overwritten on every
    call and ends up containing a single JSON list ``[x, y]``.

    Args:
        image: Frame the landmarks were detected on; only ``image.shape``
            (height, width, ...) is read.
        face_landmarks: Landmark list for one face. Landmark index 1 is the
            one this file treats as the nose tip.
        csv_file: Destination path. Despite the parameter name the content
            is JSON, not CSV.
    """
    # Landmark coordinates are normalized, so scale them by the frame size.
    nose_tip = face_landmarks.landmark[1]
    x = int(nose_tip.x * image.shape[1])
    y = int(nose_tip.y * image.shape[0])

    # Mode 'w' truncates the file, so only the last value is ever stored.
    with open(csv_file, mode='w', newline='') as f:
        json.dump([x, y], f)

In [79]:
# Short aliases for the MediaPipe solution modules used in this notebook.
mp_hands = mp.solutions.hands
mp_facemesh = mp.solutions.face_mesh
mp_drawing  = mp.solutions.drawing_utils
# NOTE(review): this is an underscore-prefixed (private) helper of
# drawing_utils, so it may change between MediaPipe releases. It is called
# in this file as (normalized_x, normalized_y, image_width, image_height).
denormalize_coordinates = mp_drawing._normalized_to_pixel_coordinates
 

In [80]:
# Unique landmark indices of each eye: the MediaPipe connection collection
# is flattened with np.ravel and de-duplicated through set().
all_left_eye_idxs = set(np.ravel(list(mp_facemesh.FACEMESH_LEFT_EYE)))
all_right_eye_idxs = set(np.ravel(list(mp_facemesh.FACEMESH_RIGHT_EYE)))

# Every landmark index that belongs to either eye (used for plotting).
all_idxs = all_left_eye_idxs | all_right_eye_idxs

# The 12 hand-picked points, six per eye, in the order P1..P6.
chosen_left_eye_idxs = [362, 385, 387, 263, 373, 380]
chosen_right_eye_idxs = [33, 160, 158, 133, 153, 144]
all_chosen_idxs = [*chosen_left_eye_idxs, *chosen_right_eye_idxs]

def capture_eye(image, face_landmarks, file_name):
    """Append the pixel position of landmark 0 to the CSV at ``file_name``.

    One row ``[x, y]`` is appended per call, which matches the two-column
    header that ``setup_file(file_name, 2, 1)`` produces.

    Args:
        image: Frame the landmarks were detected on; only ``image.shape``
            (height, width, channels) is read.
        face_landmarks: Landmark list for one face, i.e. one element of
            ``results.multi_face_landmarks``.
        file_name: Path of the CSV file to append to.
    """
    imgH, imgW, _ = image.shape

    # TODO: only landmark 0 is captured for now; the chosen eye landmarks
    # are not written yet.
    landmark_0 = face_landmarks.landmark[0]
    print(landmark_0)

    # Landmark coordinates are normalized, so scale them by the frame size.
    landmark_0_x = landmark_0.x * imgW
    landmark_0_y = landmark_0.y * imgH
    # NOTE(review): z is scaled by the width like x - confirm against the
    # MediaPipe documentation. It is printed but not written to the file.
    landmark_0_z = landmark_0.z * imgW

    print()
    print("X:", landmark_0_x)
    print("Y:", landmark_0_y)
    print("Z:", landmark_0_z)

    print()
    print("Total Length of '.landmark':", len(face_landmarks.landmark))

    # writing into the correct csv file
    row = [landmark_0_x, landmark_0_y]
    with open(file_name, mode='a', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # writerow expects a list
        csv_writer.writerow(row)

In [81]:
def capture_pose(results, face_landmarks=None, csv_file=None):
    """Placeholder for capturing the remaining (non-eye, non-nose) landmarks.

    Not implemented yet: the function does nothing and returns ``None``.
    The optional parameters exist so that the three-argument call made from
    ``open_cam`` does not raise ``TypeError``.

    Args:
        results: First positional argument; ``open_cam`` passes the current
            frame here.
        face_landmarks: Landmark list for one face (currently ignored).
        csv_file: Destination path (currently ignored).
    """
    pass

## First, I'll transform the images in the database into opencv values


In [90]:
def open_cam():
    """Run the webcam loop: detect a face mesh and record landmarks.

    Each frame from the default webcam is passed through MediaPipe Face
    Mesh, the face contours are drawn on it and it is shown in a window.
    For frames in which a face is detected:

    * every 30th frame the nose position is written to ``nose.json``;
    * every 10th frame the eye capture is appended to ``eye.csv`` and
      ``capture_pose`` is called for ``pose.csv``.

    Press ``q`` in the window to stop. The camera and the windows are
    released even if one of the capture helpers raises.
    """
    mp_face_mesh = mp.solutions.face_mesh
    mp_drawing = mp.solutions.drawing_utils
    drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

    cap = cv2.VideoCapture(0)  # 0 for the default webcam

    try:
        with mp_face_mesh.FaceMesh(
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        ) as face_mesh:

            # Counts successfully read frames; used to skip frames.
            frame_number = 0
            # The eye.csv header is written once, right before the first
            # eye capture, even when no face is visible on frame 0.
            eye_file_ready = False

            while cap.isOpened():
                success, image = cap.read()
                if not success:
                    print("Ignoring empty camera frame.")
                    continue

                # Convert the BGR image to RGB.
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                # To improve performance, mark the image as not writeable.
                image.flags.writeable = False
                results = face_mesh.process(image)

                # Draw the face mesh annotations on the image.
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                if results.multi_face_landmarks:
                    for face_landmarks in results.multi_face_landmarks:
                        # Draw face landmarks.
                        mp_drawing.draw_landmarks(
                            image=image,
                            landmark_list=face_landmarks,
                            connections=mp_face_mesh.FACEMESH_CONTOURS,
                            landmark_drawing_spec=drawing_spec,
                            connection_drawing_spec=drawing_spec,
                        )

                        # Capture the nose every thirty frames.
                        if frame_number % 30 == 0:
                            capture_nose(image, face_landmarks, "nose.json")

                        # Capture the eyes and the pose every ten frames.
                        if frame_number % 10 == 0:
                            if not eye_file_ready:
                                # Two dimensions (x, y) for one coordinate.
                                setup_file("eye.csv", 2, 1)
                                eye_file_ready = True
                            capture_eye(image, face_landmarks, "eye.csv")
                            # TODO here, I would run my machine learning model to check if the person has eyes closed

                            capture_pose(image, face_landmarks, "pose.csv")
                            # TODO here, I run a second machine learning model that tells if the person is yawning

                # Display the resulting frame
                cv2.imshow('MediaPipe Face Mesh', image)

                # stop the process
                if cv2.waitKey(5) & 0xFF == ord('q'):
                    break

                frame_number += 1
    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()
  
    
  # For webcam input:
  # cap = cv2.VideoCapture(0)
  # # ! old code that worked with hand gestures.
  # while cap.isOpened():
  #   success, image = cap.read()
  #   if not success:
  #     print("Ignoring empty camera frame.")
  #     # If loading a video, use 'break' instead of 'continue'.
  #     continue

  #     # Flip the image horizontally for a later selfie-view display, and convert
  #     # the BGR image to RGB.
  #   image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
  #     # To improve performance, optionally mark the image as not writeable to
  #     # pass by reference.
  #   image.flags.writeable = False

  #   stop = capture_eye(image)
  #   if stop:
  #     break
      #!results contains all the information about the image, in this case, we are looking at hands
      # results = hands.process(image)
      
  # #!checks for both hands, and looks if there is data
      # if results.multi_hand_landmarks:
      #   #!extracting the information from the right hand
      #   for hand in results.multi_hand_landmarks:

      #     both_hand = hand
      #     hand_row = list(np.array([[landmark.x, landmark.y] for ids, landmark in both_hand]).flatten())

      # if results.multi_hand_landmarks:
      #   for hand_landmarks in results.multi_hand_landmarks:
      #     empty_row = []
      #     # Here is How to Get All the Coordinates
      #     for ids, landmrk in enumerate(hand_landmarks.landmark):
      #         # print(ids, landmrk)
      #         cx, cy= landmrk.x * image_width, landmrk.y*image_height, #landmrk.z
      #         empty_row.append(cx)
      #         empty_row.append(cy)
      #         # empty_row.append(cz)
      #         # print(cx, cy)
      #     mp_drawing.draw_landmarks(
      #         image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
      #     if index % num_val == 0:
      #       rows.append(empty_row)
      #     index += 1
          # print(len(empty_row))
        # #!else, I wanna just write 0 for the information about the hands.
      # else:
      #   #skip what is not usefulq 
      # #   both_hand_row = list(np.array([[0,0] for i in range(42)]).flatten())
      #   # empty_row = [0 for i in range(42)]
      #   # rows.append(empty_row)
      #   continue

      #!this is what shows the hands
  #     cv2.imshow('MediaPipe Pose', image)

  # # After the loop release the cap object 
  # cap.release() 

  # #closes all instance of the camera
  # cv2.destroyAllWindows()


In [91]:
# Start the webcam loop. open_cam() takes no arguments: the capture
# intervals (in frames) are hard-coded inside the function.
open_cam()

### then extract all of the eye landmarks


The %matplotlib inline command tells the IPython environment to draw the plots immediately after the current cell. The drawn plots are shown below the code and stored in the notebook document for future reference.

In [84]:
%matplotlib inline

### open live feed and start getting face coordinates, but feed each face coordinates to a different model

#### Step 2: Get the position of the nose for real-time tracking
and check the delta of the nose position to track the person's position on the screen