In [2]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import pickle 
import cv2
import mediapipe as mp
import numpy as np
from time import time

# Short aliases for the Mediapipe solution modules used in this notebook.
# process_video instantiates FaceDetection and FaceMesh from the first two.
mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh
# NOTE(review): mp_drawing is not referenced in the cells visible here.
mp_drawing = mp.solutions.drawing_utils

In [3]:
#import sys
#!{sys.executable} -m pip freeze > requirements.txt

In [4]:
def save_features_to_csv(df_features, file_path):
    """Write ``df_features`` to ``file_path`` as CSV without the row index.

    Args:
        df_features: The pandas DataFrame to persist.
        file_path: Destination path of the CSV file.
    """
    df_features.to_csv(path_or_buf=file_path, index=False)

# Calculate the eye aspect ratio
def calculate_eye_aspect_ratio(eye_landmarks):
    """Return the eye aspect ratio (EAR) computed from six eye landmarks.

    With the points named p0..p5 in the order given, the ratio is
    ``(|p1 - p5| + |p2 - p4|) / (2 * |p0 - p3|)`` using Euclidean distances.

    Args:
        eye_landmarks: Array-like holding at least six points (one per row),
            e.g. a NumPy array or a nested list of coordinates.

    Returns:
        The ratio as a float. NaN is returned when p0 and p3 coincide, because
        the ratio is undefined there; this avoids a division by zero (which
        would otherwise yield inf/NaN together with a RuntimeWarning).
    """
    # Coerce to an array so plain nested lists work as well as ndarrays.
    points = np.asarray(eye_landmarks, dtype=float)

    span_15 = np.linalg.norm(points[1] - points[5])
    span_24 = np.linalg.norm(points[2] - points[4])
    span_03 = np.linalg.norm(points[0] - points[3])

    if span_03 == 0:
        # Degenerate landmarks: the denominator would be zero.
        return np.nan
    return (span_15 + span_24) / (2.0 * span_03)

# Normalize a vector to unit length (a zero vector is returned unchanged)
def normalize_vector(v):
    """Scale ``v`` to unit Euclidean length.

    A vector whose norm is exactly zero is handed back untouched, which avoids
    dividing by zero.

    Args:
        v: NumPy array to normalize.

    Returns:
        ``v`` divided by its norm, or ``v`` itself when the norm is zero.
    """
    magnitude = np.linalg.norm(v)
    return v if magnitude == 0 else v / magnitude


In [5]:
# Folder locations used by this notebook. process_video writes its
# "<video name>_features.csv" output into result_folder.
# NOTE(review): presumably video_folder holds the input clips (the test cell
# reads from the same directory) — confirm. Both paths are absolute and
# machine-specific, so they must be adjusted before running elsewhere.
video_folder = '/Users/iduli/Desktop/Ch2_25_Scientific_Data/Final/raw_Dataset/SENSORS/VIDEO/parsed_video'
result_folder = '/Users/iduli/Desktop/Ch2_25_Scientific_Data/Final/raw_Dataset/SENSORS/VIDEO'

# Mediapipe, OpenCV and interval configurations
# EAR_THRESHOLD: eye-aspect-ratio cut-off used by process_video's blink check
# (an eye crossing from >= threshold to < threshold counts as a blink).
EAR_THRESHOLD = 0.3
# NOTE(review): presumably an OpenCV FourCC codec code for video writing; it is
# not referenced in the cells visible here.
FOURCC = 'XVID'

In [None]:
def process_video(video_path):
    """Extract per-frame facial features from a video and save them as CSV.

    Every frame that can be read is passed through Mediapipe face detection
    and face mesh. One row per frame is collected and finally written via
    ``save_features_to_csv`` to ``<video base name>_features.csv`` inside the
    module-level ``result_folder``.

    Columns written:
        Timestamp: ``cv2.CAP_PROP_POS_MSEC`` queried just before the frame is
            read.
        rotation_x, rotation_y, rotation_z: components of the rotation vector
            returned by ``cv2.solvePnP``.
        Total Movement: Euclidean norm of the solvePnP translation vector.
        Gaze X, Gaze Y: first two components of the unit vector from the nose
            tip (landmark 1) to the midpoint of the two iris centres.
        Left/Right Lip Movement: distance from the current lip corner to the
            current nose tip minus the distance from the previous lip corner
            to the current nose tip.
        Blink Count: reset on every frame; incremented when either eye's
            aspect ratio crosses from >= EAR_THRESHOLD to < EAR_THRESHOLD.
        error_type: 2 when no face was detected (all features are NaN); left
            empty (NaN) for frames that were processed.

    A failed ``cap.read()`` ends processing and produces no row.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        None. When the video cannot be opened a message is printed and no CSV
        is written.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Cannot open video {video_path}")
        return None

    video_file = os.path.basename(video_path)
    print(video_file)
    base_name = os.path.splitext(video_file)[0]

    columns = [
        "Timestamp", "rotation_x", "rotation_y", "rotation_z",
        "Total Movement", "Gaze X", "Gaze Y",
        "Left Lip Movement", "Right Lip Movement",
        "Blink Count", 'error_type']
    # Number of feature columns between "Timestamp" and "error_type".
    n_feature_columns = len(columns) - 2

    # Face-mesh landmark indices used for head-pose estimation, paired
    # one-to-one (same order) with the static 3D reference points below.
    # TODO (from the original author): consider deriving the reference points
    # dynamically from the landmarks instead of using static values.
    pose_landmark_ids = [1, 33, 263, 61, 291, 199]
    model_points = np.array([
        (0.0, 0.0, 0.0),
        (-30.0, -125.0, -30.0),
        (30.0, -125.0, -30.0),
        (-60.0, -70.0, -60.0),
        (60.0, -70.0, -60.0),
        (0.0, -150.0, -100.0)
    ])
    # Zero distortion coefficients are passed to solvePnP.
    dist_coeffs = np.zeros((4, 1))

    feature_list = []
    prev_lip_corner_left, prev_lip_corner_right = None, None
    prev_left_eye_aspect_ratio, prev_right_eye_aspect_ratio = None, None

    try:
        with mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.7) as face_detection, \
             mp_face_mesh.FaceMesh(min_detection_confidence=0.7, min_tracking_confidence=0.7,
                                   refine_landmarks=True) as face_mesh:
            while cap.isOpened():
                # Position (ms) of the frame about to be read, relative to the
                # start of the video. The original author noted that frames 0
                # and 1 are reported with the same time, and that the video's
                # start time is meant to be added to this value later.
                frame_time = cap.get(cv2.CAP_PROP_POS_MSEC)
                read_ok, image = cap.read()
                if not read_ok:
                    # Frame could not be read: stop without emitting a row.
                    break

                # Mediapipe expects RGB input; marking the array read-only
                # lets it be passed by reference.
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image_rgb.flags.writeable = False

                results = face_detection.process(image_rgb)
                mesh_results = face_mesh.process(image_rgb)

                if not results.detections:
                    # Face detection failed: all features NaN, error_type = 2.
                    feature_list.append([frame_time] + [np.nan] * n_feature_columns + [2])
                    continue

                frame_height, frame_width = image.shape[:2]

                # Defaults used when no mesh is found or solvePnP fails.
                rotation_x, rotation_y, rotation_z = np.nan, np.nan, np.nan
                total_translation = np.nan
                gaze_direction = np.array([np.nan, np.nan])
                left_lip_movement, right_lip_movement = np.nan, np.nan
                blink_count = 0  # Reset blink count for each frame

                if mesh_results.multi_face_landmarks:
                    for face_landmarks in mesh_results.multi_face_landmarks:
                        landmarks = np.array([(lm.x, lm.y, lm.z) for lm in face_landmarks.landmark])

                        # Head pose: scale the normalized landmark x/y values
                        # to pixel coordinates for solvePnP.
                        image_points = np.ascontiguousarray(
                            landmarks[pose_landmark_ids][:, :2] * np.array([frame_width, frame_height]),
                            dtype=np.float64)

                        # Camera matrix: focal length set to the frame width,
                        # principal point at the frame centre.
                        focal_length = frame_width
                        camera_matrix = np.array([
                            [focal_length, 0, frame_width / 2],
                            [0, focal_length, frame_height / 2],
                            [0, 0, 1]
                        ], dtype="double")

                        pnp_ok, rotation_vector, translation_vector = cv2.solvePnP(
                            model_points, image_points, camera_matrix, dist_coeffs)
                        if not pnp_ok:
                            continue

                        rotation_x, rotation_y, rotation_z = rotation_vector.ravel()
                        total_translation = np.linalg.norm(translation_vector)

                        # Eye aspect ratios and blink check against the
                        # previous frame that produced landmarks.
                        left_ear = calculate_eye_aspect_ratio(landmarks[[33, 160, 158, 133, 153, 144]])
                        right_ear = calculate_eye_aspect_ratio(landmarks[[362, 385, 387, 263, 373, 380]])

                        if prev_left_eye_aspect_ratio is not None and prev_right_eye_aspect_ratio is not None:
                            left_closed = left_ear < EAR_THRESHOLD and prev_left_eye_aspect_ratio >= EAR_THRESHOLD
                            right_closed = right_ear < EAR_THRESHOLD and prev_right_eye_aspect_ratio >= EAR_THRESHOLD
                            if left_closed or right_closed:
                                blink_count += 1

                        prev_left_eye_aspect_ratio = left_ear
                        prev_right_eye_aspect_ratio = right_ear

                        # Gaze direction: iris landmarks exist because the
                        # mesh is created with refine_landmarks=True.
                        left_iris_center = np.mean(landmarks[[474, 475, 476, 477]], axis=0)
                        right_iris_center = np.mean(landmarks[[469, 470, 471, 472]], axis=0)
                        nose_tip = landmarks[1]
                        gaze_direction = normalize_vector((left_iris_center + right_iris_center) / 2.0 - nose_tip)

                        # Lip movement relative to the previous lip corners.
                        left_lip_corner = landmarks[61]
                        right_lip_corner = landmarks[291]
                        if prev_lip_corner_left is not None and prev_lip_corner_right is not None:
                            left_lip_movement = (np.linalg.norm(left_lip_corner - nose_tip)
                                                 - np.linalg.norm(prev_lip_corner_left - nose_tip))
                            right_lip_movement = (np.linalg.norm(right_lip_corner - nose_tip)
                                                  - np.linalg.norm(prev_lip_corner_right - nose_tip))

                        prev_lip_corner_left = left_lip_corner
                        prev_lip_corner_right = right_lip_corner

                # Every row carries all 11 columns. error_type stays NaN for
                # processed frames, so the DataFrame no longer depends on
                # pandas padding short rows (and no longer raises when every
                # frame contains a face).
                feature_list.append([
                    frame_time, rotation_x, rotation_y, rotation_z, total_translation,
                    gaze_direction[0], gaze_direction[1],
                    left_lip_movement, right_lip_movement, blink_count, np.nan])
    finally:
        # Release the capture even when processing raises.
        cap.release()

    csv_file_path = os.path.join(result_folder, f'{base_name}_features.csv')
    save_features_to_csv(pd.DataFrame(feature_list, columns=columns), csv_file_path)

In [None]:
# Smoke test: run the feature extraction on a single clip.
# NOTE(review): the path is absolute and machine-specific.
process_video('/Users/iduli/Desktop/Ch2_25_Scientific_Data/Final/raw_Dataset/SENSORS/VIDEO/parsed_video/3_c3.mp4')