In [1]:
# --- IMPORT MEDIAPIPE LÊN ĐẦU TIÊN ---
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import Image as MpImage

import cv2 # Thư viện này thường gây xung đột
import numpy as np
import pandas as pd
import os
import time
from scipy.spatial import distance as dist #Sử dụng spicy để tính các CT Euclidean

In [2]:
#--- HẰNG SỐ CƠ BẢN ---
DATA_ROOT = "./Datasets_train"
OUTPUT_DIR = "./Outputs" # Output Directory
OUTPUT_CSV = os.path.join(OUTPUT_DIR, 'training-feature.csv')
DUMMY_VALUE = -999.0 # Giá trị Dummy / Place holder khi dữ liệu không tin cậy

In [106]:
#--- 1. Chỉ mục Mediapipe ---
RIGHT_EYE_INDICES = [33, 160, 158, 133, 153, 144]
LEFT_EYE_INDICES = [362, 385, 387, 263, 373, 380]
MOUTH_INDICES = [61, 291, 0, 17] # Chỉ cần 4 điểm chính cho MAR cơ bản

# Chỉ mục cho Pose Estimation (Yaw / Pitch / Roll) - 5 điểm
# Mũi (1), Mắt trái (33), Mắt phải (263), Miệng trái (61), Miệng phải (291)
HEAD_POSE_INDICES = [1, 33, 263, 61, 291, 152]

# Chỉ mục cho Pose Landmarker (MP Pose - 33 điểm)
NOSE_POSE_INDEX = 0
LEFT_SHOULDER_INDEX = 11
RIGHT_SHOULDER_INDEX = 12

#--- 2. Hằng số PnP (Perspective-n-Point) (Cho Head Pose) --- 
"""
Trong nhiều dự án giám sát người lái xe, CAMERA_MATRIX thường được ước tính
(ví dụ: fx = fy = width và tâm ảnh là trung tâm ảnh) vì việc calibrate (hiệu chuẩn) camera
là không thực tế.

solvePnP sử dụng CAMERA_MATRIX này để mô phỏng cách khuôn mặt 3D được chiếu lên mặt phẳng 2D của camera.
"""

MODEL_3D_POINTS = np.array([
    (0.0, 0.0, 0.0),            # Nose Tip (1)
    (-225.0, 170.0, -135.0),    # Left Eye (33)
    (225.0, 170.0, -135.0),     # Right Eye (263)
    (-150.0, -150.0, -125.0),   # Left Mouth (61)
    (150.0, -150.0, -125.0),    # Right Mouth (291)
    (0.0, -330.0, -65.0)        # Chin (152)
], dtype=np.float32)

#--- 3. Ngưỡng lọc dữ liệu bẩn ---
MAX_ACCEPTABLE_YAW = 45.0       # Độ quay ngang tối đa chấp nhận
MAX_ACCEPTABLE_PITCH = 45.0     # Độ gập tối đa chấp nhận

In [107]:
# 1. Hàm tính tỷ lệ khung hình mắt (EAR)
def eye_aspect_ratio(eye_coords):
    # eye_coords: 6 điểm mắt (p1-p6)
    # Lấy 6 điểm theo thứ tự (giả định đã đúng: p1-p4 là ngang, p2-p6 và p3-p5 là dọc)

    p1, p2, p3, p4, p5, p6 = eye_coords

    # Khoảng cách dọc:
    vertical_1 = dist.euclidean(p2, p6)
    vertical_2 = dist.euclidean(p3, p5)

    # Khoảng cách ngang:
    horizontal = dist.euclidean(p1, p4)

    # Công thức EAR:
    if horizontal == 0:
        return 0.001
    return (vertical_1 + vertical_2) / (2.0 * horizontal)

In [108]:
# 2. Hàm tính tỷ lệ khung hình miệng (MAR)
def mouth_aspect_ratio(mouth_coords):
    # mouth_coords: 4 điểm môi (61, 291, 0, 17)
    p1_h, p4_h, p2_v, p6_v = mouth_coords

    # Khoảng cách dọc
    vertical = dist.euclidean(p2_v, p6_v)
    # Khoảng cách ngan
    horizontal = dist.euclidean(p1_h, p4_h)

    if horizontal == 0:
        return 0.001
    return vertical / horizontal

In [109]:
#--- Hàm Tính góc quay đầu (Sử dụng PnP) ---
import math

def _compute_fallback_head_pose(landmarks_list, w, h):
    """Approximate yaw/pitch/roll from 2D face landmarks if solvePnP fails.
    This is a heuristic: yaw from nose horizontal offset, pitch from nose vertical offset,
    roll from eye-line tilt.
    Returns (yaw, pitch, roll) in degrees.
    """
    try:
        xs = [float(lm.x) * w for lm in landmarks_list]
        ys = [float(lm.y) * h for lm in landmarks_list]
        min_x, max_x = min(xs), max(xs)
        min_y, max_y = min(ys), max(ys)
        face_w = max_x - min_x if (max_x - min_x) > 1e-6 else float(w)
        face_h = max_y - min_y if (max_y - min_y) > 1e-6 else float(h)

        # key points (use safe indexing)
        def safe_pt(idx):
            try:
                p = landmarks_list[idx]
                return float(p.x) * w, float(p.y) * h
            except Exception:
                return (min_x + face_w/2.0, min_y + face_h/2.0)

        nose_x, nose_y = safe_pt(1)
        left_eye_x, left_eye_y = safe_pt(33)
        right_eye_x, right_eye_y = safe_pt(263)

        center_x = (min_x + max_x) / 2.0
        center_y = (min_y + max_y) / 2.0

        # Yaw: horizontal offset of nose relative to face width scaled to degrees
        yaw = ((nose_x - center_x) / (face_w + 1e-6)) * 60.0

        # Pitch: vertical offset (positive when nose is above center -> looking up)
        pitch = ((center_y - nose_y) / (face_h + 1e-6)) * 40.0

        # Roll: angle of the line between eyes
        roll_rad = math.atan2((right_eye_y - left_eye_y), (right_eye_x - left_eye_x + 1e-6))
        roll = math.degrees(roll_rad)

        # clamp to plausible human ranges
        yaw = max(min(yaw, 120.0), -120.0)
        pitch = max(min(pitch, 120.0), -120.0)
        roll = max(min(roll, 120.0), -120.0)

        return yaw, pitch, roll
    except Exception:
        return DUMMY_VALUE, DUMMY_VALUE, DUMMY_VALUE


def get_head_pose(landmarks_list, w, h):
    """
    Compute yaw, pitch, roll (in degrees) from face landmarks using solvePnP.
    Returns ((yaw, pitch, roll), image_points) or, on failure, attempts a heuristic fallback
    and still returns numeric yaw/pitch/roll (not DUMMY) where possible.
    """
    try:
        # Build 2D image points as floats (do NOT cast to int)
        image_points = []
        for i in HEAD_POSE_INDICES:
            lm = landmarks_list[i]
            x = float(lm.x) * float(w)
            y = float(lm.y) * float(h)
            image_points.append((x, y))
        image_points = np.array(image_points, dtype=np.float32)

        # Camera matrix: apply a focal factor to stabilize PnP
        FOCAL_FACTOR = 2.5
        focal_length = FOCAL_FACTOR * float(w)
        center = (float(w) / 2.0, float(h) / 2.0)
        camera_matrix = np.array([
            [focal_length, 0.0, center[0]],
            [0.0, focal_length, center[1]],
            [0.0, 0.0, 1.0]
        ], dtype="double")

        dist_coeffs = np.zeros((4, 1))

        # Prefer ITERATIVE solver for stability
        success, rotation_vector, translation_vector = cv2.solvePnP(
            MODEL_3D_POINTS, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE
        )

        if success:
            rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

            # Stable conversion to Euler angles (Tait-Bryan ZYX: yaw (z), pitch (y), roll (x))
            sy = math.sqrt(rotation_matrix[0, 0] * rotation_matrix[0, 0] + rotation_matrix[1, 0] * rotation_matrix[1, 0])
            singular = sy < 1e-6
            if not singular:
                rx = math.atan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
                ry = math.atan2(-rotation_matrix[2, 0], sy)
                rz = math.atan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
            else:
                rx = math.atan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
                ry = math.atan2(-rotation_matrix[2, 0], sy)
                rz = 0.0

            roll = math.degrees(rx)
            pitch = math.degrees(ry)
            yaw = math.degrees(rz)

            # sanity clamp
            yaw = max(min(yaw, 180.0), -180.0)
            pitch = max(min(pitch, 180.0), -180.0)
            roll = max(min(roll, 180.0), -180.0)

            return (yaw, pitch, roll), image_points

        # If solvePnP failed, try a 2D-landmark heuristic fallback (still return numeric angles)
        fallback_yaw, fallback_pitch, fallback_roll = _compute_fallback_head_pose(landmarks_list, w, h)
        return (fallback_yaw, fallback_pitch, fallback_roll), image_points

    except Exception:
        # As a last resort return DUMMYs
        return (DUMMY_VALUE, DUMMY_VALUE, DUMMY_VALUE), None

In [110]:
# --- Debug: print head-pose (yaw/pitch/roll) for up to 5 sample images ---
# This cell will attempt to initialize MediaPipe landmarkers if the .task files exist.
# If not present it will list up to 5 sample image paths for manual inspection.
import os

face_model_path = os.path.join('model', 'face_landmarker.task')
pose_model_path = os.path.join('model', 'pose_landmarker_full.task')

# collect up to 5 image paths from DATA_ROOT
sample_paths = []
for d in sorted(os.listdir(DATA_ROOT)):
    dd = os.path.join(DATA_ROOT, d)
    if not os.path.isdir(dd):
        continue
    for f in sorted(os.listdir(dd)):
        if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
            sample_paths.append(os.path.join(dd, f))
        if len(sample_paths) >= 5:
            break
    if len(sample_paths) >= 5:
        break

print('Sample images found:', len(sample_paths))
for p in sample_paths:
    print(' -', p)

if not (os.path.exists(face_model_path) and os.path.exists(pose_model_path)):
    print('\nMediaPipe .task files not found; cannot compute head-pose.\nPlace face_landmarker.task and pose_landmarker_full.task in ./model/ and re-run this cell to compute yaw/pitch/roll.')
else:
    try:
        base_options_face = python.BaseOptions(model_asset_path=face_model_path)
        face_options = vision.FaceLandmarkerOptions(base_options=base_options_face, running_mode=vision.RunningMode.IMAGE)
        face_landmarker = vision.FaceLandmarker.create_from_options(face_options)
    except Exception as e:
        print('Failed to initialize face landmarker:', e)
        face_landmarker = None

    if face_landmarker is None:
        print('Face landmarker not available; aborting head-pose debug.')
    else:
        import cv2
        for p in sample_paths:
            img = cv2.imread(p)
            if img is None:
                print('Failed to read', p)
                continue
            h, w = img.shape[:2]
            try:
                rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                mp_img = MpImage(image_format=mp.ImageFormat.SRGB, data=rgb)
                fres = face_landmarker.detect(mp_img)
                fl = getattr(fres, 'face_landmarks', None)
                if not fl or len(fl) == 0:
                    print(p, '-> no face detected')
                    continue
                landmarks = fl[0]
                (yaw, pitch, roll), _ = get_head_pose(landmarks, w, h)
                print(p, '-> yaw={:.2f}, pitch={:.2f}, roll={:.2f}'.format(yaw, pitch, roll))
            except Exception as e:
                print('Error processing', p, ':', e)

Sample images found: 5
 - ./Datasets_train\Awake\IMG20221206132733_jpg.rf.6916269feaae0a76717779a4fec1b405.jpg
 - ./Datasets_train\Awake\IMG20221206132734_jpg.rf.8857d95562d32d69b29405817a0ce73e.jpg
 - ./Datasets_train\Awake\IMG20221206132734_jpg.rf.fa2860bcb0f346ee11cdd90295ef33e9.jpg
 - ./Datasets_train\Awake\IMG20221206132738_jpg.rf.991a1644edbaf17262f08cdc5f66aaf3.jpg
 - ./Datasets_train\Awake\IMG20221206132910_jpg.rf.541ad8d4c39ec3c19531bbc08927248a.jpg
./Datasets_train\Awake\IMG20221206132733_jpg.rf.6916269feaae0a76717779a4fec1b405.jpg -> yaw=178.46, pitch=-18.56, roll=158.95
./Datasets_train\Awake\IMG20221206132734_jpg.rf.8857d95562d32d69b29405817a0ce73e.jpg -> yaw=-179.23, pitch=-25.06, roll=168.76
./Datasets_train\Awake\IMG20221206132734_jpg.rf.fa2860bcb0f346ee11cdd90295ef33e9.jpg -> yaw=-179.24, pitch=-25.14, roll=165.92
./Datasets_train\Awake\IMG20221206132738_jpg.rf.991a1644edbaf17262f08cdc5f66aaf3.jpg -> yaw=6.93, pitch=-5.24, roll=-146.26
./Datasets_train\Awake\IMG2022120

In [111]:
#--- Hàm tính toán độ gục đầu (Slump) và nghiêng vai (Tilt)
def calculate_slump_features(face_landmarks, pose_landmarks, w, h):
    """Tính khoảng cách đầu-vai (Slump) và độ nghiêng vai (Tilt)."""

    d_slump = 0.0
    r_tilt = 0.0

    # Kiểm tra tính sẵn có của landmark Pose (TH: Không thấy vai)
    if not pose_landmarks or len(pose_landmarks) < 13: # Cần ít nhất 13 điểm (đến vai)
        # Giả sử đầu nằm ở vị trí chuẩn nếu không tìm thấy pose
        d_slump = DUMMY_VALUE; r_tilt = DUMMY_VALUE
        return d_slump, r_tilt # -1.0, -1.0
    
    # Lấy điểm Vai và Mũi (sử dụng Pose Landmarker)
    p_left_shoulder = pose_landmarks[LEFT_SHOULDER_INDEX]
    p_right_shoulder = pose_landmarks[RIGHT_SHOULDER_INDEX]
    p_nose = pose_landmarks[NOSE_POSE_INDEX]

    # Chuyển đổi sang toạ độ pixel
    y_nose = p_nose.y * h
    y_shoulder_mid = (p_left_shoulder.y *h + p_right_shoulder.y * h) / 2

    # D_SLUMP (Khoảng cách dọc đầu so với vai)
    # Giá trị lớn / dương khi đầu gục xuống, nhỏ / âm khi đầu ngửa lên
    d_slump = y_nose - y_shoulder_mid # CẬP NHẬT CÔNG THỨC: Lấy Mũi - Vai để dương khi gục (mũi đi xuống)

    #R_TILT (Góc nghiêng vai)
    # Toạ độ pixel
    x_l_shoulder = p_left_shoulder.x * w
    y_l_shoulder = p_left_shoulder.y * h
    x_r_shoulder = p_right_shoulder.x * w
    y_r_shoulder = p_right_shoulder.y * h

    # Góc nghiêng so với dường ngang
    shoulder_angle_rad = np.arctan2(y_r_shoulder - y_l_shoulder, x_r_shoulder - x_l_shoulder)
    r_tilt = np.degrees(shoulder_angle_rad)

    return d_slump, r_tilt

In [112]:
#--- Hàm tổ hợp, nơi logic lọc dữ dữ liệu và logic phân loại Passed Out ---

def extract_features(image, face_landmarker, pose_landmarker):

    # 10 đặc trưng: EAR, MAR, PITCH, YAW, ROLL, D_SLUMP, R_TILT, EYE_CL (normalized), FACIAL_DISPLAYED, POSE_DISPLAYED
    features = np.zeros(10)
    try:
        h, w, c = image.shape
    except Exception:
        return features, False

    # Chuyển BGR sang RGB và tạo đối tượng Mediapipe Image
    try:
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mp_image = MpImage(image_format=mp.ImageFormat.SRGB, data=rgb_image)
    except Exception:
        return features, False

    # Phát hiện khuôn mặt và tư thế (bảo vệ bằng try/except)
    try:
        face_result = face_landmarker.detect(mp_image)
        pose_result = pose_landmarker.detect(mp_image)
    except Exception:
        return features, False

    # Safe extraction of landmark lists
    face_landmarks_list = getattr(face_result, 'face_landmarks', None)
    pose_landmarks_list = getattr(pose_result, 'pose_landmarks', None)

    face_disp = 1.0 if (face_landmarks_list and len(face_landmarks_list) > 0) else 0.0

    # Determine pose display: try to be permissive with different return types
    pose_disp = 0.0
    if pose_landmarks_list:
        try:
            first_pose = pose_landmarks_list[0]
            npoints = len(first_pose)
        except Exception:
            try:
                npoints = len(pose_landmarks_list)
            except Exception:
                npoints = 0
        if npoints >= 13:
            pose_disp = 1.0

    # Default raw values
    avg_ear_raw = DUMMY_VALUE
    mar_raw = DUMMY_VALUE
    normalized_eye_closure_raw = DUMMY_VALUE
    pitch = DUMMY_VALUE
    yaw = DUMMY_VALUE
    roll = DUMMY_VALUE
    d_slump_raw = DUMMY_VALUE
    r_tilt_raw = DUMMY_VALUE

    # CASES:
    # - Both present (face_disp==1 and pose_disp==1): compute all normally
    # - Face present only (face_disp==1, pose_disp==0): compute facial features and head-pose from face; body metrics set to DUMMY
    # - Pose present only (face_disp==0, pose_disp==1): compute body metrics from pose; facial features set to DUMMY
    # - Both missing: keep all DUMMY and mark unreliable

    # If face present -> compute facial features and head pose (PnP)
    if face_disp:
        try:
            face_landmarks = face_landmarks_list[0]
            # Trích xuất toạ độ Pixel (2D)
            landmarks_points = []
            for landmark in face_landmarks:
                x = float(landmark.x) * float(w)
                y = float(landmark.y) * float(h)
                landmarks_points.append((x, y))

            # Head pose from face mesh (PnP) - protect against PnP failures
            try:
                (yaw, pitch, roll), _ = get_head_pose(face_landmarks, w, h)
            except Exception:
                yaw = pitch = roll = DUMMY_VALUE

            # Build eye/mouth coordinate lists safely (guard index errors)
            try:
                left_eye_coords = [landmarks_points[i] for i in LEFT_EYE_INDICES]
                right_eye_coords = [landmarks_points[i] for i in RIGHT_EYE_INDICES]
                mouth_coords = [landmarks_points[i] for i in MOUTH_INDICES]
            except Exception:
                left_eye_coords = right_eye_coords = mouth_coords = None

            if left_eye_coords and right_eye_coords:
                try:
                    avg_ear_raw = (eye_aspect_ratio(left_eye_coords) + eye_aspect_ratio(right_eye_coords)) / 2.0
                except Exception:
                    avg_ear_raw = DUMMY_VALUE

                try:
                    eye_closure_raw = (dist.euclidean(left_eye_coords[1], left_eye_coords[5]) +
                                       dist.euclidean(right_eye_coords[1], right_eye_coords[5])) / 2.0
                except Exception:
                    eye_closure_raw = DUMMY_VALUE
            else:
                eye_closure_raw = DUMMY_VALUE

            if mouth_coords:
                try:
                    mar_raw = mouth_aspect_ratio(mouth_coords)
                except Exception:
                    mar_raw = DUMMY_VALUE

            # If pose available, compute slump/tilt; else set to DUMMY per requested policy
            if pose_disp:
                try:
                    # pose_landmarks_list[0] should be sequence of landmarks
                    d_slump_raw, r_tilt_raw = calculate_slump_features(face_landmarks, pose_landmarks_list[0], w, h)
                except Exception:
                    d_slump_raw = DUMMY_VALUE; r_tilt_raw = DUMMY_VALUE
            else:
                # Face present but body pose missing -> mark body metrics as DUMMY
                d_slump_raw = DUMMY_VALUE
                r_tilt_raw = DUMMY_VALUE

            # Normalize Eye_CL by face bounding box width (scale-invariant)
            try:
                xs = [p[0] for p in landmarks_points]
                face_w = (max(xs) - min(xs)) if xs else 1.0
                normalized_eye_closure_raw = eye_closure_raw / (face_w + 1e-6)
            except Exception:
                normalized_eye_closure_raw = DUMMY_VALUE

        except Exception:
            # If any face-processing error occurs, mark as unreliable but continue
            return features, False

    else:
        # face not present
        if pose_disp:
            try:
                # Pose-only: compute slump/tilt from pose
                d_slump_raw, r_tilt_raw = calculate_slump_features(None, pose_landmarks_list[0], w, h)
            except Exception:
                d_slump_raw = DUMMY_VALUE; r_tilt_raw = DUMMY_VALUE
            # Facial features remain DUMMY (avg_ear_raw, mar_raw, eye closure)
        # else: both missing -> keep DUMMYs

    # Round numeric values to 4 decimals BEFORE adding to numpy array
    try:
        avg_ear = round(float(avg_ear_raw), 4)
    except Exception:
        avg_ear = float(DUMMY_VALUE)
    try:
        mar = round(float(mar_raw), 4)
    except Exception:
        mar = float(DUMMY_VALUE)
    try:
        eye_closure = round(float(normalized_eye_closure_raw), 4)
    except Exception:
        eye_closure = float(DUMMY_VALUE)

    try:
        d_slump = round(float(d_slump_raw), 4)
    except Exception:
        d_slump = float(DUMMY_VALUE)
    try:
        r_tilt = round(float(r_tilt_raw), 4)
    except Exception:
        r_tilt = float(DUMMY_VALUE)

    # Ensure head pose values are floats and rounded if available
    try:
        pitch = round(float(pitch), 4)
        yaw = round(float(yaw), 4)
        roll = round(float(roll), 4)
    except Exception:
        pitch = float(DUMMY_VALUE); yaw = float(DUMMY_VALUE); roll = float(DUMMY_VALUE)

    # Assemble features: note order matches model expectation
    features = np.array([avg_ear, mar, pitch, yaw, roll, d_slump, r_tilt, eye_closure, float(face_disp), float(pose_disp)])

    # Mark as reliable if at least one of face or pose is present
    is_reliable = True if (face_disp == 1.0 or pose_disp == 1.0) else False
    return features, is_reliable

In [113]:
# Preprocessing + prediction wrapper implementing Dynamic Input Masking strategy
import numpy as np

def preprocess_for_prediction(features, slump_threshold=50.0):
    """Apply the dynamic-masking rules described in the strategy.

    features: iterable length 10 -> [EAR, MAR, PITCH, YAW, ROLL, D_SLUMP, R_TILT, EYE_CL, face_disp, pose_disp]
    Returns (processed_features, heuristic_str) where heuristic_str is set for Scenario C or other non-NN outcomes.
    """
    f = np.array(features, dtype=float).flatten()
    if f.size < 10:
        return None, 'invalid_input'

    ear, mar, pitch, yaw, roll, d_slump, r_tilt, eye_cl, face_disp, pose_disp = f

    # Scenario A: both present -> pass through
    if face_disp == 1.0 and pose_disp == 1.0:
        proc = np.array([ear, mar, pitch, yaw, roll, d_slump, r_tilt, eye_cl, face_disp, pose_disp], dtype=float)
        return proc, None

    # Scenario B: face present, pose missing -> neutralize body metrics
    if face_disp == 1.0 and pose_disp == 0.0:
        d_slump = 0.0
        r_tilt = 0.0
        proc = np.array([ear, mar, pitch, yaw, roll, d_slump, r_tilt, eye_cl, face_disp, pose_disp], dtype=float)
        return proc, None

    # Scenario C: face missing, pose present -> heuristic override
    if face_disp == 0.0 and pose_disp == 1.0:
        if d_slump > slump_threshold:
            return None, 'Pass Out'
        else:
            return None, 'Unknown/Adjust Camera'

    # No user detected (both missing)
    return None, 'No User Detected'


def predict_with_model(model, features, slump_threshold=50.0):
    """Preprocess features and either apply heuristic or call model.predict.
    Returns a dict: {'heuristic': str} or {'prediction': model_output} or {'error': str}.
    """
    processed, heuristic = preprocess_for_prediction(features, slump_threshold)
    if heuristic:
        return {'heuristic': heuristic}

    X = np.array([processed], dtype=float)
    try:
        pred = model.predict(X)
    except Exception as e:
        return {'error': str(e)}
    return {'prediction': pred}

# Quick mock demo using a simple fake model to show behavior for the three scenarios
class MockModel:
    def predict(self, X):
        # simple mock: return class probabilities array
        return np.array([[0.1, 0.9]])

m = MockModel()
print('Scenario A (face=1,pose=1):', predict_with_model(m, [0.1234, 0.5, -10.0, 0.0, 1.0, 20.0, -3.0, 0.09, 1.0, 1.0]))
print('Scenario B (face=1,pose=0):', predict_with_model(m, [0.1234, 0.5, -10.0, 0.0, 1.0, 999.0, 999.0, 0.09, 1.0, 0.0]))
print('Scenario C (face=0,pose=1,high slump):', predict_with_model(m, [0.0, 0.0, 0.0, 0.0, 0.0, 80.0, 0.0, -999.0, 0.0, 1.0], slump_threshold=30))
print('Scenario C (face=0,pose=1,low slump):', predict_with_model(m, [0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 0.0, -999.0, 0.0, 1.0], slump_threshold=30))


Scenario A (face=1,pose=1): {'prediction': array([[0.1, 0.9]])}
Scenario B (face=1,pose=0): {'prediction': array([[0.1, 0.9]])}
Scenario C (face=0,pose=1,high slump): {'heuristic': 'Pass Out'}
Scenario C (face=0,pose=1,low slump): {'heuristic': 'Unknown/Adjust Camera'}


In [114]:
# 5. Trích xuất đặc trưng và đánh dấu độ tin cậy (Example snippet)
# Use this example inside your frame-processing loop where `image`, `label_index`,
# `filename`, `face_landmarker`, `pose_landmarker` and `data_rows` are defined.
#
# features, is_reliable = extract_features(image, face_landmarker, pose_landmarker)
# if is_reliable:
#     row = {
#         'EAR': features[0], 'MAR': features[1], 'PITCH': features[2],
#         'YAW': features[3], 'ROLL': features[4], 'D_SLUMP': features[5],
#         'R_TILT': features[6], 'EYE_CL': features[7],
#         'FACIAL_DISPLAYED': features[8], 'POSE_DISPLAYED': features[9],
#         'Label': label_index,
#         'File_name': filename
#     }
#     data_rows.append(row)


In [115]:
df = pd.read_csv('./Outputs/training-feature.csv', header = 0)
df.iloc[:, 0:8] = df.iloc[:, 0:8].round(4)

In [116]:
# Quick local test (does not require mediapipe models)
# Demonstrate three scenarios and resulting features vector (same order used for CSV/model)

import numpy as np

def build_features_mock(face_present, pose_present):
    # Simulated raw values
    avg_ear_raw = 0.123456789
    mar_raw = 0.987654321
    pitch_raw = -12.3456789
    yaw_raw = 5.43219876
    roll_raw = 1.23456789
    d_slump_raw = 23.456789
    r_tilt_raw = -3.14159265
    eye_closure_raw = 10.9876543

    if face_present:
        # simulate face bbox width
        face_w = 120.0
        normalized_eye_closure_raw = eye_closure_raw / (face_w + 1e-6)
    else:
        normalized_eye_closure_raw = -999.0

    if not pose_present and face_present:
        # scenario B: neutralize body metrics
        d_slump_raw = 0.0
        r_tilt_raw = 0.0

    if not face_present and pose_present:
        # scenario C: facial dummies, keep slump from pose
        avg_ear_raw = -999.0
        mar_raw = -999.0
        pitch_raw = -999.0
        yaw_raw = -999.0
        roll_raw = -999.0

    avg_ear = round(float(avg_ear_raw), 4)
    mar = round(float(mar_raw), 4)
    eye_closure = round(float(normalized_eye_closure_raw), 4)
    d_slump = round(float(d_slump_raw), 4)
    r_tilt = round(float(r_tilt_raw), 4)
    pitch = round(float(pitch_raw), 4)
    yaw = round(float(yaw_raw), 4)
    roll = round(float(roll_raw), 4)

    features = np.array([avg_ear, mar, pitch, yaw, roll, d_slump, r_tilt, eye_closure, float(face_present), float(pose_present)])
    return features

print('Scenario A (face=1, pose=1):', build_features_mock(True, True))
print('Scenario B (face=1, pose=0):', build_features_mock(True, False))
print('Scenario C (face=0, pose=1):', build_features_mock(False, True))


Scenario A (face=1, pose=1): [  0.1235   0.9877 -12.3457   5.4322   1.2346  23.4568  -3.1416   0.0916
   1.       1.    ]
Scenario B (face=1, pose=0): [  0.1235   0.9877 -12.3457   5.4322   1.2346   0.       0.       0.0916
   1.       0.    ]
Scenario C (face=0, pose=1): [-999.     -999.     -999.     -999.     -999.       23.4568   -3.1416
 -999.        0.        1.    ]


In [117]:
# === Run full data collection and export CSV ===
import os
import cv2
import pandas as pd

os.makedirs(OUTPUT_DIR, exist_ok=True)
face_model_path = os.path.join('model', 'face_landmarker.task')
pose_model_path = os.path.join('model', 'pose_landmarker_full.task')

if not (os.path.exists(face_model_path) and os.path.exists(pose_model_path)):
    print('MediaPipe model files missing. Expected:', face_model_path, 'and', pose_model_path)
    print('Place the .task files in the ./model/ folder and re-run this cell.')
else:
    try:
        base_options_face = python.BaseOptions(model_asset_path=face_model_path)
        face_options = vision.FaceLandmarkerOptions(base_options=base_options_face, running_mode=vision.RunningMode.IMAGE)
        face_landmarker = vision.FaceLandmarker.create_from_options(face_options)

        base_options_pose = python.BaseOptions(model_asset_path=pose_model_path)
        pose_options = vision.PoseLandmarkerOptions(base_options=base_options_pose, running_mode=vision.RunningMode.IMAGE)
        pose_landmarker = vision.PoseLandmarker.create_from_options(pose_options)
    except Exception as e:
        print('Failed to initialize mediapipe landmarkers:', e)
        raise

    data_rows = []
    # Build label map with explicit desired mapping to ensure folder->label consistency
    # Desired mapping: 0=Awake, 1=Sleeping, 2=Yawning, 3=Passed_out
    desired_map = {'Awake': 0, 'Sleep': 1, 'Yawning': 2, 'Passed_out': 3}
    label_map = {}
    # Assign desired labels if the expected folders exist
    for name, idx in desired_map.items():
        path = os.path.join(DATA_ROOT, name)
        if os.path.isdir(path):
            label_map[name] = idx
        else:
            # If a desired folder is missing, skip but keep other mappings stable
            pass
    # Add any other folders present in DATA_ROOT, assigning unique labels after the desired indices
    existing_dirs = [d for d in sorted(os.listdir(DATA_ROOT)) if os.path.isdir(os.path.join(DATA_ROOT, d))]
    next_label = max(label_map.values()) + 1 if label_map else 0
    for d in existing_dirs:
        if d in label_map:
            continue
        label_map[d] = next_label
        next_label += 1

    print('Label mapping:', label_map)

    # Iterate through labeled directories
    for label in sorted(os.listdir(DATA_ROOT)):
        dir_path = os.path.join(DATA_ROOT, label)
        if not os.path.isdir(dir_path):
            continue
        for fname in sorted(os.listdir(dir_path)):
            fpath = os.path.join(dir_path, fname)
            if not fname.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
                continue
            img = cv2.imread(fpath)
            if img is None:
                print('Failed to read image', fpath)
                continue
            features, is_reliable = extract_features(img, face_landmarker, pose_landmarker)
            if not is_reliable:
                continue
            row = {
                'EAR': features[0], 'MAR': features[1], 'PITCH': features[2],
                'YAW': features[3], 'ROLL': features[4], 'D_SLUMP': features[5],
                'R_TILT': features[6], 'EYE_CL': features[7],
                'FACIAL_DISPLAYED': features[8], 'POSE_DISPLAYED': features[9],
                'Label': label_map.get(label, DUMMY_VALUE), 'File_name': fpath,
            }
            data_rows.append(row)

    df_out = pd.DataFrame(data_rows)
    if not df_out.empty:
        # round numeric columns 0:8
        df_out.iloc[:, 0:8] = df_out.iloc[:, 0:8].round(4)
        df_out.to_csv(OUTPUT_CSV, index=False)
        print('Wrote', OUTPUT_CSV, 'rows:', len(df_out))
        display(df_out.head())
    else:
        print('No reliable feature rows extracted. CSV not written.')

Label mapping: {'Awake': 0, 'Sleep': 1, 'Yawning': 2, 'Passed_out': 3}
Wrote ./Outputs\training-feature.csv rows: 2649


Unnamed: 0,EAR,MAR,PITCH,YAW,ROLL,D_SLUMP,R_TILT,EYE_CL,FACIAL_DISPLAYED,POSE_DISPLAYED,Label,File_name
0,0.2769,0.2869,-18.5554,178.4571,158.9472,-99.6823,175.2384,0.056,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132733_jpg.r...
1,0.2442,0.3397,-25.0597,-179.2271,168.7611,28.5194,-88.9147,0.0421,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132734_jpg.r...
2,0.2466,0.3346,-25.1369,-179.2418,165.9188,-5.1335,-93.9959,0.0437,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132734_jpg.r...
3,0.2497,0.2491,-5.2367,6.9256,-146.2565,-88.1373,-177.1412,0.0507,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132738_jpg.r...
4,0.1725,0.2865,20.0368,-173.5545,163.3054,-116.3889,179.3054,0.0318,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132910_jpg.r...


In [3]:
df = pd.read_csv('./Outputs/training-feature.csv', header=0)
df

Unnamed: 0,EAR,MAR,PITCH,YAW,ROLL,D_SLUMP,R_TILT,EYE_CL,FACIAL_DISPLAYED,POSE_DISPLAYED,Label,File_name
0,0.2769,0.2869,-18.5554,178.4571,158.9472,-99.6823,175.2384,0.0560,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132733_jpg.r...
1,0.2442,0.3397,-25.0597,-179.2271,168.7611,28.5194,-88.9147,0.0421,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132734_jpg.r...
2,0.2466,0.3346,-25.1369,-179.2418,165.9188,-5.1335,-93.9959,0.0437,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132734_jpg.r...
3,0.2497,0.2491,-5.2367,6.9256,-146.2565,-88.1373,-177.1412,0.0507,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132738_jpg.r...
4,0.1725,0.2865,20.0368,-173.5545,163.3054,-116.3889,179.3054,0.0318,1.0,1.0,0,./Datasets_train\Awake\IMG20221206132910_jpg.r...
...,...,...,...,...,...,...,...,...,...,...,...,...
2644,0.5407,1.3963,-55.9649,-2.4633,-169.9219,-151.8759,168.7519,0.0933,1.0,1.0,2,./Datasets_train\Yawning\98_jpg.rf.53e253f924f...
2645,0.5027,1.2889,-53.8685,-5.4141,-167.8768,-151.6174,171.2678,0.0858,1.0,1.0,2,./Datasets_train\Yawning\99_jpg.rf.6ebf77c0923...
2646,0.5027,1.2889,-53.8685,-5.4141,-167.8768,-151.6174,171.2678,0.0858,1.0,1.0,2,./Datasets_train\Yawning\99_jpg.rf.e36044df54f...
2647,0.3074,0.7548,37.1865,5.6339,-171.5681,-72.4688,175.4015,0.0621,1.0,1.0,2,./Datasets_train\Yawning\9_jpg.rf.0377dc31d5f2...


In [33]:
df[df['EAR'] == DUMMY_VALUE]

Unnamed: 0,EAR,MAR,PITCH,YAW,ROLL,D_SLUMP,R_TILT,EYE_CL,FACIAL_DISPLAYED,POSE_DISPLAYED,Label,File_name
137,-999.0,-999.0,-999.0,-999.0,-999.0,49.1356,-19.8760,-999.0,0.0,1.0,0,./Datasets_train\Awake\IMG_20230712_173111_JPG...
141,-999.0,-999.0,-999.0,-999.0,-999.0,-27.0321,-89.4571,-999.0,0.0,1.0,0,./Datasets_train\Awake\IMG_20230712_175930_JPG...
167,-999.0,-999.0,-999.0,-999.0,-999.0,-123.3905,-169.4538,-999.0,0.0,1.0,0,./Datasets_train\Awake\IMG_2049_JPG_jpg.rf.8e6...
566,-999.0,-999.0,-999.0,-999.0,-999.0,-11.2797,179.9826,-999.0,0.0,1.0,3,./Datasets_train\Passed_out\1000_jpg.rf.1c64ec...
568,-999.0,-999.0,-999.0,-999.0,-999.0,-36.2596,179.4205,-999.0,0.0,1.0,3,./Datasets_train\Passed_out\1020_jpg.rf.32d2b1...
...,...,...,...,...,...,...,...,...,...,...,...,...
2574,-999.0,-999.0,-999.0,-999.0,-999.0,-106.8333,165.8508,-999.0,0.0,1.0,2,./Datasets_train\Yawning\706_jpg.rf.c6a644d39b...
2579,-999.0,-999.0,-999.0,-999.0,-999.0,-138.2481,177.0695,-999.0,0.0,1.0,2,./Datasets_train\Yawning\70_jpg.rf.30b98cafd12...
2580,-999.0,-999.0,-999.0,-999.0,-999.0,-138.2481,177.0695,-999.0,0.0,1.0,2,./Datasets_train\Yawning\70_jpg.rf.b9de9082ff9...
2603,-999.0,-999.0,-999.0,-999.0,-999.0,-131.1640,-177.8375,-999.0,0.0,1.0,2,./Datasets_train\Yawning\75_jpg.rf.d65e90f2731...


In [34]:
df_reasoned = df 

In [36]:
# Update labels in df_reasoned based on the specified conditions
df_reasoned.loc[
    (df_reasoned['EAR'] == DUMMY_VALUE) & 
    (df_reasoned['MAR'] == DUMMY_VALUE) & 
    (df_reasoned['EYE_CL'] == DUMMY_VALUE) & 
    (df_reasoned['FACIAL_DISPLAYED'] == 0), 
    'Label'
] = 3

# For the second condition: if pose_displayed is 0 and facial_displayed is 1, keep the old label (no change needed, as it's already set)

# Third condition: if both facial_displayed and pose_displayed are 0, set label to 3
df_reasoned.loc[
    (df_reasoned['FACIAL_DISPLAYED'] == 0) & 
    (df_reasoned['POSE_DISPLAYED'] == 0), 
    'Label'
] = 3

In [38]:
df_reasoned[df['FACIAL_DISPLAYED'] == 0]

Unnamed: 0,EAR,MAR,PITCH,YAW,ROLL,D_SLUMP,R_TILT,EYE_CL,FACIAL_DISPLAYED,POSE_DISPLAYED,Label,File_name
137,-999.0,-999.0,-999.0,-999.0,-999.0,49.1356,-19.8760,-999.0,0.0,1.0,3,./Datasets_train\Awake\IMG_20230712_173111_JPG...
141,-999.0,-999.0,-999.0,-999.0,-999.0,-27.0321,-89.4571,-999.0,0.0,1.0,3,./Datasets_train\Awake\IMG_20230712_175930_JPG...
167,-999.0,-999.0,-999.0,-999.0,-999.0,-123.3905,-169.4538,-999.0,0.0,1.0,3,./Datasets_train\Awake\IMG_2049_JPG_jpg.rf.8e6...
566,-999.0,-999.0,-999.0,-999.0,-999.0,-11.2797,179.9826,-999.0,0.0,1.0,3,./Datasets_train\Passed_out\1000_jpg.rf.1c64ec...
568,-999.0,-999.0,-999.0,-999.0,-999.0,-36.2596,179.4205,-999.0,0.0,1.0,3,./Datasets_train\Passed_out\1020_jpg.rf.32d2b1...
...,...,...,...,...,...,...,...,...,...,...,...,...
2574,-999.0,-999.0,-999.0,-999.0,-999.0,-106.8333,165.8508,-999.0,0.0,1.0,3,./Datasets_train\Yawning\706_jpg.rf.c6a644d39b...
2579,-999.0,-999.0,-999.0,-999.0,-999.0,-138.2481,177.0695,-999.0,0.0,1.0,3,./Datasets_train\Yawning\70_jpg.rf.30b98cafd12...
2580,-999.0,-999.0,-999.0,-999.0,-999.0,-138.2481,177.0695,-999.0,0.0,1.0,3,./Datasets_train\Yawning\70_jpg.rf.b9de9082ff9...
2603,-999.0,-999.0,-999.0,-999.0,-999.0,-131.1640,-177.8375,-999.0,0.0,1.0,3,./Datasets_train\Yawning\75_jpg.rf.d65e90f2731...


In [45]:
df_reasoned.to_csv('./Outputs/final.csv', index=False)

In [40]:
df[df['Label'] == 2]

Unnamed: 0,EAR,MAR,PITCH,YAW,ROLL,D_SLUMP,R_TILT,EYE_CL,FACIAL_DISPLAYED,POSE_DISPLAYED,Label,File_name
1757,0.4490,1.1578,-50.9801,-4.4971,-167.6374,-153.3012,170.6270,0.0769,1.0,1.0,2,./Datasets_train\Yawning\102_jpg.rf.8c315df219...
1758,0.4490,1.1578,-50.9801,-4.4971,-167.6374,-153.3012,170.6270,0.0769,1.0,1.0,2,./Datasets_train\Yawning\102_jpg.rf.9eb779437d...
1759,0.4341,1.1093,-52.5147,-4.4072,-169.4199,-152.2659,170.4873,0.0727,1.0,1.0,2,./Datasets_train\Yawning\104_jpg.rf.7c627d8f35...
1760,0.4341,1.1093,-52.5147,-4.4072,-169.4199,-152.2659,170.4873,0.0727,1.0,1.0,2,./Datasets_train\Yawning\104_jpg.rf.916da6bda0...
1761,0.4843,1.5242,-55.0202,-4.5883,-169.0989,-150.9139,170.7693,0.0843,1.0,1.0,2,./Datasets_train\Yawning\107_jpg.rf.1fa685b66f...
...,...,...,...,...,...,...,...,...,...,...,...,...
2644,0.5407,1.3963,-55.9649,-2.4633,-169.9219,-151.8759,168.7519,0.0933,1.0,1.0,2,./Datasets_train\Yawning\98_jpg.rf.53e253f924f...
2645,0.5027,1.2889,-53.8685,-5.4141,-167.8768,-151.6174,171.2678,0.0858,1.0,1.0,2,./Datasets_train\Yawning\99_jpg.rf.6ebf77c0923...
2646,0.5027,1.2889,-53.8685,-5.4141,-167.8768,-151.6174,171.2678,0.0858,1.0,1.0,2,./Datasets_train\Yawning\99_jpg.rf.e36044df54f...
2647,0.3074,0.7548,37.1865,5.6339,-171.5681,-72.4688,175.4015,0.0621,1.0,1.0,2,./Datasets_train\Yawning\9_jpg.rf.0377dc31d5f2...
