In [None]:
import cv2
import mediapipe as mp
import numpy as np
import joblib
import tensorflow as tf

# Load models
pose_model = joblib.load("G:/zolo/classroom-attention-monitor/models/pose_model.pkl")
headpose_model = joblib.load("G:/zolo/classroom-attention-monitor/models/headpose_model_fast.pkl")
eye_model = tf.keras.models.load_model("G:/zolo/classroom-attention-monitor/models/eye_state_cnn.h5")

# MediaPipe setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False)
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, refine_landmarks=True, max_num_faces=1)
drawing = mp.solutions.drawing_utils

# Constants
LEFT_EYE = [33, 133, 160, 159, 158, 144, 153, 154, 155]
RIGHT_EYE = [362, 263, 387, 386, 385, 373, 380, 381, 382]
IMG_SIZE = 34
CLOSED_THRESHOLD = 10
closed_frames = 0

# Webcam
cap = cv2.VideoCapture(0)
calibrated_yaw = 0
yaw_sum = 0
calibration_frames = 30
calibrating = True
frame_index = 0

def extract_eye(eye_points, landmarks, frame):
    h, w, _ = frame.shape
    points = [(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in eye_points]
    x_min = max(min(p[0] for p in points) - 5, 0)
    x_max = min(max(p[0] for p in points) + 5, w)
    y_min = max(min(p[1] for p in points) - 5, 0)
    y_max = min(max(p[1] for p in points) + 5, h)
    eye = frame[y_min:y_max, x_min:x_max]

    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 255), 2)

    if eye.size == 0:
        return None
    eye = cv2.cvtColor(eye, cv2.COLOR_BGR2GRAY)
    eye = cv2.resize(eye, (IMG_SIZE, IMG_SIZE)) / 255.0
    return eye.reshape(1, IMG_SIZE, IMG_SIZE, 1)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    h, w, _ = frame.shape
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    label_pose = "Unknown"
    label_yaw = "Looking Straight"
    label_eye = "Open"
    attention_state = []

    pose_result = pose.process(rgb)
    face_result = face_mesh.process(rgb)

    if pose_result.pose_landmarks:
        drawing.draw_landmarks(frame, pose_result.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        pose_row = []
        for lm in pose_result.pose_landmarks.landmark:
            pose_row.extend([lm.x, lm.y, lm.visibility])
        if len(pose_row) == 99:
            pred_pose = pose_model.predict(np.array(pose_row).reshape(1, -1))[0]
            label_map = {0: "Attentive", 1: "Slouching", 2: "Raise Hand"}
            label_pose = label_map.get(int(pred_pose), "Unknown")
            attention_state.append(label_pose)

        # Rule-based hand raise
        lw, ls = pose_result.pose_landmarks.landmark[15], pose_result.pose_landmarks.landmark[11]
        rw, rs = pose_result.pose_landmarks.landmark[16], pose_result.pose_landmarks.landmark[12]
        if lw.visibility > 0.5 and ls.visibility > 0.5 and lw.y < ls.y:
            attention_state.append("Raise Hand (L)")
        if rw.visibility > 0.5 and rs.visibility > 0.5 and rw.y < rs.y:
            attention_state.append("Raise Hand (R)")

    if face_result.multi_face_landmarks:
        face_landmarks = face_result.multi_face_landmarks[0]
        drawing.draw_landmarks(frame, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION)

        features = []
        for i in range(50):
            lm = face_landmarks.landmark[i]
            features.extend([lm.x, lm.y])
        while len(features) < 100:
            features.append(0)

        yaw, pitch, roll = headpose_model.predict(np.array(features).reshape(1, -1))[0]
        print(f"RAW YAW: {yaw:.2f}, PITCH: {pitch:.2f}")

        frame_index += 1

        if calibrating:
            yaw_sum += yaw
            if frame_index == calibration_frames:
                calibrated_yaw = yaw_sum / calibration_frames
                calibrating = False
                print(f"✅ Calibrated yaw offset: {calibrated_yaw:.2f}")
                continue  # skip this frame
            cv2.putText(frame, "Calibrating... Look straight", (20, 460),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)
        else:
            yaw_corrected = yaw - calibrated_yaw
            print(f"CORRECTED YAW: {yaw_corrected:.2f}")
            cv2.putText(frame, f"Yaw: {yaw_corrected:.2f}", (20, 120),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

            if abs(yaw_corrected) > 20:
                attention_state.append("Looking Away")

            # Eye state
            landmarks = face_landmarks.landmark
            left_eye = extract_eye(LEFT_EYE, landmarks, frame)
            right_eye = extract_eye(RIGHT_EYE, landmarks, frame)

            if left_eye is not None and right_eye is not None:
                left_conf = eye_model.predict(left_eye, verbose=0)[0]
                right_conf = eye_model.predict(right_eye, verbose=0)[0]
                print("Left Eye Conf:", left_conf)
                print("Right Eye Conf:", right_conf)

                if left_conf[1] > 0.85 and right_conf[1] > 0.85:
                    closed_frames += 1
                    label_eye = "Closed"
                else:
                    closed_frames = 0
                    label_eye = "Open"

                if closed_frames >= CLOSED_THRESHOLD or pitch > 15:
                    attention_state.append("Drowsy")
    else:
        attention_state.append("Off-Camera")

    # Draw final state
    final_label = ", ".join(attention_state) if attention_state else "No Attention Label"
    cv2.putText(frame, f"State: {final_label}", (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 3)

    cv2.imshow("Classroom Attention Monitor", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp
import numpy as np

# Setup MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, refine_landmarks=True, max_num_faces=1)
drawing = mp.solutions.drawing_utils

# Yaw calibration vars
calibration_frames = 30
yaw_sum = 0
frame_count = 0
calibrated_yaw = 0
calibrated = False

# Start webcam
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    h, w, _ = frame.shape
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = face_mesh.process(rgb)

    label = "Looking Straight"

    if result.multi_face_landmarks:
        face = result.multi_face_landmarks[0]
        drawing.draw_landmarks(frame, face, mp_face_mesh.FACEMESH_TESSELATION)
        lm = face.landmark

        # 3D coordinates for yaw estimation
        nose = np.array([lm[1].x, lm[1].y, lm[1].z])
        left_eye = np.array([lm[33].x, lm[33].y, lm[33].z])
        right_eye = np.array([lm[263].x, lm[263].y, lm[263].z])
        eye_mid = (left_eye + right_eye) / 2
        face_vector = nose - eye_mid

        # Yaw angle in degrees
        yaw_angle = -np.arctan2(face_vector[0], face_vector[2]) * 180 / np.pi

        # Visualize face direction
        cx, cy = int(lm[1].x * w), int(lm[1].y * h)
        dx = int(150 * face_vector[0])
        dy = int(150 * face_vector[1])
        cv2.arrowedLine(frame, (cx, cy), (cx + dx, cy + dy), (0, 255, 0), 2)

        # Calibration
        if not calibrated:
            yaw_sum += yaw_angle
            frame_count += 1
            cv2.putText(frame, "Calibrating... Look straight", (20, 460),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
            if frame_count == calibration_frames:
                calibrated_yaw = yaw_sum / calibration_frames
                calibrated = True
                print(f"✅ Calibrated yaw: {calibrated_yaw:.2f}")
        else:
            corrected_yaw = yaw_angle - calibrated_yaw
            cv2.putText(frame, f"Yaw: {corrected_yaw:.1f}", (20, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (200, 255, 200), 2)
            if abs(corrected_yaw) > 20:
                label = "Looking Away"
            else:
                label = "Looking Straight"
    else:
        label = "No Face Detected"

    # Draw final label
    cv2.putText(frame, label, (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 3)

    cv2.imshow("Looking Away Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


✅ Calibrated yaw: 176.13


In [None]:
import cv2
import numpy as np
import joblib
import tensorflow as tf
import mediapipe as mp

# Load models
eye_model = tf.keras.models.load_model("G:/zolo/classroom-attention-monitor/models/eye_state_cnn.h5")
pose_model = joblib.load("G:/zolo/classroom-attention-monitor/models/headpose_model_fast.pkl")  # for pitch only

# MediaPipe Face Mesh setup
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, refine_landmarks=True)
drawing = mp.solutions.drawing_utils

# Landmark indexes
LEFT_EYE = [33, 133, 160, 159, 158, 144, 153, 154, 155]
RIGHT_EYE = [362, 263, 387, 386, 385, 373, 380, 381, 382]
IMG_SIZE = 34

# Drowsiness tracking
closed_frames = 0
CLOSED_THRESHOLD = 15
yaw_calibration_frames = 30
yaw_sum = 0
frame_count = 0
calibrated_yaw = 0
yaw_calibrated = False

# Webcam
cap = cv2.VideoCapture(0)

# Helper: crop eye
def extract_eye_image(eye_indices, landmarks, frame):
    h, w, _ = frame.shape
    points = [(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in eye_indices]
    x_min = max(min(p[0] for p in points) - 5, 0)
    x_max = min(max(p[0] for p in points) + 5, w)
    y_min = max(min(p[1] for p in points) - 5, 0)
    y_max = min(max(p[1] for p in points) + 5, h)
    eye_img = frame[y_min:y_max, x_min:x_max]
    if eye_img.size == 0:
        return None
    eye_img = cv2.cvtColor(eye_img, cv2.COLOR_BGR2GRAY)
    eye_img = cv2.resize(eye_img, (IMG_SIZE, IMG_SIZE)) / 255.0
    return eye_img.reshape(1, IMG_SIZE, IMG_SIZE, 1)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(rgb)

    h, w, _ = frame.shape
    label = ""
    label_color = (0, 255, 0)
    is_drowsy = False
    looking_away = False

    if results.multi_face_landmarks:
        face_landmarks = results.multi_face_landmarks[0]
        lm = face_landmarks.landmark

        # Draw mesh
        drawing.draw_landmarks(frame, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION)

        # Head direction vector (yaw)
        nose = np.array([lm[1].x, lm[1].y, lm[1].z])
        left_eye_corner = np.array([lm[33].x, lm[33].y, lm[33].z])
        right_eye_corner = np.array([lm[263].x, lm[263].y, lm[263].z])
        eye_mid = (left_eye_corner + right_eye_corner) / 2
        face_vector = nose - eye_mid
        yaw_angle = np.arctan2(face_vector[0], face_vector[2]) * 180 / np.pi

        # Calibrate yaw
        if not yaw_calibrated:
            yaw_sum += yaw_angle
            frame_count += 1
            cv2.putText(frame, "Calibrating Yaw... Look Straight", (20, 460),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
            if frame_count == yaw_calibration_frames:
                calibrated_yaw = yaw_sum / yaw_calibration_frames
                yaw_calibrated = True
                print(f"✅ Yaw calibrated: {calibrated_yaw:.2f}")
        else:
            corrected_yaw = yaw_angle - calibrated_yaw
            cv2.putText(frame, f"Yaw: {corrected_yaw:.1f}", (20, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
            if abs(corrected_yaw) > 20:
                looking_away = True

        # Eye processing
        left_eye = extract_eye_image(LEFT_EYE, lm, frame)
        right_eye = extract_eye_image(RIGHT_EYE, lm, frame)

        eyes_closed = False
        if left_eye is not None and right_eye is not None:
            l_pred = eye_model.predict(left_eye, verbose=0)[0]
            r_pred = eye_model.predict(right_eye, verbose=0)[0]
            left_state = np.argmax(l_pred)
            right_state = np.argmax(r_pred)

            if left_state == 1 and right_state == 1:
                closed_frames += 1
            else:
                closed_frames = 0

            if closed_frames >= CLOSED_THRESHOLD:
                eyes_closed = True

        # Get pitch angle from headpose model (not yaw!)
        pose_landmarks = []
        for i in range(50):
            pose_landmarks.extend([lm[i].x, lm[i].y])
        while len(pose_landmarks) < 100:
            pose_landmarks.append(0)
        yaw, pitch, roll = pose_model.predict(np.array(pose_landmarks).reshape(1, -1))[0]

        # Final logic
        if eyes_closed or pitch > 15:
            is_drowsy = True
            label += "Drowsy "
            label_color = (0, 0, 255)

        if looking_away:
            label += "| Looking Away"
            label_color = (0, 165, 255)

        if not label:
            label = "Awake & Focused"
            label_color = (0, 255, 0)

    else:
        label = "No Face Detected"
        label_color = (100, 100, 100)

    # Draw final label
    cv2.putText(frame, label, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.1, label_color, 3)
    cv2.imshow("Attention Monitor", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [1]:
import cv2
import numpy as np
import tensorflow as tf
import joblib
import mediapipe as mp

# Load models
eye_model = tf.keras.models.load_model("G:/zolo/classroom-attention-monitor/models/eye_state_cnn.h5")
headpose_model = joblib.load("G:/zolo/classroom-attention-monitor/models/headpose_model_fast.pkl")
pose_model = joblib.load("G:/zolo/classroom-attention-monitor/models/pose_model.pkl")

# MediaPipe setup
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, refine_landmarks=True, max_num_faces=1)

mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False)
drawing = mp.solutions.drawing_utils

# Constants
LEFT_EYE = [33, 133, 160, 159, 158, 144, 153, 154, 155]
RIGHT_EYE = [362, 263, 387, 386, 385, 373, 380, 381, 382]
IMG_SIZE = 34
CLOSED_THRESHOLD = 15
closed_frames = 0

# Yaw calibration
calibration_frames = 30
yaw_sum = 0
frame_count = 0
calibrated_yaw = 0
calibrated = False

# Webcam
cap = cv2.VideoCapture(0)

def extract_eye_image(eye_indices, landmarks, frame):
    h, w, _ = frame.shape
    points = [(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in eye_indices]
    x_min = max(min(p[0] for p in points) - 5, 0)
    x_max = min(max(p[0] for p in points) + 5, w)
    y_min = max(min(p[1] for p in points) - 5, 0)
    y_max = min(max(p[1] for p in points) + 5, h)
    eye_img = frame[y_min:y_max, x_min:x_max]
    if eye_img.size == 0:
        return None
    eye_img = cv2.cvtColor(eye_img, cv2.COLOR_BGR2GRAY)
    eye_img = cv2.resize(eye_img, (IMG_SIZE, IMG_SIZE)) / 255.0
    return eye_img.reshape(1, IMG_SIZE, IMG_SIZE, 1)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    h, w, _ = frame.shape
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    attention_state = []

    # Pose processing (for Attentive, Slouching, Raise Hand)
    pose_result = pose.process(rgb)
    if pose_result.pose_landmarks:
        drawing.draw_landmarks(frame, pose_result.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        pose_row = []
        for lm in pose_result.pose_landmarks.landmark:
            pose_row.extend([lm.x, lm.y, lm.visibility])
        if len(pose_row) == 99:
            pred_pose = pose_model.predict(np.array(pose_row).reshape(1, -1))[0]
            label_map = {0: "Attentive", 1: "Slouching", 2: "Raise Hand"}
            pose_label = label_map.get(int(pred_pose), "Unknown")
            attention_state.append(pose_label)
        if pose_result.pose_landmarks:
            landmarks = pose_result.pose_landmarks.landmark
            left_wrist = landmarks[15]
            left_shoulder = landmarks[11]
            right_wrist = landmarks[16]
            right_shoulder = landmarks[12]

        if (left_wrist.visibility > 0.5 and left_shoulder.visibility > 0.5 and
            left_wrist.y < left_shoulder.y):
            attention_state.append("Raise Hand (L)")

        if (right_wrist.visibility > 0.5 and right_shoulder.visibility > 0.5 and
            right_wrist.y < right_shoulder.y):
            attention_state.append("Raise Hand (R)")


    # Face mesh
    face_result = face_mesh.process(rgb)
    if face_result.multi_face_landmarks:
        face_landmarks = face_result.multi_face_landmarks[0]
        lm = face_landmarks.landmark
        drawing.draw_landmarks(frame, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION)

        # Vector yaw (calibration)
        nose = np.array([lm[1].x, lm[1].y, lm[1].z])
        left_eye_corner = np.array([lm[33].x, lm[33].y, lm[33].z])
        right_eye_corner = np.array([lm[263].x, lm[263].y, lm[263].z])
        eye_mid = (left_eye_corner + right_eye_corner) / 2
        face_vector = nose - eye_mid
        yaw_angle = -np.arctan2(face_vector[0], face_vector[2]) * 180 / np.pi


        
        # face_vector = nose - eye_mid
        # yaw_angle = np.arctan2(face_vector[0], face_vector[2]) * 180 / np.pi

        cx, cy = int(lm[1].x * w), int(lm[1].y * h)
        dx = int(150 * face_vector[0])
        dy = int(150 * face_vector[1])
        cv2.arrowedLine(frame, (cx, cy), (cx + dx, cy + dy), (0, 255, 0), 2)

        if not calibrated:
            yaw_sum += yaw_angle
            frame_count += 1
            cv2.putText(frame, "Calibrating... Look straight", (20, 460),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
            if frame_count == calibration_frames:
                calibrated_yaw = yaw_sum / calibration_frames
                calibrated = True
                print(f"✅ Yaw calibrated: {calibrated_yaw:.2f}")
        else:
            corrected_yaw = yaw_angle - calibrated_yaw
            if corrected_yaw > 15:
                attention_state.append("Looking Away (Right)")
            elif corrected_yaw < -15:
                attention_state.append("Looking Straight")
            else:
                attention_state.append("Looking Away (Left)")


        # Eye state prediction
        left_eye_img = extract_eye_image(LEFT_EYE, lm, frame)
        right_eye_img = extract_eye_image(RIGHT_EYE, lm, frame)
        eyes_closed = False

        if left_eye_img is not None and right_eye_img is not None:
            l_pred = eye_model.predict(left_eye_img, verbose=0)[0]
            r_pred = eye_model.predict(right_eye_img, verbose=0)[0]
            if np.argmax(l_pred) == 1 and np.argmax(r_pred) == 1:
                closed_frames += 1
            else:
                closed_frames = 0
            if closed_frames >= CLOSED_THRESHOLD:
                eyes_closed = True

        # Pitch prediction from headpose model
        headpose_feats = []
        for i in range(50):
            headpose_feats.extend([lm[i].x, lm[i].y])
        while len(headpose_feats) < 100:
            headpose_feats.append(0)

        yaw, pitch, roll = headpose_model.predict(np.array(headpose_feats).reshape(1, -1))[0]

        # Drowsy condition
        if eyes_closed or pitch > 15:
            attention_state.append("Drowsy")
    else:
        attention_state.append("Off-Camera")

    # Final label
    final_label = ", ".join(attention_state) if attention_state else "No Label"
    cv2.putText(frame, final_label, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 3)

    # Show window
    cv2.imshow("Realtime Monitor", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




✅ Yaw calibrated: 77.52


