Importing Libraries

In [2]:
import warnings
# Suppress specific Keras warnings from FER library
warnings.filterwarnings("ignore", message="The structure of `inputs` doesn't match the expected structure.")

import cv2
import mediapipe as mp
import numpy as np
from collections import deque
from fer import FER

Helper function for drawing text with a drop shadow

In [3]:
# Text helper: a white backdrop copy is drawn first, then the colored text on top
def draw_text_with_shadow(image, text, pos, font_scale, color, thickness):
    """Draw `text` onto `image` twice so it stays readable on busy backgrounds.

    The first pass is white, one unit thicker and shifted by (+1, +1) pixels;
    the second pass is the requested color at the requested position.

    Args:
        image: image array that cv2.putText draws on (modified in place).
        text: string to render.
        pos: (x, y) origin handed to cv2.putText for the foreground text.
        font_scale: scale factor for FONT_HERSHEY_SIMPLEX.
        color: color tuple of the foreground text.
        thickness: stroke thickness of the foreground text.
    """
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    origin_x, origin_y = pos
    passes = (
        ((origin_x + 1, origin_y + 1), (255,255,255), thickness + 1),  # backdrop
        ((origin_x, origin_y), color, thickness),                      # foreground
    )
    for origin, pass_color, pass_thickness in passes:
        cv2.putText(image, text, origin, font_face,
                    font_scale, pass_color, pass_thickness, cv2.LINE_AA)

Function for checking whether a hand is open

In [4]:

# Open-hand test: all four non-thumb fingertips above the wrist and a wide index/pinky spread
def is_hand_open(hand_landmarks, image_width, image_height):
    """Report whether a hand looks open.

    Args:
        hand_landmarks: object whose ``landmark`` sequence holds points with
            ``x`` and ``y`` attributes; index 0 is used as the wrist and
            8/12/16/20 as the index/middle/ring/pinky fingertips.
        image_width: width used to scale ``x`` to pixels.
        image_height: height used to scale ``y`` to pixels.

    Returns:
        Truthy when all four fingertips have a smaller ``y`` than the wrist and
        the angle between the wrist->index-tip and wrist->pinky-tip vectors
        (measured in pixel space) exceeds 30 degrees; falsy otherwise.
    """
    points = hand_landmarks.landmark
    base = points[0]

    # The thumb tip (4) is deliberately not part of the raised-finger count.
    raised = 0
    for tip_id in (8, 12, 16, 20):
        if points[tip_id].y < base.y:
            raised += 1

    def to_pixels(lm):
        # Truncate to integer pixel coordinates.
        return int(lm.x * image_width), int(lm.y * image_height)

    base_px = np.array(to_pixels(base))

    def unit_from_wrist(lm):
        # The epsilon keeps the division finite when the tip coincides with the wrist.
        offset = np.array(to_pixels(lm)) - base_px
        return offset / (np.linalg.norm(offset) + 1e-6)

    cosine = np.dot(unit_from_wrist(points[8]), unit_from_wrist(points[20]))
    spread_deg = np.degrees(np.arccos(np.clip(cosine, -1.0, 1.0)))
    return raised >= 4 and spread_deg > 30

Initializing Mediapipe utilities

In [5]:
# Short aliases for the MediaPipe modules used below:
#   mp_drawing  - draw_landmarks / DrawingSpec helpers used to render connections
#   mp_holistic - Holistic model plus the POSE_CONNECTIONS / HAND_CONNECTIONS sets
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

Landmark index tables (face, body, fingers) and label offsets

In [6]:
# Face points that get a dot and a text label in the capture loop.
# Maps the on-screen label -> index into results.face_landmarks.landmark.
# NOTE(review): in MediaPipe's face-mesh index sets, 33/159/145/70/127 appear to
# lie on the subject's right side and 386/374/300/356 on the subject's left, and
# 70 looks like an outer rather than inner brow point - confirm which
# Left/Right convention (image side vs. subject side) these labels intend.
important_face_landmarks = {
    "Forehead": 10,
    "Eyes": 33,
    "Nose": 1,
    "Lips": 61,
    "Chin": 199,
    "Left Ear": 127,
    "Right Ear": 356,
    # Brow and eyelid points also read by the detect_* heuristics
    "Left_Eyebrow_Inner": 70,
    "Right_Eyebrow_Inner": 300,
    "Left_Eye_Upper": 159,
    "Left_Eye_Lower": 145,
    "Right_Eye_Upper": 386,
    "Right_Eye_Lower": 374
}
# Body points that get a dot and a text label in the capture loop.
# Maps the on-screen label -> index into results.pose_landmarks.landmark.
# The "Left"/"Right" prefix also selects the label offset from `offsets`.
important_body_landmarks = {
    "Left Shoulder": 11,
    "Right Shoulder": 12,
    "Left Elbow": 13,
    "Right Elbow": 14,
    "Left Wrist": 15,
    "Right Wrist": 16,
    "Left Hip": 23,
    "Right Hip": 24,
    "Left Knee": 25,
    "Right Knee": 26,
    "Left Ankle": 27,
    "Right Ankle": 28
}

# Hand points that are labelled and tracked as trails.
# Maps index into the hand's landmark list -> label suffix.
# Note: index 0 is labelled "Palm" here, while is_hand_open treats it as the wrist.
finger_landmarks = {
    0: "Palm",
    4: "Thumb Tip",
    8: "Index Tip",
    12: "Middle Tip",
    16: "Ring Tip",
    20: "Pinky Tip"
}

# (dx, dy) pixel offsets added to a landmark's pixel position to place its label.
# "Left"/"Right" are used for body and hand labels, "default" for face labels.
offsets = {
    "Left": (-25, -12),
    "Right": (23, -10),
    "default": (10, -14)
}


Styling part

In [7]:
# Drawing parameters shared by the capture loop
font_scale = 0.40      # scale passed to draw_text_with_shadow for landmark labels
line_thickness = 1     # thickness of pose / hand connection lines
circle_radius = 2      # landmark dot radius (body and hand dots use circle_radius+1)
text_thickness = 1     # stroke thickness of landmark labels

Emotion Tracking

In [11]:
# FER emotion detector, constructed with mtcnn=True
# (presumably selects MTCNN face detection - see the FER documentation).
emotion_detector = FER(mtcnn=True)

# Fingertip trail buffers: one bounded deque of (x, y) pixel points per tracked
# hand landmark, kept separately for each hand. Once max_trails_len points are
# stored, appending a new one drops the oldest.
max_trails_len = 20
finger_trails_right = {i: deque(maxlen=max_trails_len) for i in finger_landmarks.keys()}
finger_trails_left = {i: deque(maxlen=max_trails_len) for i in finger_landmarks.keys()}

# Utility: Smooth emotion label to reduce jitter
# Sliding window holding the 10 most recent labels passed to smooth_emotion.
emotion_history = deque(maxlen=10)
def smooth_emotion(emotion_label):
    """Record `emotion_label` and return the most frequent label in the window.

    Args:
        emotion_label: hashable label (normally a string) for the current frame.

    Returns:
        The label with the highest count among the last 10 recorded labels.
        When several labels share the highest count, the one that entered the
        window earliest wins, so the result is deterministic and an
        established label is not displaced by a mere tie.
    """
    emotion_history.append(emotion_label)
    # Tally in window order; dicts preserve insertion order, and max() returns
    # the first maximal key, which yields the earliest-seen label on ties.
    # (Iterating a set here would make ties depend on string hash order.)
    counts = {}
    for label in emotion_history:
        counts[label] = counts.get(label, 0) + 1
    return max(counts, key=counts.get)

# Emotion heuristics matching your detailed descriptions:

def detect_happy(face_landmarks):
    """Heuristic "happy" check from mouth-corner, eyelid and cheek landmarks.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with a
            ``y`` attribute (e.g. ``results.face_landmarks``).

    Returns:
        bool: True when both mouth corners (61, 291) have a smaller ``y`` than
        the nose tip (1) AND either the 159/145 eyelid gap is below 0.018 or a
        cheek point (234 or 454) has a smaller ``y`` than the nose tip.
        False otherwise, including when the landmarks cannot be read.
    """
    try:
        left_lip_corner = face_landmarks.landmark[61]
        right_lip_corner = face_landmarks.landmark[291]
        nose_tip = face_landmarks.landmark[1]
        left_cheek = face_landmarks.landmark[234]
        right_cheek = face_landmarks.landmark[454]
        left_eye_top = face_landmarks.landmark[159]
        left_eye_bottom = face_landmarks.landmark[145]
        # NOTE(review): if y grows downward in the image, this requires the
        # mouth corners to sit above the nose tip, which looks unlikely for an
        # upright face - confirm the intended comparison.
        lip_raise = (left_lip_corner.y < nose_tip.y) and (right_lip_corner.y < nose_tip.y)
        eye_gap_left = abs(left_eye_top.y - left_eye_bottom.y)
        eye_squint = eye_gap_left < 0.018
        cheek_raise = (left_cheek.y < nose_tip.y) or (right_cheek.y < nose_tip.y)
        return lip_raise and (eye_squint or cheek_raise)
    except Exception:
        # Best-effort: unreadable landmarks mean "not detected". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a kernel interrupt is not swallowed.
        return False

def detect_sad(face_landmarks):
    """Heuristic "sad" check from brow and mouth-corner landmarks.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with a
            ``y`` attribute (e.g. ``results.face_landmarks``).

    Returns:
        bool: True when brow points 70 and 300 each have a smaller ``y`` than
        the brow points 105 and 334 respectively AND both mouth corners
        (61, 291) have a larger ``y`` than point 168.
        False otherwise, including when the landmarks cannot be read.
    """
    try:
        left_brow_inner = face_landmarks.landmark[70]
        right_brow_inner = face_landmarks.landmark[300]
        left_eyebrow_center = face_landmarks.landmark[105]
        right_eyebrow_center = face_landmarks.landmark[334]
        mouth_left = face_landmarks.landmark[61]
        mouth_right = face_landmarks.landmark[291]
        eye_center = face_landmarks.landmark[168]
        brows_raised = (left_brow_inner.y < left_eyebrow_center.y) and (right_brow_inner.y < right_eyebrow_center.y)
        # NOTE(review): if y grows downward and 168 lies between the eyes, the
        # mouth corners are below it on any upright face, so this term may not
        # discriminate - confirm the intended reference point.
        mouth_down = (mouth_left.y > eye_center.y) and (mouth_right.y > eye_center.y)
        return brows_raised and mouth_down
    except Exception:
        # Best-effort: unreadable landmarks mean "not detected". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a kernel interrupt is not swallowed.
        return False

def detect_angry(face_landmarks):
    """Heuristic "angry" check from brow, nose and lip landmarks.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with a
            ``y`` attribute (e.g. ``results.face_landmarks``).

    Returns:
        bool: True when brow points 70 and 300 have a larger ``y`` than the
        upper-eyelid points 159 and 386 respectively, the nose tip (1) has a
        smaller ``y`` than point 10, AND the 13/14 lip gap is below 0.01.
        False otherwise, including when the landmarks cannot be read.
    """
    try:
        brow_left = face_landmarks.landmark[70]
        brow_right = face_landmarks.landmark[300]
        nose_tip = face_landmarks.landmark[1]
        lip_top = face_landmarks.landmark[13]
        lip_bottom = face_landmarks.landmark[14]
        # NOTE(review): if y grows downward, these two terms require the brows
        # to be below the upper eyelids and the nose tip to be above point 10
        # (labelled "Forehead" elsewhere in this notebook); both look
        # unreachable on an upright face - confirm the intended comparisons.
        brow_lowered = (brow_left.y > face_landmarks.landmark[159].y) and (brow_right.y > face_landmarks.landmark[386].y)
        nose_wrinkle = (nose_tip.y < face_landmarks.landmark[10].y)
        lips_pressed = abs(lip_top.y - lip_bottom.y) < 0.01
        return brow_lowered and nose_wrinkle and lips_pressed
    except Exception:
        # Best-effort: unreadable landmarks mean "not detected". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a kernel interrupt is not swallowed.
        return False

def detect_nervous(face_landmarks):
    """Heuristic "nervous" check from lip gap and horizontal brow distance.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with
            ``x`` and ``y`` attributes (e.g. ``results.face_landmarks``).

    Returns:
        bool: True when the 13/14 lip gap is below 0.015 AND the horizontal
        distance between brow points 70 and 300 is below 0.05.
        False otherwise, including when the landmarks cannot be read.
    """
    try:
        lip_bottom = face_landmarks.landmark[14]
        lip_top = face_landmarks.landmark[13]
        lips_pressed = abs(lip_top.y - lip_bottom.y) < 0.015
        inner_brow_left = face_landmarks.landmark[70]
        inner_brow_right = face_landmarks.landmark[300]
        # NOTE(review): the threshold is an absolute fraction of the frame, so
        # it depends on how large the face appears - confirm this is intended.
        brows_contracted = abs(inner_brow_left.x - inner_brow_right.x) < 0.05
        return lips_pressed and brows_contracted
    except Exception:
        # Best-effort: unreadable landmarks mean "not detected". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a kernel interrupt is not swallowed.
        return False

def detect_sleepy(face_landmarks):
    """Heuristic "sleepy" check: both eyes nearly closed and mouth not open.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with a
            ``y`` attribute (e.g. ``results.face_landmarks``).

    Returns:
        bool: True when the 159/145 and 386/374 eyelid gaps are both below
        0.008 AND the 13/14 lip gap is not above 0.02.
        False otherwise, including when the landmarks cannot be read.
    """
    try:
        left_eye_top = face_landmarks.landmark[159]
        left_eye_bottom = face_landmarks.landmark[145]
        right_eye_top = face_landmarks.landmark[386]
        right_eye_bottom = face_landmarks.landmark[374]
        mouth_top = face_landmarks.landmark[13]
        mouth_bottom = face_landmarks.landmark[14]
        eye_open_left = abs(left_eye_top.y - left_eye_bottom.y)
        eye_open_right = abs(right_eye_top.y - right_eye_bottom.y)
        mouth_open = abs(mouth_top.y - mouth_bottom.y) > 0.02
        return eye_open_left < 0.008 and eye_open_right < 0.008 and not mouth_open
    except Exception:
        # Best-effort: unreadable landmarks mean "not detected". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a kernel interrupt is not swallowed.
        return False

def detect_active(face_landmarks):
    """Heuristic "active" check: both eyes wide open and brows raised.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with a
            ``y`` attribute (e.g. ``results.face_landmarks``).

    Returns:
        bool: True when the 159/145 and 386/374 eyelid gaps are both above
        0.02 AND brow points 70 and 300 both have a smaller ``y`` than
        point 10. False otherwise, including when the landmarks cannot be read.
    """
    try:
        left_eye_top = face_landmarks.landmark[159]
        left_eye_bottom = face_landmarks.landmark[145]
        right_eye_top = face_landmarks.landmark[386]
        right_eye_bottom = face_landmarks.landmark[374]
        left_brow = face_landmarks.landmark[70]
        right_brow = face_landmarks.landmark[300]
        eye_open_left = abs(left_eye_top.y - left_eye_bottom.y)
        eye_open_right = abs(right_eye_top.y - right_eye_bottom.y)
        # NOTE(review): if y grows downward, this requires the brows to sit
        # above point 10 (labelled "Forehead" elsewhere in this notebook),
        # which looks unreachable on an upright face - confirm the reference.
        brow_raised = (left_brow.y < face_landmarks.landmark[10].y) and (right_brow.y < face_landmarks.landmark[10].y)
        return eye_open_left > 0.02 and eye_open_right > 0.02 and brow_raised
    except Exception:
        # Best-effort: unreadable landmarks mean "not detected". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a kernel interrupt is not swallowed.
        return False

def detect_anxious(face_landmarks):
    """Heuristic "anxious" check: wide eyes, close brows and a small lip gap.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with
            ``x`` and ``y`` attributes (e.g. ``results.face_landmarks``).

    Returns:
        bool: True when the 159/145 and 386/374 eyelid gaps are both above
        0.025, the horizontal distance between brow points 70 and 300 is below
        0.08, AND the 13/14 lip gap is below 0.01.
        False otherwise, including when the landmarks cannot be read.
    """
    try:
        left_eye_top = face_landmarks.landmark[159]
        left_eye_bottom = face_landmarks.landmark[145]
        right_eye_top = face_landmarks.landmark[386]
        right_eye_bottom = face_landmarks.landmark[374]
        left_brow = face_landmarks.landmark[70]
        right_brow = face_landmarks.landmark[300]
        lip_top = face_landmarks.landmark[13]
        lip_bottom = face_landmarks.landmark[14]
        eye_open_left = abs(left_eye_top.y - left_eye_bottom.y)
        eye_open_right = abs(right_eye_top.y - right_eye_bottom.y)
        brow_furrow = abs(left_brow.x - right_brow.x) < 0.08
        lips_pursed = abs(lip_top.y - lip_bottom.y) < 0.01
        return eye_open_left > 0.025 and eye_open_right > 0.025 and brow_furrow and lips_pursed
    except Exception:
        # Best-effort: unreadable landmarks mean "not detected". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a kernel interrupt is not swallowed.
        return False

# Eye-openness heuristic: classify the face as "Sleepy", "Active" or neither
def detect_sleepy_active(face_landmarks, width, height):
    """Label a face from the mean vertical gap of its two eyelid pairs.

    Args:
        face_landmarks: object whose ``landmark`` sequence holds points with a
            ``y`` attribute; pairs (159, 145) and (386, 374) are read.
        width: accepted for the caller's convenience; not used.
        height: accepted for the caller's convenience; not used.

    Returns:
        "Sleepy" when the mean gap is below 0.008, "Active" when it is above
        0.02, otherwise None. None is also returned if reading the landmarks
        raises an Exception.
    """
    try:
        points = face_landmarks.landmark
        lid_pairs = ((159, 145), (386, 374))
        gaps = [abs(points[upper].y - points[lower].y) for upper, lower in lid_pairs]
        mean_gap = (gaps[0] + gaps[1]) / 2

        if mean_gap < 0.008:
            return "Sleepy"
        if mean_gap > 0.02:
            return "Active"
        return None
    except Exception:
        return None

def classify_detailed_emotion(face_landmarks, fer_emotion):
    """Pick an emotion label, preferring landmark heuristics over FER.

    The detect_* heuristics are tried in a fixed priority order and the label
    of the first one that fires is returned.

    Args:
        face_landmarks: face landmark object passed to each detect_* function;
            when falsy, no heuristic is consulted.
        fer_emotion: fallback value returned when no heuristic fires.

    Returns:
        One of "Happy", "Sad", "Angry", "Nervous", "Sleepy", "Active",
        "Anxious", or `fer_emotion` unchanged.

    Note: the capture loop visible in this notebook does not call this function.
    """
    if not face_landmarks:
        return fer_emotion  # nothing to inspect: use the FER prediction

    # Order matters: earlier entries take priority.
    prioritized_checks = (
        (detect_happy, "Happy"),
        (detect_sad, "Sad"),
        (detect_angry, "Angry"),
        (detect_nervous, "Nervous"),
        (detect_sleepy, "Sleepy"),
        (detect_active, "Active"),
        (detect_anxious, "Anxious"),
    )
    for check, label in prioritized_checks:
        if check(face_landmarks):
            return label
    return fer_emotion  # no heuristic fired: use the FER prediction

Setting up the webcam (up to full screen)

In [13]:
# Live capture loop: webcam -> MediaPipe Holistic -> annotated full-screen preview.
# Keys: q = quit, space = pause/resume, s = screenshot, c = cycle color theme.
window_name = 'Advanced FER with Detailed Annotations'

paused = False
img_count = 0      # number of screenshots written so far (used in the file name)
theme_idx = 0      # index into color_themes, advanced by the 'c' key

# Color tuples handed to the cv2 / MediaPipe drawing calls
color_themes = [
    {'dot': (0,0,255), 'line': (0,0,0)},
    {'dot': (0,255,0), 'line': (160,32,240)},
    {'dot': (255,127,36), 'line': (30,30,30)}
]

emotion_frame_skip = 5   # run the FER detector on every 5th processed frame
frame_counter = 0
cached_emotions = []     # last FER result, reused between detector runs

cap = cv2.VideoCapture(0)
try:
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    # namedWindow takes a window *flag*; WINDOW_NORMAL has the same numeric value
    # as the property id that was passed here before, so behavior is unchanged.
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            if not paused:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_counter += 1

                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(image_rgb)
                # Separate BGR copy for drawing; image_rgb stays free of overlays.
                image = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
                h, w, _ = image.shape
                color_theme = color_themes[theme_idx]

                # Draw important facial landmarks with labels
                if results.face_landmarks:
                    for name, idx in important_face_landmarks.items():
                        try:
                            lm = results.face_landmarks.landmark[idx]
                            x, y = int(lm.x * w), int(lm.y * h)
                            cv2.circle(image, (x, y), circle_radius, color_theme['dot'], -1, cv2.LINE_AA)
                            dx, dy = offsets["default"]
                            draw_text_with_shadow(image, name, (x+dx, y+dy), font_scale, (0,0,0), text_thickness)
                        except Exception:
                            # Best-effort per label; KeyboardInterrupt still propagates.
                            continue

                # Draw and label body landmarks and pose connections
                if results.pose_landmarks:
                    for name, idx in important_body_landmarks.items():
                        try:
                            lm = results.pose_landmarks.landmark[idx]
                            x, y = int(lm.x * w), int(lm.y * h)
                            cv2.circle(image, (x, y), circle_radius+1, color_theme['dot'], -1, cv2.LINE_AA)
                            if "Left" in name:
                                dx, dy = offsets["Left"]
                            elif "Right" in name:
                                dx, dy = offsets["Right"]
                            else:
                                dx, dy = offsets["default"]
                            draw_text_with_shadow(image, name, (x+dx, y+dy), font_scale, (0,0,0), text_thickness)
                        except Exception:
                            # Best-effort per label; KeyboardInterrupt still propagates.
                            continue
                    mp_drawing.draw_landmarks(
                        image,
                        results.pose_landmarks,
                        mp_holistic.POSE_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=color_theme['line'], thickness=line_thickness, circle_radius=circle_radius),
                        mp_drawing.DrawingSpec(color=color_theme['line'], thickness=line_thickness)
                    )

                # Process hands for fingertip trails and detect open hands
                hands_open_messages = []
                for hand_side, hand_landmarks, finger_trails in [
                    ("Right", results.right_hand_landmarks, finger_trails_right),
                    ("Left", results.left_hand_landmarks, finger_trails_left)
                ]:
                    if not hand_landmarks:
                        # Drop stale points so a reappearing hand does not get a
                        # line drawn from its old position to the new one.
                        for trail in finger_trails.values():
                            trail.clear()
                        continue

                    for idx, part in finger_landmarks.items():
                        try:
                            lm = hand_landmarks.landmark[idx]
                            x, y = int(lm.x * w), int(lm.y * h)
                            finger_trails[idx].append((x, y))
                            cv2.circle(image, (x, y), circle_radius+1, color_theme['dot'], -1, cv2.LINE_AA)
                            dx, dy = offsets[hand_side]
                            draw_text_with_shadow(image, f"{hand_side} {part}", (x+dx, y+dy), font_scale, (0,0,0), text_thickness)
                        except Exception:
                            # Best-effort per label; KeyboardInterrupt still propagates.
                            continue
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp_holistic.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=color_theme['line'], thickness=line_thickness, circle_radius=circle_radius),
                        mp_drawing.DrawingSpec(color=color_theme['line'], thickness=line_thickness)
                    )
                    # Detect open hand but do NOT draw wrist-fingertip yellow lines
                    if is_hand_open(hand_landmarks, w, h):
                        hands_open_messages.append(f"{hand_side} Hand Open")
                    # Draw fingertip trails as lines
                    for trail in finger_trails.values():
                        for i in range(1, len(trail)):
                            cv2.line(image, trail[i-1], trail[i], (0,255,255), 1)

                # Display open hand messages on screen
                base_y = 140
                for i, msg in enumerate(hands_open_messages):
                    draw_text_with_shadow(image, msg, (15, base_y + i*30), 0.7, (0,255,0), 2)

                # Estimate and display wrist distance if both hands detected
                if results.left_hand_landmarks and results.right_hand_landmarks:
                    lw = np.array([results.left_hand_landmarks.landmark[0].x,
                                   results.left_hand_landmarks.landmark[0].y,
                                   results.left_hand_landmarks.landmark[0].z])
                    rw = np.array([results.right_hand_landmarks.landmark[0].x,
                                   results.right_hand_landmarks.landmark[0].y,
                                   results.right_hand_landmarks.landmark[0].z])
                    dist_norm = np.linalg.norm(lw - rw)
                    # NOTE(review): dist_norm is in landmark coordinate units; the
                    # fixed factor of 2.0 is an uncalibrated guess, so the "m" in
                    # the label is only a rough indication - confirm or recalibrate.
                    scale_factor = 2.0
                    distance_meters = dist_norm * scale_factor
                    dist_text = f"Hands Distance: {distance_meters:.2f} m"
                    draw_text_with_shadow(image, dist_text, (15, base_y + len(hands_open_messages)*30), 0.7, (255,255,0), 2)

                # FER emotion detection every few frames for efficiency.
                # The clean RGB frame is used so the dots and labels drawn over the
                # face above are not part of the classifier input.
                if frame_counter % emotion_frame_skip == 0:
                    cached_emotions = emotion_detector.detect_emotions(image_rgb)

                # Use heuristic sleepy/active detection
                custom_emotion = None
                if results.face_landmarks:
                    custom_emotion = detect_sleepy_active(results.face_landmarks, w, h)

                # FER dominant emotion (first detected face) or fallback to neutral
                if cached_emotions:
                    emotion_scores = cached_emotions[0]['emotions']
                    raw_label = max(emotion_scores.items(), key=lambda item: item[1])[0]
                else:
                    emotion_scores = {}
                    raw_label = "neutral"

                # Show custom if detected, else the smoothed FER label.
                # Smoothing runs on the bare label: a string that embeds the
                # percentage differs on almost every detection, which would make
                # the "most frequent label" vote meaningless.
                if custom_emotion:
                    display_emotion = custom_emotion
                else:
                    stable_label = smooth_emotion(raw_label)
                    if stable_label in emotion_scores:
                        display_emotion = f"{stable_label.capitalize()} ({emotion_scores[stable_label]*100:.1f}%)"
                    else:
                        display_emotion = stable_label.capitalize()

                # Display current emotion label on image
                draw_text_with_shadow(image, display_emotion, (15, 80), 0.7, (0,0,255), 2)

                # Pose confidence and posture evaluation
                if results.pose_landmarks:
                    # Shoulders, hips and knees
                    torso_ids = [11,12,23,24,25,26]
                    visibilities = [results.pose_landmarks.landmark[i].visibility for i in torso_ids]
                    avg_conf = sum(visibilities)/len(visibilities)
                    conf_str = f"Pose Confidence: {avg_conf:.2f}"

                    # Angle between the left hip -> left shoulder vector and image "up"
                    left_shoulder = results.pose_landmarks.landmark[11]
                    left_hip = results.pose_landmarks.landmark[23]
                    torso_vec = np.array([left_shoulder.x - left_hip.x, left_shoulder.y - left_hip.y])
                    vertical_vec = np.array([0,-1])
                    torso_vec_norm = torso_vec / (np.linalg.norm(torso_vec)+1e-6)
                    angle_rad = np.arccos(np.clip(np.dot(torso_vec_norm, vertical_vec), -1.0, 1.0))
                    angle_deg = np.degrees(angle_rad)
                    posture_str = "Good Posture" if angle_deg < 15 else "Poor Posture"

                    cv2.putText(image, conf_str, (15,40), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                (0,255,0) if avg_conf > 0.5 else (0,0,255), 2, cv2.LINE_AA)
                    cv2.putText(image, posture_str, (15,110), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                (0,255,0) if posture_str == "Good Posture" else (0,0,255),2,cv2.LINE_AA)

                # Display control instructions on bottom-left corner
                info_text = "q=quit | space=pause/resume | s=screenshot | c=theme"
                cv2.putText(image, info_text, (8, h-16), cv2.FONT_HERSHEY_SIMPLEX,
                            0.38, (80, 40, 10), 1, cv2.LINE_AA)

                # Show the final processed frame
                cv2.imshow(window_name, image)

            # Keyboard events for quit, pause/resume, screenshots, and theme switch.
            # Polled even while paused so the window stays responsive.
            key = cv2.waitKey(10) & 0xFF
            if key == ord('q'):
                break
            elif key == ord(' '):
                paused = not paused
            elif key == ord('s') and not paused:
                img_count += 1
                cv2.imwrite(f"screenshot_{img_count}.png", image)
                print(f"Screenshot saved as screenshot_{img_count}.png")
            elif key == ord('c'):
                theme_idx = (theme_idx + 1) % len(color_themes)
finally:
    # Release resources even if the loop raised (including a kernel interrupt),
    # so the camera is not left locked and the full-screen window is closed.
    cap.release()
    cv2.destroyAllWindows()