Importing Libraries

In [13]:
import cv2
import mediapipe as mp
import numpy as np
from collections import deque
from fer import FER

Helper function for drawing text with a shadow

In [14]:
# Render a label twice: a white, slightly thicker copy underneath, then the colored text on top
def draw_text_with_shadow(image, text, pos, font_scale, color, thickness):
    """Draw `text` on `image` with a white drop shadow for readability.

    Args:
        image: Image passed straight to cv2.putText; it is drawn on in place.
        text: String to render.
        pos: (x, y) position handed to cv2.putText for the main text.
        font_scale: Font scale factor passed to cv2.putText.
        color: Color of the main text.
        thickness: Stroke thickness of the main text; the shadow uses thickness + 1.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)
    origin_x, origin_y = pos
    # The shadow sits one pixel right and one pixel down from the main text
    shadow_origin = (origin_x + 1, origin_y + 1)

    # Shadow pass goes first so the main text is painted over it
    cv2.putText(image, text, shadow_origin, font, font_scale, white, thickness + 1, cv2.LINE_AA)
    cv2.putText(image, text, (origin_x, origin_y), font, font_scale, color, thickness, cv2.LINE_AA)

Function for checking whether a hand is open

In [15]:
# Open-hand heuristic: all four non-thumb fingertips above the wrist AND index/pinky spread wide enough
def is_hand_open(hand_landmarks, image_width, image_height):
    """Decide whether a hand looks open.

    Args:
        hand_landmarks: Object whose `.landmark` sequence is indexed with the ids
            0 (wrist), 4, 8, 12, 16 and 20 (thumb and fingertips); each entry
            exposes `.x` and `.y`.
        image_width: Width used to scale `.x` to pixels.
        image_height: Height used to scale `.y` to pixels.

    Returns:
        Truthy when the four non-thumb fingertips all have a smaller `.y` than
        the wrist and the angle at the wrist between the index and pinky
        fingertips exceeds 30 degrees; falsy otherwise.
    """
    points = hand_landmarks.landmark
    palm_base = points[0]
    thumb_tip, index_tip, middle_tip, ring_tip, little_tip = (
        points[4], points[8], points[12], points[16], points[20]
    )

    def to_pixels(lm):
        # Truncate to whole pixels, exactly like int() on each coordinate
        return np.array([int(lm.x * image_width), int(lm.y * image_height)])

    # A smaller y means higher in the image; the thumb is deliberately not counted
    raised = 0
    for tip in (index_tip, middle_tip, ring_tip, little_tip):
        if tip.y < palm_base.y:
            raised += 1

    # Rays from the wrist to the index and pinky fingertips, in pixel space
    origin = to_pixels(palm_base)
    ray_index = to_pixels(index_tip) - origin
    ray_pinky = to_pixels(little_tip) - origin

    # The epsilon keeps the division defined when a fingertip coincides with the wrist
    unit_index = ray_index / (np.linalg.norm(ray_index) + 1e-6)
    unit_pinky = ray_pinky / (np.linalg.norm(ray_pinky) + 1e-6)
    cosine = np.clip(np.dot(unit_index, unit_pinky), -1.0, 1.0)
    spread_deg = np.degrees(np.arccos(cosine))

    return raised >= 4 and spread_deg > 30

Initializing Mediapipe utilities

In [16]:
# MediaPipe solution modules used by the rest of the notebook
mp_holistic = mp.solutions.holistic        # provides Holistic, POSE_CONNECTIONS, HAND_CONNECTIONS
mp_drawing = mp.solutions.drawing_utils    # provides draw_landmarks and DrawingSpec

Landmark indices and label offsets (face, body and hands)

In [17]:
# Face landmarks to label: display name -> index into results.face_landmarks.landmark
important_face_landmarks = {
    "Forehead": 10,
    "Eyes": 33,
    "Nose": 1,
    "Lips": 61,
    "Chin": 199,
    "Left Ear": 127,
    "Right Ear": 356,
}

# Body landmarks to label: display name -> index into results.pose_landmarks.landmark
important_body_landmarks = {
    "Left Shoulder": 11,
    "Right Shoulder": 12,
    "Left Elbow": 13,
    "Right Elbow": 14,
    "Left Wrist": 15,
    "Right Wrist": 16,
    "Left Hip": 23,
    "Right Hip": 24,
    "Left Knee": 25,
    "Right Knee": 26,
    "Left Ankle": 27,
    "Right Ankle": 28,
}

# Hand landmarks to label and trail: index into a hand's landmark list -> display name
finger_landmarks = {
    0: "Palm",
    4: "Thumb Tip",
    8: "Index Tip",
    12: "Middle Tip",
    16: "Ring Tip",
    20: "Pinky Tip",
}

# Pixel (dx, dy) shifts applied to a label relative to its landmark, keyed by side
offsets = {
    "Left": (-25, -12),
    "Right": (23, -10),
    "default": (10, -14),
}

Drawing style parameters

In [18]:
# Overlay style shared by every drawing call in the main loop
# -- shapes --
circle_radius = 2     # radius of landmark dots (some call sites add 1)
line_thickness = 1    # stroke width for pose/hand connection lines
# -- landmark labels --
font_scale = 0.35     # font scale for landmark labels
text_thickness = 1    # stroke width for landmark labels

Emotion tracking and fingertip trails

In [19]:
# FER emotion detector, constructed with mtcnn=True (MTCNN-based face detection)
emotion_detector = FER(mtcnn=True)

# Recent fingertip positions per hand: one bounded deque per tracked landmark id,
# so each trail holds at most the last `max_trails_len` points.
max_trails_len = 20
finger_trails_right = {
    landmark_id: deque(maxlen=max_trails_len) for landmark_id in finger_landmarks
}
finger_trails_left = {
    landmark_id: deque(maxlen=max_trails_len) for landmark_id in finger_landmarks
}

# Emotion smoothing to reduce jitter: majority vote over the last 10 detections
emotion_history = deque(maxlen=10)
def smooth_emotion(emotion_label):
    """Record `emotion_label` and return the most frequent label in the recent history.

    The label is appended to the module-level `emotion_history` deque, which
    keeps at most the last 10 entries, and the label that occurs most often in
    that window is returned.

    Ties are resolved deterministically in favor of the most recently seen
    label. (Voting over a `set` would break ties by set iteration order, which
    for strings can differ from one interpreter run to the next.)

    Args:
        emotion_label: Label for the current frame. Pass the bare label without
            per-frame details such as a confidence value, otherwise nearly every
            entry is unique and the vote cannot smooth anything.

    Returns:
        The majority label among the stored detections.
    """
    emotion_history.append(emotion_label)
    # max() keeps the first maximal item it meets, so scanning newest-to-oldest
    # makes a tie resolve to the most recent label.
    return max(reversed(emotion_history), key=emotion_history.count)

# Eye-openness heuristic: classify the face as "Sleepy", "Active", or neither
def detect_sleepy_active(face_landmarks, width, height):
    """Classify eye openness from the vertical gap between eyelid landmarks.

    The gap is measured directly on the landmarks' `.y` values (no pixel
    scaling), so `width` and `height` are accepted but not used.

    Args:
        face_landmarks: Object whose `.landmark` sequence is indexed with the
            ids 159/145 and 386/374 (upper/lower point of each eye).
        width: Unused.
        height: Unused.

    Returns:
        "Sleepy" when the mean gap is below 0.008, "Active" when it is above
        0.02, and None in between or when anything goes wrong while reading
        the landmarks.
    """
    sleepy_below = 0.008
    active_above = 0.02
    eyelid_pairs = ((159, 145), (386, 374))  # (upper, lower) for each eye

    try:
        pts = face_landmarks.landmark
        gaps = []
        for upper_id, lower_id in eyelid_pairs:
            gaps.append(abs(pts[upper_id].y - pts[lower_id].y))
        mean_gap = (gaps[0] + gaps[1]) / 2

        if mean_gap < sleepy_below:
            return "Sleepy"
        if mean_gap > active_above:
            return "Active"
        return None
    except Exception:
        # Best effort: missing or malformed landmarks simply yield no verdict
        return None

Setting up the webcam (full-screen window) and running the main loop

In [21]:
# Main cell: capture webcam frames, run MediaPipe Holistic + FER on each one,
# draw the overlays and handle keyboard controls until 'q' is pressed or the
# camera stops delivering frames.
cap = cv2.VideoCapture(0)
window_name = 'MediaPipe + FER Emotion Detection'

paused = False     # True while the displayed frame is frozen (space toggles it)
img_count = 0      # Number of screenshots saved so far; used in the file name
theme_idx = 0      # Index into color_themes ('c' cycles through them)

# Define some color themes for drawing
color_themes = [
    {'dot': (0, 0, 255), 'line': (0, 0, 0)},         # Red dots, black lines
    {'dot': (0, 255, 0), 'line': (160, 32, 240)},   # Green dots, purple lines
    {'dot': (255, 127, 36), 'line': (30, 30, 30)}   # Orange dots, dark gray lines
]

# try/finally guarantees the camera and the window are released even when a
# frame raises. In a notebook the kernel outlives the cell, so a capture that
# is never released could keep the webcam busy for later runs.
try:
    if not cap.isOpened():
        print("Could not open webcam (device 0).")

    # namedWindow takes a window *flag*; WND_PROP_FULLSCREEN is a property id and
    # belongs only in setWindowProperty. WINDOW_NORMAL is the flag intended here
    # (both constants are 0 in OpenCV, so the window behaves as before).
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            if not paused:
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert image color space for MediaPipe processing.
                # `image_rgb` stays free of overlays; `image` is the BGR copy we draw on.
                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(image_rgb)
                image = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
                h, w, _ = image.shape
                color_theme = color_themes[theme_idx]

                # Draw and label important face landmarks
                if results.face_landmarks:
                    for name, idx in important_face_landmarks.items():
                        try:
                            lm = results.face_landmarks.landmark[idx]
                            x, y = int(lm.x * w), int(lm.y * h)
                            cv2.circle(image, (x, y), circle_radius, color_theme['dot'], -1, cv2.LINE_AA)
                            dx, dy = offsets["default"]
                            draw_text_with_shadow(image, name, (x + dx, y + dy), font_scale, (0, 0, 0), text_thickness)
                        except Exception:
                            # Deliberately best-effort: one unlabeled landmark must not stop the video loop
                            pass

                # Draw and label important body landmarks
                if results.pose_landmarks:
                    for name, idx in important_body_landmarks.items():
                        try:
                            lm = results.pose_landmarks.landmark[idx]
                            x, y = int(lm.x * w), int(lm.y * h)
                            cv2.circle(image, (x, y), circle_radius + 1, color_theme['dot'], -1, cv2.LINE_AA)
                            if "Left" in name:
                                dx, dy = offsets["Left"]
                            elif "Right" in name:
                                dx, dy = offsets["Right"]
                            else:
                                dx, dy = offsets["default"]
                            draw_text_with_shadow(image, name, (x + dx, y + dy), font_scale, (0, 0, 0), text_thickness)
                        except Exception:
                            # Deliberately best-effort, see above
                            pass

                    # Draw pose connections using MediaPipe utility
                    mp_drawing.draw_landmarks(
                        image,
                        results.pose_landmarks,
                        mp_holistic.POSE_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=color_theme['line'], thickness=line_thickness, circle_radius=circle_radius),
                        mp_drawing.DrawingSpec(color=color_theme['line'], thickness=line_thickness)
                    )

                # Process hand landmarks, fingertip trails, labels, and open-hand gesture lines
                # Also collect messages if hands are open
                hands_open_messages = []
                for hand_side, hand_landmarks, finger_trails in [
                    ("Right", results.right_hand_landmarks, finger_trails_right),
                    ("Left", results.left_hand_landmarks, finger_trails_left)
                ]:
                    if hand_landmarks:
                        for idx, part in finger_landmarks.items():
                            try:
                                lm = hand_landmarks.landmark[idx]
                                x, y = int(lm.x * w), int(lm.y * h)
                                finger_trails[idx].append((x, y))
                                cv2.circle(image, (x, y), circle_radius + 1, color_theme['dot'], -1, cv2.LINE_AA)
                                dx, dy = offsets[hand_side]
                                draw_text_with_shadow(image, f"{hand_side} {part}", (x + dx, y + dy), font_scale, (0, 0, 0), text_thickness)
                            except Exception:
                                # Deliberately best-effort, see above
                                pass

                        # Draw hand connections
                        mp_drawing.draw_landmarks(
                            image,
                            hand_landmarks,
                            mp_holistic.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=color_theme['line'], thickness=line_thickness, circle_radius=circle_radius),
                            mp_drawing.DrawingSpec(color=color_theme['line'], thickness=line_thickness)
                        )

                        # If hand is open, draw lines from wrist to fingertips
                        if is_hand_open(hand_landmarks, w, h):
                            wrist_lm = hand_landmarks.landmark[0]
                            wrist_xy = (int(wrist_lm.x * w), int(wrist_lm.y * h))
                            fingertip_ids = [4, 8, 12, 16, 20]
                            for tip_idx in fingertip_ids:
                                tip_lm = hand_landmarks.landmark[tip_idx]
                                tip_xy = (int(tip_lm.x * w), int(tip_lm.y * h))
                                cv2.line(image, wrist_xy, tip_xy, (0, 255, 255), 2)
                            # Add message for open hand
                            hands_open_messages.append(f"{hand_side} Hand Open")

                        # Draw fingertip trails as lines between consecutive stored points
                        for trail in finger_trails.values():
                            for i in range(1, len(trail)):
                                cv2.line(image, trail[i - 1], trail[i], (0, 255, 255), 1)

                # Display open hand messages on screen, vertically spaced
                base_y = 140
                for i, msg in enumerate(hands_open_messages):
                    draw_text_with_shadow(image, msg, (15, base_y + i * 30), 0.7, (0, 255, 0), 2)

                # If both hands detected, calculate and display an approximate wrist-to-wrist distance
                if results.left_hand_landmarks and results.right_hand_landmarks:
                    left_wrist = results.left_hand_landmarks.landmark[0]
                    right_wrist = results.right_hand_landmarks.landmark[0]

                    # Landmark coordinates exactly as reported by MediaPipe (not pixels)
                    lw = np.array([left_wrist.x, left_wrist.y, left_wrist.z])
                    rw = np.array([right_wrist.x, right_wrist.y, right_wrist.z])

                    # Euclidean distance in that coordinate space
                    dist_norm = np.linalg.norm(lw - rw)

                    # Approximate scale:
                    # At ~1 meter from camera, normalized distance ~0.3 corresponds roughly to ~0.6 m
                    # You can calibrate scale_factor as needed
                    scale_factor = 2.0  # empirical scale factor for meters conversion
                    distance_meters = dist_norm * scale_factor

                    distance_text = f"Hands Distance: {distance_meters:.2f} m"
                    draw_text_with_shadow(image, distance_text, (15, base_y + len(hands_open_messages) * 30), 0.7, (255, 255, 0), 2)

                # Emotion detection using FER. Run it on the clean RGB frame captured
                # before any overlay was drawn: the annotated `image` has dots and
                # text labels painted over the face.
                emotions = emotion_detector.detect_emotions(image_rgb)

                # Detect sleepy or active states based on eye openness (heuristic)
                custom_emotion = None
                if results.face_landmarks:
                    custom_emotion = detect_sleepy_active(results.face_landmarks, w, h)

                # Get the highest-scoring emotion of the first detected face, if any
                if emotions:
                    top_label, top_score = max(emotions[0]['emotions'].items(), key=lambda item: item[1])
                    raw_emotion = top_label.capitalize()
                else:
                    raw_emotion, top_score = "Neutral", None

                # Use custom detected emotion if available, else apply smoothing on raw emotion
                if custom_emotion:
                    display_emotion = custom_emotion
                else:
                    # Smooth on the bare label only. If the per-frame percentage were part
                    # of the string, almost every history entry would be unique and the
                    # majority vote could not smooth anything.
                    display_emotion = smooth_emotion(raw_emotion)
                    # The score belongs to this frame's label, so show it only when the
                    # smoothed label is that same label.
                    if top_score is not None and display_emotion == raw_emotion:
                        display_emotion = f"{display_emotion} ({top_score * 100:.1f}%)"

                # Display the emotion label on the image
                draw_text_with_shadow(image, display_emotion, (15, 80), 0.7, (0, 0, 255), 2)

                # Pose confidence (average visibility) and evaluate sitting posture
                if results.pose_landmarks:
                    torso_ids = [11, 12, 23, 24, 25, 26]  # Shoulders, hips, knees
                    visibilities = [results.pose_landmarks.landmark[i].visibility for i in torso_ids]
                    avg_conf = sum(visibilities) / len(visibilities)
                    confidence_str = f"Pose Confidence: {avg_conf:.2f}"

                    # Angle between the left hip -> left shoulder vector and image "up" (0, -1)
                    left_shoulder = results.pose_landmarks.landmark[11]
                    left_hip = results.pose_landmarks.landmark[23]
                    torso_vec = np.array([left_shoulder.x - left_hip.x, left_shoulder.y - left_hip.y])
                    vertical_vec = np.array([0, -1])
                    torso_vec_norm = torso_vec / (np.linalg.norm(torso_vec) + 1e-6)
                    angle_rad = np.arccos(np.clip(np.dot(torso_vec_norm, vertical_vec), -1.0, 1.0))
                    angle_deg = np.degrees(angle_rad)
                    posture_str = "Good Posture" if angle_deg < 15 else "Poor Posture"

                    # Display pose confidence and posture assessment on frame
                    cv2.putText(image, confidence_str, (15, 40), cv2.FONT_HERSHEY_SIMPLEX,
                                0.6, (0, 255, 0) if avg_conf > 0.5 else (0, 0, 255), 2, cv2.LINE_AA)
                    cv2.putText(image, posture_str, (15, 110), cv2.FONT_HERSHEY_SIMPLEX,
                                0.6, (0, 255, 0) if posture_str == "Good Posture" else (0, 0, 255), 2, cv2.LINE_AA)

                # Instructions on controls displayed at the screen bottom-left
                info_text = "q=quit | space=pause/resume | s=screenshot | c=theme"
                cv2.putText(image, info_text, (8, h - 16), cv2.FONT_HERSHEY_SIMPLEX,
                            0.38, (80, 40, 10), 1, cv2.LINE_AA)

                # Show the processed frame in fullscreen window
                cv2.imshow(window_name, image)

            # Keyboard event handling (also runs while paused so resume/quit still work)
            key = cv2.waitKey(10) & 0xFF
            if key == ord('q'):
                break
            elif key == ord(' '):
                paused = not paused
            elif key == ord('s') and not paused:
                img_count += 1
                cv2.imwrite(f"screenshot_{img_count}.png", image)
                print(f"Screenshot saved as screenshot_{img_count}.png")
            elif key == ord('c'):
                theme_idx = (theme_idx + 1) % len(color_themes)
finally:
    # Release resources however the loop ended (quit key, end of stream, or an exception)
    cap.release()
    cv2.destroyAllWindows()