In [None]:
Finger Extension: how far each finger is extended, measured in real time. A fully extended finger reads close to 100%, while a curled finger reads lower.

Palm Size (Relative): how large the palm appears relative to the camera frame, which gives a rough estimate of how far the hand is from the camera.

In [32]:
import mediapipe as mp
import cv2
import numpy as np

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
from mediapipe.python.solutions.drawing_utils import DrawingSpec

# Custom drawing specs for landmarks and connections.
# NOTE(review): neither spec is referenced by the drawing loop visible in this
# cell, which passes MediaPipe's default hand styles instead.
# NOTE(review): colour tuples are applied in the channel order of the image
# they are drawn on, so (0, 0, 255) is blue on an RGB image but red on a BGR
# OpenCV frame -- confirm which is intended before using these.
custom_landmark_style = DrawingSpec(
    color=(0, 255, 0),   # green in both RGB and BGR
    thickness=2,
    circle_radius=6,     # larger landmark circles
)
custom_connection_style = DrawingSpec(
    color=(0, 0, 255),
    thickness=3,         # thicker connection lines
)


# Build the MediaPipe Hands solution object used to process each frame.
hands_options = dict(
    static_image_mode=False,       # input is a video stream, not unrelated still images
    max_num_hands=2,               # report at most two hands per frame
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
hands = mp_hands.Hands(**hands_options)

def euclidean_dist(point1, point2):
    """Return the 2-D Euclidean distance between two points.

    Only the first two components (x, y) of each point are read, so
    ``(x, y, z)`` tuples are accepted and their third component is ignored.
    """
    return np.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)


def get_finger_extension(landmarks, tip_idx, base_idx):
    """Estimate how extended a finger is, as a fraction in ``[0.0, 1.0]``.

    The tip-to-base distance is divided by a reference length of 1.5 times
    the distance between landmark 0 and landmark 9 (used here as the palm
    size), and the ratio is capped at 1.0.

    Args:
        landmarks: Indexable sequence of points; each point exposes x at
            index 0 and y at index 1. Must contain indices 0, 9, ``tip_idx``
            and ``base_idx``.
        tip_idx: Index of the fingertip landmark.
        base_idx: Index of the landmark at the base of the finger.

    Returns:
        A float between 0.0 and 1.0. Returns 0.0 when the reference length is
        not positive (landmarks 0 and 9 coincide), because no meaningful
        ratio exists in that case.
    """
    current_dist = euclidean_dist(landmarks[tip_idx], landmarks[base_idx])
    max_dist = euclidean_dist(landmarks[0], landmarks[9]) * 1.5  # Using palm size as reference

    # A degenerate palm (zero or NaN reference length) would otherwise produce
    # NaN/inf here, and callers that do int(extension * 100) would crash.
    if not max_dist > 0:
        return 0.0

    return float(min(current_dist / max_dist, 1.0))


# Overlay layout in pixels. These values never change between frames, so they
# are defined once rather than on every loop iteration.
info_x = 20
y_start = 50
gap = 60
bar_width = 200
bar_height = 20

# Landmark indices from the MediaPipe Hands landmark model.
finger_tips = [4, 8, 12, 16, 20]   # Thumb, index, middle, ring, pinky tips
finger_bases = [2, 5, 9, 13, 17]   # Thumb MCP, index MCP, middle MCP, ring MCP, pinky MCP
finger_names = ['Thumb', 'Index', 'Middle', 'Ring', 'Pinky']

# Give up after this many consecutive failed reads instead of spinning forever
# on a camera that has stopped delivering frames.
max_failed_reads = 30


def _draw_metric(frame, label, fraction, x, y, fill_color):
    """Draw a text label and a horizontal bar filled to `fraction` of its width."""
    cv2.putText(frame, label, (x, y + 15),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
    bar_top = y + 30
    bar_bottom = bar_top + bar_height
    # Dark background first, then the coloured fill on top of it.
    cv2.rectangle(frame, (x, bar_top), (x + bar_width, bar_bottom), (50, 50, 50), -1)
    cv2.rectangle(frame, (x, bar_top), (x + int(fraction * bar_width), bar_bottom),
                  fill_color, -1)


cap = cv2.VideoCapture(0)
failed_reads = 0
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # Tolerate transient failures, but do not loop forever.
            failed_reads += 1
            if failed_reads >= max_failed_reads:
                print('No frames received from the camera; stopping.')
                break
            continue
        failed_reads = 0

        # Convert to RGB for MediaPipe
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        h, w, _ = frame.shape

        detected_hands = results.multi_hand_landmarks or []
        for hand_idx, hand_landmarks in enumerate(detected_hands):
            # One overlay column per detected hand, so that a second hand does
            # not paint over the first hand's readings. With a single hand the
            # column starts at info_x, exactly as before.
            panel_x = info_x + hand_idx * (w // len(detected_hands))

            # Draw hand landmarks
            mp_drawing.draw_landmarks(
                frame,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style()
            )

            # Convert normalized landmarks to pixel coordinates (z is kept as-is)
            landmarks_px = [(int(lm.x * w), int(lm.y * h), lm.z)
                            for lm in hand_landmarks.landmark]

            for i, (tip_idx, base_idx) in enumerate(zip(finger_tips, finger_bases)):
                extension = get_finger_extension(landmarks_px, tip_idx, base_idx)
                percent = int(extension * 100)
                _draw_metric(frame, f'{finger_names[i]}: {percent}%', extension,
                             panel_x, y_start + i * gap, (0, 191, 255))

            # Palm size as distance wrist(0) to middle_finger_mcp(9), expressed
            # relative to a quarter of the frame width and capped at 100%.
            palm_size = euclidean_dist(landmarks_px[0], landmarks_px[9])
            palm_percent = int(min(palm_size / (w / 4), 1) * 100)
            _draw_metric(frame, f'Palm size (relative): {palm_percent}%', palm_percent / 100,
                         panel_x, y_start + 5 * gap, (0, 255, 0))

        cv2.imshow('MediaPipe Hand Tracking & Finger Measurement', frame)
        if cv2.waitKey(5) & 0xFF == ord('q'):  # Press 'q' to exit
            break
finally:
    # Always free the camera and close the window, including when the loop
    # raises or the kernel is interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
Finger Joint Angles:

This step calculates the angles at the MCP (knuckle), PIP (middle joint), and DIP (joint nearest the fingertip) of the index finger.

The angles are computed from the three-dimensional positions of the relevant joints and show how far the finger is bent at each joint.

In [31]:
import cv2
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.backends.backend_agg import FigureCanvasAgg

mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

# Open camera device 0 for capture.
cap = cv2.VideoCapture(0)

# Square figure with a single axes covering the normalized [0, 1] x [0, 1] range.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set(xlim=(0, 1), ylim=(0, 1))
ax.invert_yaxis()  # image convention: y grows downward
ax.set(title='Face Landmarks', aspect='equal')

# Scatter artist that starts empty; update_plot() moves its points.
scatter = ax.scatter([], [], color='blue', s=10)

def update_plot(face_points_normalized):
    """Move the module-level ``scatter`` artist to the given landmark points.

    Args:
        face_points_normalized: Sequence of points whose first two items are
            the x and y coordinates. If it is empty or None, the artist is
            left unchanged.

    Returns:
        A one-element tuple containing the ``scatter`` artist.
    """
    if not face_points_normalized:
        return (scatter,)

    offsets = np.column_stack((
        [point[0] for point in face_points_normalized],
        [point[1] for point in face_points_normalized],
    ))
    scatter.set_offsets(offsets)
    return (scatter,)

def fig_to_image(fig):
    """Render a Matplotlib figure with the Agg canvas and return a BGR image.

    Args:
        fig: The Matplotlib figure to rasterize.

    Returns:
        The array produced by converting the canvas's RGBA pixels to BGR
        with OpenCV.
    """
    agg_canvas = FigureCanvasAgg(fig)
    agg_canvas.draw()

    width, height = agg_canvas.get_width_height()
    # View the flat RGBA byte buffer as rows x columns x 4 channels.
    rgba_pixels = np.frombuffer(agg_canvas.buffer_rgba(), dtype=np.uint8)
    rgba_pixels = rgba_pixels.reshape(height, width, 4)

    return cv2.cvtColor(rgba_pixels, cv2.COLOR_RGBA2BGR)

# Drawing specs are identical for every frame, so build them once.
face_landmark_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=1, circle_radius=1)
face_connection_spec = mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=1)

try:
    with mp_face_mesh.FaceMesh(
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as face_mesh:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # Mirror the frame, then run the face mesh on an RGB copy.
            frame = cv2.flip(frame, 1)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)

            # Clear the plot for new data; clearing also resets the axes
            # settings, so they are re-applied every frame.
            ax.clear()
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.invert_yaxis()
            ax.set_title('Face Landmarks')
            ax.set_aspect('equal')

            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    # Draw landmarks on the video frame
                    mp_drawing.draw_landmarks(
                        image=frame,
                        landmark_list=face_landmarks,
                        connections=mp_face_mesh.FACEMESH_TESSELATION,
                        landmark_drawing_spec=face_landmark_spec,
                        connection_drawing_spec=face_connection_spec)

                    # Plot the normalized (0-1 range) landmark coordinates
                    x = [lm.x for lm in face_landmarks.landmark]
                    y = [lm.y for lm in face_landmarks.landmark]
                    ax.scatter(x, y, s=10, color='blue')

            # Convert the matplotlib figure to an OpenCV image
            plot_img = fig_to_image(fig)

            # Resize plot image to match frame size for display
            h, w = frame.shape[:2]
            plot_img = cv2.resize(plot_img, (w, h))

            # Display both the video feed and the plot side by side
            combined_img = np.hstack((frame, plot_img))

            # If the combined image is wider than 1920 px, scale it down
            if combined_img.shape[1] > 1920:
                scale_factor = 1920 / combined_img.shape[1]
                combined_img = cv2.resize(combined_img, (0, 0), fx=scale_factor, fy=scale_factor)

            cv2.imshow('Face Mesh with Real-time Plot', combined_img)

            if cv2.waitKey(5) & 0xFF == ord('q'):  # Press 'q' to exit
                break
finally:
    # Always free the camera, the window and the figure, including when the
    # loop raises or the kernel is interrupted.
    cap.release()
    cv2.destroyAllWindows()
    plt.close(fig)