# Hand Gesture Recognition System  
### Computer Vision Project using MediaPipe Hands

In [None]:
# PART 1: IMPORTS AND INITIALIZATION
import cv2                     
import mediapipe as mp          # MediaPipe for hand detection 
import numpy as np              
import time                     
import pandas as pd             
from collections import defaultdict  
from datetime import datetime
import os
import matplotlib.pyplot as plt  

# Initialize MediaPipe Hands solution
# This provides pre-trained models for detecting hand landmarks in real-time
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils


hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5
)


In [2]:
# PART 2: HELPER FUNCTIONS FOR FINGER DETECTION

def is_finger_extended(landmarks, finger_tip_id, finger_pip_id):
    """
    Check if a specific finger is extended based on landmark positions
    Returns:
        Boolean: True if finger is extended, False if curled
    Logic:
        - If the tip is above (smaller y-value) the PIP joint, finger is extended
        - MediaPipe uses normalized coordinates where y=0 is top, y=1 is bottom
    """
    tip = landmarks[finger_tip_id]
    pip = landmarks[finger_pip_id]
    return tip.y < pip.y 

def is_thumb_extended(landmarks):
    """
    Special case for thumb - check horizontal extension relative to palm
    Returns:Boolean: True if thumb is extended, False if curled
    """
    thumb_tip = landmarks[mp_hands.HandLandmark.THUMB_TIP]
    thumb_ip = landmarks[mp_hands.HandLandmark.THUMB_IP]
    thumb_mcp = landmarks[mp_hands.HandLandmark.THUMB_MCP]
    
    # Get the palm center x-coordinate (wrist position)
    palm_center_x = landmarks[mp_hands.HandLandmark.WRIST].x
    
    # Thumb is extended if its tip is further from wrist than its base
    return abs(thumb_tip.x - palm_center_x) > abs(thumb_mcp.x - palm_center_x)


In [3]:
# PART 3: GESTURE CLASSIFICATION LOGIC

def classify_gesture(hand_landmarks):
    """
    Classify a detected hand into one of the predefined gestures
    
    Uses a rule-based system:
    1. Extract all finger extension states using helper functions
    2. Apply rules to match specific gesture patterns
    3. Return the recognized gesture or "UNKNOWN"
    
    Recognized gestures:
        - FIST: No fingers extended
        - OPEN_PALM: All fingers extended
        - THUMBS_UP: Only thumb extended
        - PEACE_SIGN: Index and middle finger extended
        - POINTING: Only index finger extended
        - UNKNOWN: Doesn't match any pattern
    Returns:
        String: Name of the recognized gesture
    """
    landmarks = hand_landmarks.landmark

    # Determine which fingers are extended using the helper functions
    thumb_extended = is_thumb_extended(landmarks)
    index_extended = is_finger_extended(landmarks,
                                        mp_hands.HandLandmark.INDEX_FINGER_TIP,
                                        mp_hands.HandLandmark.INDEX_FINGER_PIP)
    middle_extended = is_finger_extended(landmarks,
                                         mp_hands.HandLandmark.MIDDLE_FINGER_TIP,
                                         mp_hands.HandLandmark.MIDDLE_FINGER_PIP)
    ring_extended = is_finger_extended(landmarks,
                                       mp_hands.HandLandmark.RING_FINGER_TIP,
                                       mp_hands.HandLandmark.RING_FINGER_PIP)
    pinky_extended = is_finger_extended(landmarks,
                                        mp_hands.HandLandmark.PINKY_TIP,
                                        mp_hands.HandLandmark.PINKY_PIP)

    # Apply gesture classification rules
    # FIST: No fingers extended
    if not any([thumb_extended, index_extended, middle_extended, ring_extended, pinky_extended]):
        return "FIST"
    
    # OPEN_PALM: All fingers extended
    if all([thumb_extended, index_extended, middle_extended, ring_extended, pinky_extended]):
        return "OPEN_PALM"
    
    # THUMBS_UP: Only thumb extended
    if thumb_extended and not any([index_extended, middle_extended, ring_extended, pinky_extended]):
        return "THUMBS_UP"
    
    # PEACE_SIGN: Index and middle extended, others curled
    if index_extended and middle_extended and not any([ring_extended, pinky_extended]):
        return "PEACE_SIGN"
    
    # POINTING: Only index finger extended
    if index_extended and not any([middle_extended, ring_extended, pinky_extended]):
        return "POINTING"

    # If no pattern matches, return UNKNOWN
    return "UNKNOWN"


In [4]:
# PART 4: REAL-TIME GESTURE RECOGNITION FUNCTION

def run_gesture_recognition(duration=60, show_landmarks=True):
    """
    Capture video from webcam and perform real-time gesture recognition
    
    This function:
    1. Opens the default webcam (camera index 0)
    2. Processes each video frame to detect hand landmarks
    3. Classifies detected hand gestures in real-time
    4. Displays the video with landmark overlays and gesture predictions
    5. Runs for a specified duration or until user presses 'q'

    Args:
        duration: How many seconds to run (default 60 seconds)
        show_landmarks: If True, draw hand skeleton on video (default True)
    
    Controls:
        Press 'q' to quit early
    """
    # Initialize video capture from default webcam
    cap = cv2.VideoCapture(0)
    start_time = time.time()
    current_gesture = "NONE"  # Track the currently detected gesture

    print("Starting gesture recognition...")
    print("Press 'q' to quit\n")

    # Main recognition loop
    while cap.isOpened() and (time.time() - start_time) < duration:
        ret, frame = cap.read()
        if not ret:
            break

        # Flip frame horizontally for selfie view (mirror effect)
        frame = cv2.flip(frame, 1)
        
        # Convert BGR (OpenCV default) to RGB for MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Process frame with MediaPipe hands detector
        results = hands.process(rgb_frame)

        # If hands are detected
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Draw hand skeleton (21 landmarks connected by lines)
                if show_landmarks:
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Classify the gesture
                gesture = classify_gesture(hand_landmarks)
                
                # Print only when gesture changes (to avoid spam)
                if gesture != current_gesture:
                    current_gesture = gesture
                    print("Detected:", current_gesture)
        else:
            # No hand detected
            if current_gesture != "NONE":
                current_gesture = "NONE"
                print("No hand detected")

        # Display current gesture on the video frame
        cv2.putText(frame, f"Gesture: {current_gesture}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 255, 0), 2)

        # Show the video window
        cv2.imshow("Hand Gesture Recognition", frame)

        # Check for 'q' key press to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Cleanup: release resources
    cap.release()
    cv2.destroyAllWindows()


In [5]:
# Part 5: RUN BASIC GESTURE RECOGNITION
run_gesture_recognition(duration=60)


Starting gesture recognition...
Press 'q' to quit



In [6]:
# Class for systematically collecting and evaluating gesture recognition performance
class GestureEvaluator:
    """
    Class for collecting gesture evaluation data and calculating performance metrics
    
    Purpose:
    - Collect labeled samples of different gestures under various conditions
    - Track ground truth vs predicted labels
    - Calculate accuracy metrics at multiple levels (overall, by gesture, by condition)
    - Generate confusion matrices to understand misclassifications
    - Save/load evaluation results to/from CSV files
    
    Data collected:
        For each sample: timestamp, ground truth gesture, predicted gesture,
        correctness, distance (close/medium/far), background (clean/cluttered),
        and model confidence score
    """

    def __init__(self):
        """Initialize the evaluator with empty data list and gesture names"""
        self.evaluation_data = []  # List to store all evaluation samples
        self.gesture_names = ["FIST", "OPEN_PALM", "THUMBS_UP", "PEACE_SIGN", "POINTING", "UNKNOWN"]

    def collect_evaluation_data(self, gesture_name, num_samples=30,
                                distance="medium", background="clean"):
        
        """Collect labeled samples of a specific gesture under specific conditions"""
        cap = cv2.VideoCapture(0)
        samples_collected = 0
        collecting = False

        print(f"\nCollecting data for: {gesture_name}")
        print(f"Distance: {distance}, Background: {background}")
        print("Press 's' to start, 'q' to quit\n")

        while cap.isOpened() and samples_collected < num_samples:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)

            predicted = "NONE"
            confidence_score = 0.0

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    # Draw hand skeleton on frame
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    
                    # Classify the gesture
                    predicted = classify_gesture(hand_landmarks)
                    
                    # Get confidence score from MediaPipe's hand detection
                    confidence_score = results.multi_handedness[0].classification[0].score

                    # If actively collecting, save this sample
                    if collecting:
                        self.evaluation_data.append({
                            'timestamp': datetime.now().isoformat(),
                            'ground_truth': gesture_name,
                            'predicted': predicted,
                            'correct': predicted == gesture_name,
                            'distance': distance,
                            'background': background,
                            'confidence': confidence_score
                        })
                        samples_collected += 1

            # Display status on frame
            status = "COLLECTING" if collecting else "READY - Press 's'"
            cv2.putText(frame, f"{status} ({samples_collected}/{num_samples})",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            cv2.putText(frame, f"Ground Truth: {gesture_name}",
                        (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

            cv2.putText(frame, f"Predicted: {predicted}",
                        (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

            cv2.imshow("Data Collection", frame)
            key = cv2.waitKey(1) & 0xFF

            if key == ord('s'):
                collecting = True
                print("Started collecting...")
            elif key == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()
        print(f"Collected {samples_collected} samples for {gesture_name}\n")

    def run_full_evaluation(self, samples_per_condition=30):
        """
        Run a comprehensive evaluation across all 
        gesture/distance/background combinations
        
        Tests every gesture with every distance and background combination
        This creates a complete picture of model robustness
        """
        distances = ["close", "medium", "far"]
        backgrounds = ["clean", "cluttered"]

        print("=" * 50)
        print(" FULL GESTURE EVALUATION ")
        print("=" * 50)

        for gesture in self.gesture_names:
            for distance in distances:
                for background in backgrounds:
                    print(f"Testing {gesture} | {distance} | {background}")
                    self.collect_evaluation_data(
                        gesture,
                        num_samples=samples_per_condition,
                        distance=distance,
                        background=background
                    )
                    time.sleep(1)

        # Save results after all data collection
        self.save_results()

    def calculate_metrics(self):
        """
        Calculate and display gesture recognition accuracy metrics
        Prints detailed results and returns a dictionary for further analysis
        """
        if not self.evaluation_data:
            print("No evaluation data found.")
            return

        # Convert evaluation data list to pandas DataFrame for easy analysis
        df = pd.DataFrame(self.evaluation_data)

        # Calculate accuracy at different levels
        overall_accuracy = df["correct"].mean() * 100  # Percentage of all correct predictions
        gesture_accuracy = df.groupby("ground_truth")["correct"].mean() * 100  # Per gesture
        distance_accuracy = df.groupby("distance")["correct"].mean() * 100  # Per distance
        background_accuracy = df.groupby("background")["correct"].mean() * 100  # Per background

        # Create confusion matrix: shows which gestures are confused with which
        # Each row is a ground truth gesture, each column is a predicted gesture
        confusion = pd.crosstab(df["ground_truth"], df["predicted"],
                                normalize="index") * 100

        # Print results
        print("\nOverall Accuracy:", round(overall_accuracy, 2), "%")
        print("\nGesture Accuracy:\n", gesture_accuracy)
        print("\nDistance Accuracy:\n", distance_accuracy)
        print("\nBackground Accuracy:\n", background_accuracy)
        print("\nConfusion Matrix:\n", confusion.round(2))

        return {
            "overall_accuracy": overall_accuracy,
            "gesture_accuracy": gesture_accuracy.to_dict(),
            "distance_accuracy": distance_accuracy.to_dict(),
            "background_accuracy": background_accuracy.to_dict(),
            "confusion": confusion.to_dict()
        }

    def save_results(self, filename="evaluation_results.csv"):
        """
     Save evaluation data to CSV file for later analysis
        Output:
            CSV file with columns:
            timestamp, ground_truth, predicted, correct, distance, background, confidence
        """
        df = pd.DataFrame(self.evaluation_data)
        df.to_csv(filename, index=False)
        print(f"Results saved to {filename}")

    def load_results(self, filename="evaluation_results.csv"):
        """
        Load previously saved evaluation results from CSV file
        filename: Name of CSV file to load (default "evaluation_results.csv")
        """
        df = pd.read_csv(filename)
        self.evaluation_data = df.to_dict("records")
        print(f"Loaded {len(self.evaluation_data)} samples from {filename}")


In [7]:
# PART 7: TESTING AND CSV GENERATION - CHOOSE ONE OPTION
# ============================================================================
# OPTION 1: Quick Robustness Test (Recommended for first-time testing)
# ============================================================================
# Run a quick test on a subset of gestures, collect 10 samples each
# Creates: robustness_test.csv
#
# To use: Uncomment and run this cell

def test_robustness():
    """
    Run a quick robustness test and save results to CSV
    
    Steps:
    1. Creates a GestureEvaluator instance
    2. Tests 6 gestures x 3 conditions (close/far/clean/cluttered) = 6 conditions total
    3. Collects 10 samples per gesture+condition combination
    4. Calculates accuracy metrics and displays them
    5. Saves all data to robustness_test.csv
    """
    evaluator = GestureEvaluator()

    print("Running quick robustness test...")
    print("This will test each gesture under different conditions")
    print("Follow on-screen prompts to collect samples\n")

    # Define test cases: (gesture, distance, background)
    test_cases = [
        ("FIST", "close", "clean"),
        ("FIST", "far", "cluttered"),
        ("OPEN_PALM", "medium", "clean"),
        ("THUMBS_UP", "close", "cluttered"),
        ("PEACE_SIGN", "far", "clean"),
        ("POINTING", "medium", "cluttered"),
    ]

    # Collect samples for each test case
    for gesture, dist, bg in test_cases:
        evaluator.collect_evaluation_data(
            gesture, num_samples=10, distance=dist, background=bg
        )

    # Calculate and display metrics
    print("\n" + "=" * 50)
    metrics = evaluator.calculate_metrics()
    
    # CRITICAL: Save results to CSV file
    evaluator.save_results("robustness_test.csv")
    print("=" * 50)
    print("✓ Robustness test complete!")
    print("✓ Results saved to: robustness_test.csv")

# This will run the robustness test when the cell is executed
test_robustness()

# ============================================================================
# SUMMARY: How to Generate CSV Files
# ============================================================================
# 1. Choose ONE option above and uncomment it
# 2. Run the cell (Shift+Enter or click Run)
# 3. Follow on-screen prompts:
#    - Press 's' to START collecting samples
#    - Press 'q' to QUIT
# 4. Wait for the function to finish
# 5. A CSV file will be created in your project folder:
#    - robustness_test.csv (from Option 1)
#    - evaluation_results.csv (from Option 2)
#    - my_evaluation.csv (from Option 3)
# ============================================================================


Running quick robustness test...
This will test each gesture under different conditions
Follow on-screen prompts to collect samples


Collecting data for: FIST
Distance: close, Background: clean
Press 's' to start, 'q' to quit


Collecting data for: FIST
Distance: close, Background: clean
Press 's' to start, 'q' to quit

Collected 0 samples for FIST

Collected 0 samples for FIST


Collecting data for: FIST
Distance: far, Background: cluttered
Press 's' to start, 'q' to quit


Collecting data for: FIST
Distance: far, Background: cluttered
Press 's' to start, 'q' to quit

Collected 0 samples for FIST

Collected 0 samples for FIST


Collecting data for: OPEN_PALM
Distance: medium, Background: clean
Press 's' to start, 'q' to quit


Collecting data for: OPEN_PALM
Distance: medium, Background: clean
Press 's' to start, 'q' to quit

Collected 0 samples for OPEN_PALM

Collected 0 samples for OPEN_PALM


Collecting data for: THUMBS_UP
Distance: close, Background: cluttered
Press 's' to start, 

In [8]:
# OPTION 2: Full Evaluation (Complete and thorough)
# Test ALL gestures with ALL distance/background combinations
# Creates: evaluation_results.csv

evaluator = GestureEvaluator()
evaluator.run_full_evaluation(samples_per_condition=30)

 FULL GESTURE EVALUATION 
Testing FIST | close | clean

Collecting data for: FIST
Distance: close, Background: clean
Press 's' to start, 'q' to quit


Collecting data for: FIST
Distance: close, Background: clean
Press 's' to start, 'q' to quit

Collected 0 samples for FIST

Collected 0 samples for FIST

Testing FIST | close | cluttered
Testing FIST | close | cluttered

Collecting data for: FIST
Distance: close, Background: cluttered
Press 's' to start, 'q' to quit


Collecting data for: FIST
Distance: close, Background: cluttered
Press 's' to start, 'q' to quit

Collected 0 samples for FIST

Collected 0 samples for FIST

Testing FIST | medium | clean
Testing FIST | medium | clean

Collecting data for: FIST
Distance: medium, Background: clean
Press 's' to start, 'q' to quit


Collecting data for: FIST
Distance: medium, Background: clean
Press 's' to start, 'q' to quit

Collected 0 samples for FIST

Collected 0 samples for FIST

Testing FIST | medium | cluttered
Testing FIST | medium | c



Collected 0 samples for POINTING

Testing POINTING | medium | cluttered
Testing POINTING | medium | cluttered

Collecting data for: POINTING
Distance: medium, Background: cluttered
Press 's' to start, 'q' to quit


Collecting data for: POINTING
Distance: medium, Background: cluttered
Press 's' to start, 'q' to quit

Collected 0 samples for POINTING

Collected 0 samples for POINTING

Testing POINTING | far | clean
Testing POINTING | far | clean

Collecting data for: POINTING
Distance: far, Background: clean
Press 's' to start, 'q' to quit


Collecting data for: POINTING
Distance: far, Background: clean
Press 's' to start, 'q' to quit

Collected 0 samples for POINTING

Collected 0 samples for POINTING

Testing POINTING | far | cluttered
Testing POINTING | far | cluttered

Collecting data for: POINTING
Distance: far, Background: cluttered
Press 's' to start, 'q' to quit


Collecting data for: POINTING
Distance: far, Background: cluttered
Press 's' to start, 'q' to quit

Collected 0 sample

In [9]:
# OPTION 3: Custom Evaluation (Most flexible)
# Manually collect data for specific gestures and conditions

evaluator = GestureEvaluator()

# # Collect data for specific gestures
evaluator.collect_evaluation_data("FIST", num_samples=20, distance="close", background="clean")
evaluator.collect_evaluation_data("OPEN_PALM", num_samples=20, distance="medium", background="cluttered")
evaluator.collect_evaluation_data("THUMBS_UP", num_samples=20, distance="far", background="clean")
# 
#Calculate metrics
evaluator.calculate_metrics()
 
#Save to CSV (IMPORTANT!)
evaluator.save_results("my_evaluation.csv")


Collecting data for: FIST
Distance: close, Background: clean
Press 's' to start, 'q' to quit

Collected 0 samples for FIST

Collected 0 samples for FIST


Collecting data for: OPEN_PALM
Distance: medium, Background: cluttered
Press 's' to start, 'q' to quit


Collecting data for: OPEN_PALM
Distance: medium, Background: cluttered
Press 's' to start, 'q' to quit

Collected 0 samples for OPEN_PALM

Collected 0 samples for OPEN_PALM


Collecting data for: THUMBS_UP
Distance: far, Background: clean
Press 's' to start, 'q' to quit


Collecting data for: THUMBS_UP
Distance: far, Background: clean
Press 's' to start, 'q' to quit

Collected 0 samples for THUMBS_UP

No evaluation data found.
Results saved to my_evaluation.csv
Collected 0 samples for THUMBS_UP

No evaluation data found.
Results saved to my_evaluation.csv


In [None]:
# OPTION 4: Load and Analyze Previous Results
# Load results from an existing CSV file and analyze them

evaluator = GestureEvaluator()
evaluator.load_results("Our-test/evaluation_results.csv")  # Load existing results
evaluator.calculate_metrics()  # Display analysis

EmptyDataError: No columns to parse from file

In [None]:
# PART 8: PLOTTING UTILITIES — Generate Figures & Tables from Evaluation Data
# Add graphs/tables for evaluation results (accuracy, confusion matrix, counts)
# Requires: matplotlib (already in requirements.txt), seaborn (optional, improves heatmap)

def plot_evaluation_results(evaluator, save_dir="results_plots", show_figs=False):
    """
    Create and save plots summarizing evaluator.evaluation_data.
    Produces PNG files:
        - per_gesture_accuracy.png
        - accuracy_by_distance.png (if 'distance' present)
        - accuracy_by_background.png (if 'background' present)
        - confusion_matrix.png
        - predictions_count.png
    """
    os.makedirs(save_dir, exist_ok=True)

    # Ensure data is present
    if not hasattr(evaluator, 'evaluation_data'):
        raise ValueError('Evaluator object has no attribute evaluation_data')

    df = pd.DataFrame(evaluator.evaluation_data)
    if df.empty:
        print('No evaluation data to plot. Collect data or load a CSV first.')
        return

    # Normalize expected boolean column
    if 'correct' in df.columns:
        df['correct'] = df['correct'].astype(bool)

    # 1) Per-gesture accuracy
    per_gesture = (df.groupby('ground_truth')['correct'].mean() * 100).sort_values(ascending=False)
    fig, ax = plt.subplots(figsize=(8, 5))
    per_gesture.plot(kind='bar', color='tab:blue', ax=ax)
    ax.set_ylabel('Accuracy (%)')
    ax.set_title('Per-Gesture Accuracy')
    ax.set_ylim(0, 100)
    for p in ax.patches:
        ax.annotate(f"{p.get_height():.1f}%", (p.get_x() + p.get_width() / 2, p.get_height()),
                    ha='center', va='bottom', fontsize=9)
    fn = os.path.join(save_dir, 'per_gesture_accuracy.png')
    fig.tight_layout()
    fig.savefig(fn, dpi=150)
    if show_figs: plt.show()
    plt.close(fig)
    print('Saved:', fn)

    # 2) Accuracy by distance
    if 'distance' in df.columns:
        order = ['close', 'medium', 'far']
        distance_acc = (df.groupby('distance')['correct'].mean() * 100).reindex(order)
        fig, ax = plt.subplots(figsize=(6, 4))
        distance_acc.plot(kind='bar', color='tab:green', ax=ax)
        ax.set_ylabel('Accuracy (%)')
        ax.set_title('Accuracy by Distance')
        ax.set_ylim(0, 100)
        for p in ax.patches:
            ax.annotate(f"{(p.get_height() if not np.isnan(p.get_height()) else 0):.1f}%",
                        (p.get_x() + p.get_width() / 2, (p.get_height() if not np.isnan(p.get_height()) else 0)),
                        ha='center', va='bottom', fontsize=9)
        fn = os.path.join(save_dir, 'accuracy_by_distance.png')
        fig.tight_layout()
        fig.savefig(fn, dpi=150)
        if show_figs: plt.show()
        plt.close(fig)
        print('Saved:', fn)

    # 3) Accuracy by background 
    if 'background' in df.columns:
        bg_acc = (df.groupby('background')['correct'].mean() * 100)
        fig, ax = plt.subplots(figsize=(6, 4))
        bg_acc.plot(kind='bar', color='tab:orange', ax=ax)
        ax.set_ylabel('Accuracy (%)')
        ax.set_title('Accuracy by Background')
        ax.set_ylim(0, 100)
        for p in ax.patches:
            ax.annotate(f"{p.get_height():.1f}%", (p.get_x() + p.get_width() / 2, p.get_height()),
                        ha='center', va='bottom', fontsize=9)
        fn = os.path.join(save_dir, 'accuracy_by_background.png')
        fig.tight_layout()
        fig.savefig(fn, dpi=150)
        if show_figs: plt.show()
        plt.close(fig)
        print('Saved:', fn)

    # 4) Confusion matrix (percentage per ground-truth row)
    cm = pd.crosstab(df['ground_truth'], df['predicted'], normalize='index') * 100
    fig, ax = plt.subplots(figsize=(8, 6))

    im = ax.imshow(cm.fillna(0).values, cmap='viridis', vmin=0, vmax=100)
    ax.set_xticks(np.arange(len(cm.columns)))
    ax.set_yticks(np.arange(len(cm.index)))
    ax.set_xticklabels(cm.columns, rotation=45, ha='right')
    ax.set_yticklabels(cm.index)
    for i in range(len(cm.index)):
        for j in range(len(cm.columns)):
            val = cm.fillna(0).iloc[i, j]
            ax.text(j, i, f"{val:.1f}%", ha='center', va='center', color='white', fontsize=8)
    fig.colorbar(im, ax=ax, label='Percent')
    ax.set_title('Confusion Matrix (rows = ground truth, cols = predicted)')
    fig.tight_layout()
    fn = os.path.join(save_dir, 'confusion_matrix.png')
    fig.savefig(fn, dpi=150)
    if show_figs: plt.show()
    plt.close(fig)
    print('Saved:', fn)

    # 5) Predicted label counts
    counts = df['predicted'].value_counts()
    fig, ax = plt.subplots(figsize=(8, 4))
    counts.plot(kind='bar', color='tab:purple', ax=ax)
    ax.set_ylabel('Count')
    ax.set_title('Predicted Label Counts')
    for p in ax.patches:
        ax.annotate(f"{int(p.get_height())}", (p.get_x() + p.get_width() / 2, p.get_height()),
                    ha='center', va='bottom', fontsize=9)
    fn = os.path.join(save_dir, 'predictions_count.png')
    fig.tight_layout()
    fig.savefig(fn, dpi=150)
    if show_figs: plt.show()
    plt.close(fig)
    print('Saved:', fn)

    print('\nAll plots saved in:', save_dir)


# -------------------
# Small helper: load CSV and plot in one call
# -------------------

def load_results_and_plot(csv_filename, save_dir='results_plots', show_figs=False):
    #Load CSV into a GestureEvaluator-like object and call plot_evaluation_results.

    class _TmpEvaluator:
        pass

    tmp = _TmpEvaluator()
    try:
        df = pd.read_csv(csv_filename)
    except Exception as e:
        print('Could not read CSV:', e)
        return
    tmp.evaluation_data = df.to_dict('records')
    plot_evaluation_results(tmp, save_dir=save_dir, show_figs=show_figs)


In [None]:
# This will print the graphs and plots for each csv made
# comment and uncommont out which one you want to open

load_results_and_plot('Our-test/evaluation_results.csv', save_dir='evaluation_results_results', show_figs=True)
#load_results_and_plot('Our-test/my_evaluation.csv', save_dir='my_evaluation_results', show_figs=True)
#load_results_and_plot('Our-test/robustness_test.csv', save_dir='robustness_test_results', show_figs=True)
