In [10]:
import mediapipe as mp
import cv2
import time
import math
import os

# MediaPipe setup
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(min_detection_confidence=0.7)

# Gesture recognizer setup
model_path = 'gesture_recognizer.task'
BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Global variables
gesture_texts = []
volume_mode_active = False
volume_toggle_ready = True  

def print_result(result, output_image, timestamp_ms):
    global gesture_texts
    gesture_texts = []
    if result.gestures and result.handedness:
        for hand, gesture in zip(result.handedness, result.gestures):
            gesture_texts.append((
                hand[0].category_name,
                gesture[0].category_name,
                gesture[0].score
            ))

def calculate_distance(landmark1, landmark2):
    return math.sqrt(
        (landmark1.x - landmark2.x) ** 2 +
        (landmark1.y - landmark2.y) ** 2 +
        (landmark1.z - landmark2.z) ** 2
    )

def adjust_volume(change):
    if change > 0:
        os.system("osascript -e 'set volume output volume (output volume of (get volume settings) + 5)'")
    elif change < 0:
        os.system("osascript -e 'set volume output volume (output volume of (get volume settings) - 5)'")

# Default distance threshold
default_distance = None

options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result
)

try:
    with GestureRecognizer.create_from_options(options) as recognizer:
        cap = cv2.VideoCapture(0)
        timestamp = 0
        
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            
            # Process hands
            hand_results = hands.process(frame_rgb)
            if hand_results.multi_hand_landmarks:
                for landmarks in hand_results.multi_hand_landmarks:
                    mp_draw.draw_landmarks(
                        frame, landmarks, mp_hands.HAND_CONNECTIONS,
                        mp_draw.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                        mp_draw.DrawingSpec(color=(250, 44, 250), thickness=2)
                    )

                    # Detect middle finger and thumb pinch gesture to toggle volume mode
                    thumb_tip = landmarks.landmark[4]
                    middle_tip = landmarks.landmark[12]
                    pinch_distance = calculate_distance(thumb_tip, middle_tip)

                    if pinch_distance < 0.05 and volume_toggle_ready:  # Threshold for pinch gesture
                        volume_mode_active = not volume_mode_active
                        mode_status = "ON" if volume_mode_active else "OFF"
                        volume_toggle_ready = False  # Prevent multiple toggles in one pinch
                        cv2.putText(frame, f"Volume Mode {mode_status}", 
                                   (50, 100),
                                   cv2.FONT_HERSHEY_SIMPLEX, 1, 
                                   (255, 255, 0), 2, cv2.LINE_AA)
                        print(f"Volume Mode {mode_status}")

                    if pinch_distance > 0.1:  # Reset toggle readiness when fingers move apart
                        volume_toggle_ready = True

                    # If volume mode is active, process thumb and index finger gestures for volume control
                    if volume_mode_active:
                        index_tip = landmarks.landmark[8]
                        thumb_tip = landmarks.landmark[4]
                        distance = calculate_distance(thumb_tip, index_tip)

                        if default_distance is None:
                            default_distance = distance  # Set the initial distance as default
                            print(f"Default distance set: {default_distance}")
                        else:
                            if distance < default_distance * 0.8:  # Threshold for volume up
                                cv2.putText(frame, "Volume Up!", 
                                           (50, 50),
                                           cv2.FONT_HERSHEY_SIMPLEX, 1, 
                                           (0, 255, 0), 2, cv2.LINE_AA)
                                adjust_volume(1)
                                print("Volume Up Gesture Detected")
                            elif distance > default_distance * 1.2:  # Threshold for volume down
                                cv2.putText(frame, "Volume Down!", 
                                           (50, 50),
                                           cv2.FONT_HERSHEY_SIMPLEX, 1, 
                                           (0, 0, 255), 2, cv2.LINE_AA)
                                adjust_volume(-1)
                                print("Volume Down Gesture Detected")

            # Process gestures
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
            timestamp += 1
            recognizer.recognize_async(mp_image, timestamp)

            # Draw gesture labels
            for idx, (hand, gesture, score) in enumerate(gesture_texts):
                cv2.putText(frame, f"{gesture} ({score:.2f})", 
                           (10, 30 + idx * 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, 
                           (0, 255, 0), 2, cv2.LINE_AA)

            cv2.imshow('Hand Gesture Recognition', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

finally:
    if 'cap' in locals():
        cap.release()
    cv2.destroyAllWindows()
    hands.close()

I0000 00:00:1737516550.633138  573090 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1 Pro
I0000 00:00:1737516550.636445  573090 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1 Pro
W0000 00:00:1737516550.636827  573090 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1737516550.640161  573090 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
W0000 00:00:1737516550.646241  585709 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1737516550.659069  585709 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1737516550.660089  585715 inference_feedback_manager.cc:114] Feedback manager requires a model with

Volume Mode ON
Default distance set: 0.032164906837013646
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Up Gesture Detected
Volume Up Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Mode OFF
Volume Mode ON
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected
Volume Down Gesture Detected


KeyboardInterrupt: 