In [3]:
import cv2
import mediapipe as mp
import numpy as np
import math
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume



In [4]:
# MediaPipe setup: drawing helpers and the hand-tracking solution module
mp_draw = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Hand tracker: 0.7 confidence to accept a new detection, 0.5 to keep tracking it
hands = mp_hands.Hands(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.7,
)


In [5]:
# Initialize Pycaw for controlling system volume:
# fetch the default speaker device, activate its endpoint-volume COM interface,
# and cast the raw interface to a usable IAudioEndpointVolume pointer.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# Get the supported volume range in decibels.
# GetVolumeRange() returns (min_dB, max_dB, increment_dB) in that order, so the
# minimum is index 0 and the maximum is index 1 (exact values are device dependent).
vol_range = volume.GetVolumeRange()
min_vol = vol_range[0]  # Quietest level in dB (a negative number)
max_vol = vol_range[1]  # Loudest level in dB (usually 0.0)

# Open webcam video capture (device 0 = default camera)
cap = cv2.VideoCapture(0)

# Define the coordinates for the green rectangle (box) where gestures will be detected
box_x1, box_y1, box_x2, box_y2 = 100, 100, 400, 400

# Thumb-to-index pinch distances (in pixels) that map to the lowest / highest volume
pinch_range = [20, 200]

# The try/finally guarantees the camera and the preview window are released even
# when the loop is left through an exception or a kernel interrupt, not only via 'q'.
try:
    if not cap.isOpened():
        # Without this message a missing/busy camera would end the cell silently.
        print("Could not open the webcam (device 0).")

    while True:
        success, img = cap.read()

        # Stop when no frame could be read (camera unavailable or disconnected)
        if not success:
            break

        # Frame size is the same for every hand in this frame, so read it once
        h, w, _ = img.shape

        # Convert the image from BGR to RGB for MediaPipe processing
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Get the index finger tip (landmark 8) and thumb tip (landmark 4)
                index_finger_tip = hand_landmarks.landmark[8]
                thumb_tip = hand_landmarks.landmark[4]

                # Convert normalized coordinates to pixel values
                x1, y1 = int(index_finger_tip.x * w), int(index_finger_tip.y * h)
                x2, y2 = int(thumb_tip.x * w), int(thumb_tip.y * h)

                # Only react when both fingertips are strictly inside the green box
                if (box_x1 < x1 < box_x2 and box_y1 < y1 < box_y2 and
                        box_x1 < x2 < box_x2 and box_y1 < y2 < box_y2):

                    # Draw landmarks and connections for the detected hand
                    mp_draw.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Draw circles on the index finger and thumb
                    cv2.circle(img, (x1, y1), 10, (255, 0, 0), cv2.FILLED)
                    cv2.circle(img, (x2, y2), 10, (255, 0, 0), cv2.FILLED)

                    # Calculate the distance between the index finger and thumb
                    distance = math.hypot(x2 - x1, y2 - y1)

                    # Map the pinch distance to the system volume range (dB);
                    # np.interp clamps distances outside pinch_range to the end values
                    vol = np.interp(distance, pinch_range, [min_vol, max_vol])
                    volume.SetMasterVolumeLevel(vol, None)

                    # Show the pinch position as a 0-100% value on the frame.
                    # NOTE(review): the level above is set in dB, so this figure may not
                    # match the percentage shown by the OS mixer - confirm if that matters.
                    volume_percentage = np.interp(distance, pinch_range, [0, 100])
                    cv2.putText(img, f'Volume: {int(volume_percentage)}%', (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Draw the green rectangle (box) on the screen
        cv2.rectangle(img, (box_x1, box_y1), (box_x2, box_y2), (0, 255, 0), 2)

        # Display the result on the screen
        cv2.imshow("Hand Gesture Volume Control", img)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window
    cap.release()
    cv2.destroyAllWindows()




In [None]:
# Release the webcam handle held by `cap` and close every window opened by OpenCV.
# Run this cell after the capture loop has ended so the camera is freed for other programs.
cap.release()
cv2.destroyAllWindows()