# pip install mediapipe

# pip install comtypes

# pip install pycaw

# In [5]:  (Jupyter notebook cell marker, kept as a comment so the file parses as Python)
import cv2
import mediapipe as mp
import numpy as np
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# --- Pycaw: obtain the volume interface of the default speaker endpoint ---
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
# Cast the raw COM pointer so the IAudioEndpointVolume methods are callable.
volume = cast(interface, POINTER(IAudioEndpointVolume))

# The first two entries of the reported range are used as the lower and
# upper bounds when mapping a finger distance to a volume level.
vol_range = volume.GetVolumeRange()
min_vol, max_vol = vol_range[0], vol_range[1]

# --- Mediapipe: short aliases for the hand-tracking and drawing modules ---
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two coordinate sequences.

    Both points are converted to NumPy arrays, subtracted element-wise and
    the norm of the resulting difference vector is returned.
    """
    start = np.array(point1)
    end = np.array(point2)
    offset = start - end
    return np.linalg.norm(offset)

def volume_control_from_distance(distance, min_distance, max_distance):
    """Linearly map a distance onto the module-level volume range.

    ``distance`` is rescaled so that ``min_distance`` corresponds to 0 and
    ``max_distance`` to 1, clamped to that interval, and then stretched
    onto ``[min_vol, max_vol]``.
    """
    distance_span = max_distance - min_distance
    fraction = (distance - min_distance) / distance_span
    # Distances outside the calibrated interval saturate at the ends.
    fraction = np.clip(fraction, 0, 1)
    volume_span = max_vol - min_vol
    return fraction * volume_span + min_vol

# Capture frames from the default webcam, track the thumb and index finger
# tips with Mediapipe, and set the system volume from the pixel distance
# between them until 'q' is pressed or a frame cannot be read.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below is skipped silently.
    print("Failed to open webcam")

try:
    # Set up Mediapipe hand detection
    with mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7) as hands:
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                print("Failed to grab frame")
                break

            # Mirror the frame so the preview behaves like a selfie view
            frame = cv2.flip(frame, 1)

            # Mediapipe expects RGB input, OpenCV delivers BGR
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Process the frame and find hands
            result = hands.process(rgb_frame)

            if result.multi_hand_landmarks:
                # Frame size is the same for every hand in this frame
                h, w = frame.shape[:2]

                for hand_landmarks in result.multi_hand_landmarks:
                    # Draw the hand landmarks on the frame
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Thumb tip is landmark 4, index finger tip is landmark 8
                    thumb_tip = hand_landmarks.landmark[4]
                    index_tip = hand_landmarks.landmark[8]

                    # Convert normalized coordinates to pixel values
                    thumb_tip_coords = (int(thumb_tip.x * w), int(thumb_tip.y * h))
                    index_tip_coords = (int(index_tip.x * w), int(index_tip.y * h))

                    # Calculate the distance between thumb and index finger tips
                    distance = calculate_distance(thumb_tip_coords, index_tip_coords)

                    # Map the distance to the volume range and set the system volume
                    vol = volume_control_from_distance(distance, 30, 200)  # Adjust min_distance and max_distance as needed
                    volume.SetMasterVolumeLevel(vol, None)

                    # Display the distance and volume on the frame
                    cv2.putText(frame, f'Distance: {int(distance)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
                    cv2.putText(frame, f'Volume: {int((vol - min_vol) / (max_vol - min_vol) * 100)}%', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            # Display the frame with any hand landmarks and text drawn on it
            cv2.imshow('Gesture Volume Control', frame)

            # Break the loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always release the webcam and close the window, even when the loop is
    # interrupted by an exception (e.g. KeyboardInterrupt in a notebook);
    # otherwise the camera stays locked until the process exits.
    cap.release()
    cv2.destroyAllWindows()

