# In [1]: (Jupyter notebook cell marker, kept as a comment so the file is valid Python)
import cv2 
import mediapipe as mp 
from math import hypot 
import numpy as np 
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import pyautogui
from google.protobuf.json_format import MessageToDict

# Right Hand for Volume
# Left Hand for Media Control

# Open the default camera (index 0) through the DirectShow backend
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
if not cap.isOpened():
    # Fail fast with a clear message; otherwise the first failed read would
    # only surface later as an error on a None frame.
    cap.release()
    raise RuntimeError("Could not open camera 0 with the DirectShow backend")

# Initialize MediaPipe Hands module (detector/tracker thresholds of 0.75)
mpHands = mp.solutions.hands
hands = mpHands.Hands(min_detection_confidence=0.75, min_tracking_confidence=0.75)
mpDraw = mp.solutions.drawing_utils

# Get the default speakers and obtain their endpoint-volume COM interface
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# Get the volume range; only the first two values (min, max) are used
volMin, volMax = volume.GetVolumeRange()[:2]

# Finger count seen on the previous left-hand frame, used so a media hotkey
# is sent only when the count changes rather than on every frame
prev_fingers = 0

# Main capture loop: the left hand triggers media hotkeys from a finger count,
# the right hand sets the master volume from the thumb-index distance.

# (tip, joint) landmark index pairs compared when counting fingers,
# in the order thumb, index, middle, ring, pinky.
finger_joints = ((4, 3), (8, 6), (12, 10), (16, 14), (20, 18))

# Finger count -> key pressed together with Ctrl
# (1: previous track, 2: play/pause, 3: next track).
media_keys = {1: 'b', 2: 'p', 3: 'f'}

try:
    while True:
        # Read frame from the camera
        success, img = cap.read()
        if not success or img is None:
            # No frame available (camera unplugged or stream ended); stop
            # instead of passing None on to cv2.flip.
            break

        # Flip the frame horizontally
        img = cv2.flip(img, 1)
        # Convert BGR image to RGB for processing with Mediapipe
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Process the frame to detect hands
        results = hands.process(imgRGB)

        # Lists of [x, y] pixel positions for the left and right hands
        left_lmList, right_lmList = [], []
        # Frame size is the same for every landmark, so read it once per frame
        h, w, _ = img.shape

        # Sort each detected hand's landmarks into the left or right list
        if results.multi_hand_landmarks and results.multi_handedness:
            for hand_landmarks, handedness in zip(results.multi_hand_landmarks,
                                                  results.multi_handedness):
                label = MessageToDict(handedness)['classification'][0]['label']
                # Landmarks are normalized; scale them to pixel coordinates
                points = [[int(lm.x * w), int(lm.y * h)]
                          for lm in hand_landmarks.landmark]
                if label == 'Left':
                    left_lmList.extend(points)
                elif label == 'Right':
                    right_lmList.extend(points)

        # Process left hand for media control
        if left_lmList:
            # A finger is counted when its tip's y pixel value is greater than
            # its joint's, i.e. the tip is lower in the image.
            # NOTE(review): this looks like it counts curled rather than
            # raised fingers -- confirm the intended gesture.
            fingers = sum(
                1 for tip, joint in finger_joints
                if left_lmList[tip][1] > left_lmList[joint][1]
            )

            # Send the hotkey only when the count has just changed
            if fingers != prev_fingers and fingers in media_keys:
                pyautogui.hotkey('ctrl', media_keys[fingers])
            prev_fingers = fingers

        # Process right hand for volume control
        if right_lmList:
            x1, y1 = right_lmList[4]   # thumb tip
            x2, y2 = right_lmList[8]   # index finger tip

            cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 3)

            length = hypot(x2 - x1, y2 - y1)

            # Map 50..200 px onto volMin..volMax. np.interp clamps to the end
            # values outside that range, so below 50 gives volMin and above
            # 200 gives volMax without separate branches.
            vol = float(np.interp(length, [50, 200], [volMin, volMax]))
            volume.SetMasterVolumeLevel(vol, None)

        # Display the annotated frame
        cv2.imshow('Image', img)

        # Check for exit key
        if cv2.waitKey(1) & 0xff == ord('q'):
            break
finally:
    # Release the camera, the MediaPipe graph and all OpenCV windows even
    # when the loop ends through an exception or Ctrl-C
    cap.release()
    hands.close()
    cv2.destroyAllWindows()
