In [1]:
import cv2                                        # import necessary libraries and modules
import mediapipe as mp                            # MediaPipe for hand tracking
import pyautogui                                  # PyAutoGUI for simulating keyboard events
from ctypes import cast, POINTER                  # used for casting COM objects
from comtypes import CLSCTX_ALL                   # specifies the context for COM object activation
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume # PyCAW for audio control

In [2]:
# Gesture-driven volume control.
#
# Reads frames from the webcam, detects hand landmarks with MediaPipe and
# compares the index-finger tip with the thumb tip: index above thumb sends a
# 'volumeup' key press, index below thumb sends 'volumedown'. The current
# master volume is read back through PyCAW to overlay a 'Mute' / 'Max' label.
# Press 'q' in the preview window to stop.

# Tolerance for the 'Mute' / 'Max' checks: the volume scalar is a float, so
# exact equality with 0 or 1.0 is fragile.
VOLUME_EPSILON=1e-3

mphands=mp.solutions.hands
hands=mphands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)
mp_drawing=mp.solutions.drawing_utils  # used to draw the detected landmarks on the frame

# Endpoint-volume interface of the default speakers (read-only use below).
devices=AudioUtilities.GetSpeakers()
interface=devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume=cast(interface, POINTER(IAudioEndpointVolume))

# Open the camera last so nothing can fail between acquiring it and entering
# the try/finally that guarantees its release.
vdo=cv2.VideoCapture(0)

try:
    while True:
        suc,frame=vdo.read()
        if not suc:
            break  # no frame could be read; stop instead of processing garbage

        # MediaPipe expects RGB input, OpenCV delivers BGR.
        results=hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mphands.HAND_CONNECTIONS)

                # y-coordinates of the index finger tip and the thumb tip
                # (image y grows downward, so a smaller y is higher in the frame).
                index_f=hand_landmarks.landmark[mphands.HandLandmark.INDEX_FINGER_TIP].y
                thumb_f=hand_landmarks.landmark[mphands.HandLandmark.THUMB_TIP].y

                if index_f<thumb_f:
                    # pointing up
                    cv2.putText(frame, 'Increasing Volume', (10,110), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 3)
                    pyautogui.press('volumeup')

                elif index_f>thumb_f:
                    # pointing down
                    cv2.putText(frame, 'Reducing Volume', (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 3)
                    pyautogui.press('volumedown')

                # equal heights: no gesture detected, leave the volume alone

            # Read the level once per frame, after every detected hand has
            # been handled, so the overlay reflects the final state.
            current_volume=volume.GetMasterVolumeLevelScalar()

            if current_volume<=VOLUME_EPSILON:
                cv2.putText(frame, 'Mute', (10,70), cv2.FONT_HERSHEY_DUPLEX, 2, (0,128,0), 4)

            elif current_volume>=1.0-VOLUME_EPSILON:
                cv2.putText(frame, 'Max', (10,70), cv2.FONT_HERSHEY_DUPLEX, 2, (0,100,255), 4)

        cv2.imshow('Volume Gesture',frame)
        if cv2.waitKey(1) & 0xFF==ord('q'):
            break

finally:
    # Always runs, including on an exception or a kernel interrupt, so the
    # webcam is not left locked and the preview window is not left frozen.
    vdo.release()            # release the video capture device
    hands.close()            # free the MediaPipe Hands resources
    cv2.destroyAllWindows()  # close the preview window