In [5]:
pip install pycaw comtypes


Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install opencv-python mediapipe pyautogui numpy


Note: you may need to restart the kernel to use updated packages.


In [7]:
import cv2
import mediapipe as mp
import pyautogui
import numpy as np
import math
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import time

# Webcam settings
wCam, hCam = 640, 480
frameR = 50
smoothening = 7
pLocX, pLocY = 0, 0
cLocX, cLocY = 0, 0

# Initialize webcam
cap = cv2.VideoCapture(0)
cap.set(3, wCam)
cap.set(4, hCam)

# Mediapipe setup
mpHands = mp.solutions.hands
hands = mpHands.Hands(max_num_hands=1, min_detection_confidence=0.7)
mpDraw = mp.solutions.drawing_utils

# Screen size
screenWidth, screenHeight = pyautogui.size()

# System volume setup
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volumeControl = cast(interface, POINTER(IAudioEndpointVolume))
volMin, volMax = volumeControl.GetVolumeRange()[:2]

# Volume toggle state
volumeControlActive = False
toggleCooldown = 0

# Finger tip landmark indices
tipIds = [4, 8, 12, 16, 20]

while True:
    success, img = cap.read()
    img = cv2.flip(img, 1)
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = hands.process(imgRGB)

    lmList = []
    if results.multi_hand_landmarks:
        handLms = results.multi_hand_landmarks[0]
        for id, lm in enumerate(handLms.landmark):
            h, w, c = img.shape
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append((id, cx, cy))
        mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)

    if lmList:
        # Get required landmarks
        x1, y1 = lmList[8][1], lmList[8][2]   # Index tip
        x2, y2 = lmList[4][1], lmList[4][2]   # Thumb tip
        x3, y3 = lmList[12][1], lmList[12][2] # Middle tip

        # Rectangle area
        cv2.rectangle(img, (frameR, frameR), (wCam - frameR, hCam - frameR), (255, 0, 255), 2)

        # Detect which fingers are up
        fingers = []

        # Thumb
        if lmList[tipIds[0]][1] > lmList[tipIds[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)

        # Other four fingers
        for id in range(1, 5):
            if lmList[tipIds[id]][2] < lmList[tipIds[id] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)

        # Peace Sign Detection (Index and Middle fingers up, rest down)
        currentTime = time.time()
        if fingers[1] == 1 and fingers[2] == 1 and fingers[3] == 0 and fingers[4] == 0 and (currentTime - toggleCooldown) > 1:
            volumeControlActive = not volumeControlActive
            toggleCooldown = currentTime

        # Volume Control Active
        if volumeControlActive:
            lengthVol = math.hypot(x3 - x2, y3 - y2)
            vol = np.interp(lengthVol, [20, 200], [volMin, volMax])
            volumeControl.SetMasterVolumeLevel(vol, None)

            cv2.line(img, (x2, y2), (x3, y3), (0, 255, 0), 2)
            cv2.putText(img, 'Volume Active', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Mouse Control Active
        else:
            # Cursor Movement with Index Finger
            xMapped = np.interp(x1, (frameR, wCam - frameR), (0, screenWidth))
            yMapped = np.interp(y1, (frameR, hCam - frameR), (0, screenHeight))
            cLocX = pLocX + (xMapped - pLocX) / smoothening
            cLocY = pLocY + (yMapped - pLocY) / smoothening
            pyautogui.moveTo(cLocX, cLocY)
            pLocX, pLocY = cLocX, cLocY

            # Left Click with Thumb and Index Finger
            lengthClick = math.hypot(x2 - x1, y2 - y1)
            if lengthClick < 30:
                cv2.circle(img, (x1, y1), 10, (0, 255, 0), cv2.FILLED)
                pyautogui.click()
                time.sleep(0.2)

            cv2.putText(img, 'Mouse Active', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

    # Show camera feed
    cv2.imshow("Hand Gesture Mouse & Volume Control", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


KeyboardInterrupt: 