In [1]:
import cv2
import mediapipe as mp
import pyautogui
import random
import numpy as np
from pynput.mouse import Button, Controller
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

In [2]:
# pynput mouse controller; used below to issue left/right button presses.
mouse = Controller()
# Screen size as reported by PyAutoGUI; used to scale landmark coordinates to pixels.
screen_width, screen_height = pyautogui.size()

# Obtain the volume-control interface of the speaker device returned by pycaw.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
# The first two entries of the reported range are taken as the minimum and
# maximum master level (presumably in dB -- confirm against the pycaw docs).
volRange = volume.GetVolumeRange()
minVol, maxVol = volRange[0], volRange[1]


# MediaPipe hand tracker: at most two hands, 0.7 confidence threshold for both
# detection and tracking.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.7, min_tracking_confidence=0.7)
# Drawing helpers used to overlay the detected landmarks on the video frame.
draw = mp.solutions.drawing_utils

def get_angle(a, b, c):
    """Return the interior angle at vertex ``b`` formed by points ``a`` and ``c``.

    Args:
        a, b, c: 2-D points given as indexable ``(x, y)`` pairs.

    Returns:
        The angle ``a-b-c`` in degrees, always in the range ``[0, 180]``.
    """
    radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
    angle = np.abs(np.degrees(radians))
    # The difference of two arctan2 results spans (-360, 360) degrees, so the
    # absolute value may be a reflex angle; fold it back to the interior angle
    # so threshold checks such as "< 50" / "> 90" see the real joint bend.
    if angle > 180.0:
        angle = 360.0 - angle
    return angle

def get_distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        p1, p2: indexable ``(x, y)`` pairs.
    """
    delta_x = p2[0] - p1[0]
    delta_y = p2[1] - p1[1]
    return np.hypot(delta_x, delta_y)

def is_fist(landmarks):
    """Return True when none of the four non-thumb fingertips is above its MCP joint.

    Args:
        landmarks: sequence of ``(x, y)`` pairs indexed by hand-landmark id.
            A smaller ``y`` is treated as "higher" in the image.
    """
    # (fingertip id, MCP joint id) for index, middle, ring and pinky.
    tip_mcp_pairs = ((8, 5), (12, 9), (16, 13), (20, 17))
    return not any(
        landmarks[tip][1] < landmarks[mcp][1]  # this finger is raised
        for tip, mcp in tip_mcp_pairs
    )

def is_open_hand(landmarks):
    """Return True when none of the four non-thumb fingertips is below its MCP joint.

    Args:
        landmarks: sequence of ``(x, y)`` pairs indexed by hand-landmark id.
            A larger ``y`` is treated as "lower" in the image.
    """
    # (fingertip id, MCP joint id) for index, middle, ring and pinky.
    tip_mcp_pairs = ((8, 5), (12, 9), (16, 13), (20, 17))
    any_finger_down = any(
        landmarks[tip][1] > landmarks[mcp][1]
        for tip, mcp in tip_mcp_pairs
    )
    return not any_finger_down

def get_hand_label(handedness):
    """Return the ``label`` of the first classification entry of ``handedness``."""
    first_entry = handedness.classification[0]
    return first_entry.label

def find_finger_tip(landmarks, mp_hand):
    """Return the entry of ``landmarks`` at the index-fingertip position.

    Args:
        landmarks: container indexable by landmark id.
        mp_hand: object exposing an ``INDEX_FINGER_TIP`` attribute used as the key.
    """
    tip_id = mp_hand.INDEX_FINGER_TIP
    return landmarks[tip_id]

def move_mouse(index_finger_tip):
    """Move the OS cursor to the screen position matching the index fingertip.

    Args:
        index_finger_tip: object with ``x`` and ``y`` attributes; the caller
            passes a MediaPipe landmark, whose coordinates are normalized to
            the frame size.
    """
    x = int(index_finger_tip.x * screen_width)
    # Scale y by the full screen height, like x: dividing it by two confines
    # the cursor to the upper half of the screen, leaving the rest unreachable.
    y = int(index_finger_tip.y * screen_height)
    pyautogui.moveTo(x, y)

# Minimum number of seconds between two firings of the same one-shot action
# (click, double click, screenshot, window close) while its gesture is held.
_ONE_SHOT_COOLDOWN_S = 1.0
# Maps an action name to the time.monotonic() timestamp of its last firing.
_one_shot_last_fired = {}


def _one_shot_ready(action, cooldown=_ONE_SHOT_COOLDOWN_S):
    """Return True, and record the firing, if ``action`` is off its cooldown."""
    import time  # local import: the file's import cell does not provide it

    now = time.monotonic()
    last = _one_shot_last_fired.get(action)
    if last is not None and now - last < cooldown:
        return False
    _one_shot_last_fired[action] = now
    return True


def detect_gestures(frame, results):
    """Translate the detected hand poses into mouse, keyboard and volume actions.

    Right hand: cursor movement, left/right/double click, screenshot, Alt+F4.
    Left hand: master volume from the index/middle fingertip distance, plus
    scroll down (fist) or scroll up (open hand).

    One-shot actions are rate-limited per action by ``_ONE_SHOT_COOLDOWN_S``;
    without that, a gesture held across several frames would repeat the action
    on every frame (for example closing one window per frame with Alt+F4).
    The on-screen label is still drawn on every frame the gesture is seen.

    Args:
        frame: BGR image that labels and the volume bar are drawn onto in place.
        results: MediaPipe Hands output providing ``multi_hand_landmarks`` and
            ``multi_handedness``.

    Returns:
        None.
    """
    if not results.multi_hand_landmarks:
        return

    volume_active = False
    volume_percent = 0

    for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
        label = get_hand_label(handedness)
        lmList = [(lm.x, lm.y) for lm in hand_landmarks.landmark]

        if label == "Right":
            index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            # Distance between thumb tip (4) and index MCP joint (5).
            thumb_index_dist = get_distance(lmList[4], lmList[5])
            # Joint angles are computed once per hand instead of once per branch.
            index_angle = get_angle(lmList[5], lmList[6], lmList[8])
            middle_angle = get_angle(lmList[9], lmList[10], lmList[12])
            # Kept as two strict comparisons: a distance of exactly 0.05
            # satisfies neither, as in the per-branch checks.
            thumb_near = thumb_index_dist < 0.05
            thumb_far = thumb_index_dist > 0.05
            index_bent = index_angle < 50
            middle_bent = middle_angle < 50

            if thumb_near and index_angle > 90:
                move_mouse(index_tip)
            elif index_bent and middle_angle > 90 and thumb_far:
                if _one_shot_ready("left_click"):
                    mouse.press(Button.left)
                    mouse.release(Button.left)
                cv2.putText(frame, "Left Click", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
            elif middle_bent and index_angle > 90 and thumb_far:
                if _one_shot_ready("right_click"):
                    mouse.press(Button.right)
                    mouse.release(Button.right)
                cv2.putText(frame, "Right Click", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
            elif index_bent and middle_bent and thumb_far:
                if _one_shot_ready("double_click"):
                    pyautogui.doubleClick()
                cv2.putText(frame, "Double Click", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
            elif index_bent and middle_bent and thumb_near:
                if _one_shot_ready("screenshot"):
                    pyautogui.screenshot(f"screenshot_{random.randint(1,1000)}.png")
                cv2.putText(frame, "Screenshot", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,100,100), 2)
            elif is_fist(lmList):
                if _one_shot_ready("close_window"):
                    pyautogui.hotkey('alt', 'f4')
                cv2.putText(frame, "Window Closed", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,255), 2)

        elif label == "Left":
            # Index/middle fingertip distance drives the master volume.
            # NOTE(review): the volume is updated on every frame the left hand
            # is visible, including frames where a scroll gesture is also
            # recognised -- confirm this overlap is intended.
            index_dist = get_distance(lmList[8], lmList[12])
            volume_active = True
            vol = np.interp(index_dist, [0.01, 0.15], [minVol, maxVol])
            volume_percent = int(np.interp(index_dist, [0.01, 0.15], [0, 100]))
            volume.SetMasterVolumeLevel(vol, None)
            # Scrolling stays continuous on purpose: it repeats while held.
            if is_fist(lmList):
                pyautogui.scroll(-50)
                cv2.putText(frame, "Scroll Down", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (100, 255, 100), 2)
            elif is_open_hand(lmList):
                pyautogui.scroll(50)
                cv2.putText(frame, "Scroll Up", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 100, 200), 2)

    if volume_active:
        # Outline, filled level (2.5 px per percent over a 250 px bar), and text.
        cv2.rectangle(frame, (30, 150), (55, 400), (255, 255, 255), 2)
        cv2.rectangle(frame, (30, 400 - int(2.5 * volume_percent)), (55, 400), (0, 255, 0), -1)
        cv2.putText(frame, f'{volume_percent} %', (25, 430), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

def main():
    """Run the webcam loop: track hands, act on gestures, show the preview.

    Reads frames from capture device 0 until the stream ends or ``q`` is
    pressed in the preview window. The capture device and all OpenCV windows
    are released on exit, including when an exception is raised.
    """
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            grabbed, frame = cap.read()
            if not grabbed:
                break

            # Flip horizontally, then hand an RGB copy to the hand tracker.
            frame = cv2.flip(frame, 1)
            results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Overlay the hand skeleton(s), if any were detected.
            for hand_landmarks in results.multi_hand_landmarks or ():
                draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            detect_gestures(frame, results)
            cv2.imshow('Virtual Mouse', frame)

            pressed_key = cv2.waitKey(1) & 0xFF
            if pressed_key == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Start the webcam gesture loop when this code runs as the main module.
if __name__ == "__main__":
    main()
