In [1]:
pip install opencv-python mediapipe pycaw numpy comtypes

Collecting opencv-python
  Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting mediapipe
  Using cached mediapipe-0.10.21-cp312-cp312-win_amd64.whl.metadata (10 kB)
Collecting pycaw
  Using cached pycaw-20240210-py3-none-any.whl.metadata (1.7 kB)
Collecting comtypes
  Using cached comtypes-1.4.11-py3-none-any.whl.metadata (7.2 kB)
Collecting jax (from mediapipe)
  Using cached jax-0.6.2-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib (from mediapipe)
  Using cached jaxlib-0.6.2-cp312-cp312-win_amd64.whl.metadata (1.4 kB)
Collecting opencv-contrib-python (from mediapipe)
  Using cached opencv_contrib_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Using cached sounddevice-0.5.2-py3-none-win_amd64.whl.metadata (1.6 kB)
Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
Using cached mediapipe-0.10.21-cp312-cp312-win_amd64.whl (51.0 MB)
Using cached pycaw-20240210-py3-non

In [1]:
import cv2
import numpy as np
import mediapipe as mp
import math
import platform
import os
import sys
import time
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# Configuration
MIN_DIST = 0.02 # Minimum distance between fingers
MAX_DIST = 0.3   # Maximum distance between fingers
SMOOTHING_FACTOR = 7
CALIBRATION_TIME = 3  # Seconds for auto-calibration

class GestureVolumeControl:
    def __init__(self):
        self.cap = cv2.VideoCapture(0)
        self.frame_width = 640
        self.frame_height = 480
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.frame_width)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.frame_height)
        
        # Mediapipe setup
        self.mp_hands = mp.solutions.hands
        self.mp_drawing = mp.solutions.drawing_utils
        self.hands = self.mp_hands.Hands(
            max_num_hands=1,
            min_detection_confidence=0.8,
            min_tracking_confidence=0.5
        )
        
        # Volume control
        self.volume_control, self.system = self.init_volume_control()
        self.prev_vol = self.get_current_volume()
        self.vol_history = []
        
        # Auto-calibration
        self.min_dist = MIN_DIST
        self.max_dist = MAX_DIST
        self.calibrated = False
        
        # System tray icon (Windows only)
        self.tray_icon = None
        if platform.system() == 'Windows':
            self.setup_system_tray()

    def init_volume_control(self):
        system = platform.system()
        volume_control = None
        
        if system == 'Windows':
            devices = AudioUtilities.GetSpeakers()
            interface = devices.Activate(
                IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
            volume_control = cast(interface, POINTER(IAudioEndpointVolume))
        elif system == 'Darwin':  # macOS
            volume_control = "osascript"
        elif system == 'Linux':
            try:
                import alsaaudio
                mixer = alsaaudio.Mixer()
                volume_control = mixer
            except:
                print("Linux volume control requires alsaaudio")
        
        return volume_control, system

    def get_current_volume(self):
        if self.system == 'Windows' and self.volume_control:
            return self.volume_control.GetMasterVolumeLevelScalar()
        return 0.5  # Default

    def set_volume(self, volume_level):
        vol = max(0.0, min(1.0, volume_level))
        
        if self.system == 'Windows' and self.volume_control:
            self.volume_control.SetMasterVolumeLevelScalar(vol, None)
        elif self.system == 'Darwin' and self.volume_control == "osascript":
            vol_percent = int(vol * 100)
            os.system(f"osascript -e 'set volume output volume {vol_percent}'")
        elif self.system == 'Linux' and self.volume_control:
            self.volume_control.setvolume(int(vol * 100))

    def get_distance(self, landmark1, landmark2):
        return math.sqrt((landmark1.x - landmark2.x)**2 + 
                         (landmark1.y - landmark2.y)**2)

    def auto_calibrate(self):
        print("Calibrating... Show your hand with fingers fully open and closed")
        start_time = time.time()
        min_vals = []
        max_vals = []
        
        while time.time() - start_time < CALIBRATION_TIME:
            success, frame = self.cap.read()
            if not success:
                continue
                
            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = self.hands.process(rgb_frame)
            
            if results.multi_hand_landmarks:
                for landmarks in results.multi_hand_landmarks:
                    thumb_tip = landmarks.landmark[4]
                    index_tip = landmarks.landmark[8]
                    distance = self.get_distance(thumb_tip, index_tip)
                    min_vals.append(distance)
                    max_vals.append(distance)
            
            cv2.putText(frame, "Calibrating...", (50, 50), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Calibration', frame)
            cv2.waitKey(1)
        
        if min_vals and max_vals:
            self.min_dist = min(min_vals) * 0.8
            self.max_dist = max(max_vals) * 1.2
            print(f"Calibration complete: Min={self.min_dist:.3f}, Max={self.max_dist:.3f}")
        self.calibrated = True
        cv2.destroyWindow('Calibration')

    def setup_system_tray(self):
        try:
            import pystray
            from PIL import Image
            
            # Create system tray icon
            image = Image.new('RGB', (64, 64), (70, 70, 180))
            menu = pystray.Menu(
                pystray.MenuItem('Exit', self.exit_app)
            )
            self.tray_icon = pystray.Icon(
                "GestureControl",
                image,
                "Gesture Volume Control",
                menu
            )
        except ImportError:
            print("System tray requires pystray and PIL")

    def run_tray_icon(self):
        if self.tray_icon:
            self.tray_icon.run_detached()

    def exit_app(self):
        self.cap.release()
        cv2.destroyAllWindows()
        if self.tray_icon:
            self.tray_icon.stop()
        sys.exit(0)

    def run(self):
        self.run_tray_icon()
        
        if not self.calibrated:
            self.auto_calibrate()
        
        while True:
            success, frame = self.cap.read()
            if not success:
                continue
                
            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = self.hands.process(rgb_frame)
            
            volume_percent = self.prev_vol
            is_pinching = False
            
            if results.multi_hand_landmarks:
                for landmarks in results.multi_hand_landmarks:
                    self.mp_drawing.draw_landmarks(
                        frame, landmarks, self.mp_hands.HAND_CONNECTIONS)
                    
                    thumb_tip = landmarks.landmark[4]
                    index_tip = landmarks.landmark[8]
                    distance = self.get_distance(thumb_tip, index_tip)
                    is_pinching = distance < self.min_dist * 1.5
                    
                    # Map distance to volume
                    vol = np.interp(distance, [self.min_dist, self.max_dist], [0, 1])
                    
                    # Smooth volume
                    self.vol_history.append(vol)
                    if len(self.vol_history) > SMOOTHING_FACTOR:
                        self.vol_history.pop(0)
                    smoothed_vol = sum(self.vol_history) / len(self.vol_history)
                    
                    volume_percent = smoothed_vol
                    self.prev_vol = volume_percent
                    self.set_volume(volume_percent)
                    
                    # Visual feedback
                    h, w, _ = frame.shape
                    thumb_pos = (int(thumb_tip.x * w), int(thumb_tip.y * h))
                    index_pos = (int(index_tip.x * w), int(index_tip.y * h))
                    
                    color = (0, 255, 0) if is_pinching else (0, 0, 255)
                    cv2.line(frame, thumb_pos, index_pos, color, 3)
                    cv2.circle(frame, thumb_pos, 8, color, cv2.FILLED)
                    cv2.circle(frame, index_pos, 8, color, cv2.FILLED)
            
            # UI Elements
            cv2.rectangle(frame, (50, 150), (85, 400), (0, 0, 255), 3)
            cv2.rectangle(frame, (50, int(400 - volume_percent * 250)), 
                         (85, 400), (0, 0, 255), cv2.FILLED)
            cv2.putText(frame, f"{int(volume_percent * 100)}%", 
                       (50, 450), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            
            status_text = "ACTIVE" if is_pinching else "READY"
            cv2.putText(frame, f"Status: {status_text}", 
                       (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, 
                       (0, 255, 0) if is_pinching else (0, 0, 255), 2)
            
            cv2.putText(frame, "Press ESC to exit", 
                       (self.frame_width - 200, self.frame_height - 20),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
            
            cv2.imshow('Gesture Volume Control', frame)
            
            # Exit on ESC
            if cv2.waitKey(1) == 27:
                self.exit_app()

if __name__ == "__main__":
    app = GestureVolumeControl()
    app.run()

System tray requires pystray and PIL
Calibrating... Show your hand with fingers fully open and closed


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
