In [1]:
#VOLUME CONTROL
import sys
import platform
import threading
from ctypes import POINTER, cast
from functools import wraps
import os
import math
import time

import cv2
import mediapipe as mp
import tkinter as tk
from tkinter import ttk, Scale, Button, HORIZONTAL, StringVar, W, E, Label
from PIL import Image, ImageTk

try:
    if platform.system() != "Windows":
        raise SystemExit("This script runs only on Windows.")

    from comtypes import CLSCTX_ALL, CoInitialize, CoUninitialize, GUID
    from comtypes.client import CreateObject
    from pycaw.pycaw import IAudioEndpointVolume, IMMDeviceEnumerator

except ImportError as e:
    print("Error: Missing pycaw, comtypes, or other required packages. Did you run 'pip install pycaw comtypes opencv-python mediapipe Pillow'?", file=sys.stderr)
    raise SystemExit(f"Required component import failed: {e}")

# EDataFlow value passed to GetDefaultAudioEndpoint below: 0 is eRender
# (the playback/output endpoint) in the Windows Core Audio API.
eRender = 0
# ERole value passed to GetDefaultAudioEndpoint below: 1 is eMultimedia
# in the Windows Core Audio API.
eMultimedia = 1

def ensure_com(func):
    """Decorate *func* so it runs between CoInitialize() and CoUninitialize().

    Any ``Exception`` raised by *func* is reported on stderr (message plus
    traceback) and swallowed; the wrapper then returns ``None``.  Callers must
    therefore be prepared for a ``None`` result when the wrapped call failed.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        CoInitialize()
        try:
            outcome = func(*args, **kwargs)
        except Exception as exc:
            print(f"Exception in COM handler ({func.__name__}): {exc}", file=sys.stderr)
            import traceback
            traceback.print_exc()
            outcome = None
        finally:
            # Always balance the CoInitialize() above, success or failure.
            CoUninitialize()
        return outcome
    return wrapper

def _create_mmdevice_enumerator():
    """Create an IMMDeviceEnumerator COM object.

    The ProgID is tried first; if that fails for any reason the object is
    created from the hard-coded class GUID instead.  An exception from the
    fallback attempt propagates to the caller.
    """
    try:
        enumerator = CreateObject(
            "MMDeviceEnumerator.MMDeviceEnumerator",
            interface=IMMDeviceEnumerator,
        )
    except Exception:
        fallback_clsid = GUID("{BCDE0395-E52F-467C-8E3D-C4579291692E}")
        enumerator = CreateObject(fallback_clsid, interface=IMMDeviceEnumerator)
    return enumerator

@ensure_com
def _get_volume_interface_for_default():
    """Return an IAudioEndpointVolume pointer for the default render endpoint.

    Because of ``@ensure_com`` the result is ``None`` when any step fails.
    """
    device_enumerator = _create_mmdevice_enumerator()
    endpoint = device_enumerator.GetDefaultAudioEndpoint(eRender, eMultimedia)
    activated = endpoint.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume_pointer_type = POINTER(IAudioEndpointVolume)
    return cast(activated, volume_pointer_type)

def _percent_to_scalar(p):
    return max(0.0, min(1.0, p / 100.0))

def _scalar_to_percent(s):
    return max(0.0, min(100.0, s * 100.0))

class PycawVolumeHandler:
    """Read and change volume/mute on the default audio endpoint.

    Every method is wrapped in ``ensure_com``, so each one returns ``None``
    instead of raising when the underlying COM call fails.
    """

    @ensure_com
    def get_initial_state(self):
        """Return ``(volume_percent, is_muted)`` for the endpoint."""
        endpoint = _get_volume_interface_for_default()
        scalar = float(endpoint.GetMasterVolumeLevelScalar())
        percent = round(_scalar_to_percent(scalar))
        muted = bool(endpoint.GetMute())
        return percent, muted

    @ensure_com
    def set_volume_and_handle_mute(self, new_pct, current_is_muted):
        """Set the volume to *new_pct* and keep mute in step with it.

        A volume of exactly 0 mutes the endpoint (if it was not already
        muted); any positive volume unmutes it (if it was muted).

        Returns the mute state after the call.
        """
        endpoint = _get_volume_interface_for_default()
        endpoint.SetMasterVolumeLevelScalar(_percent_to_scalar(new_pct), None)

        if new_pct == 0 and not current_is_muted:
            endpoint.SetMute(1, None)
            return True
        if new_pct > 0 and current_is_muted:
            endpoint.SetMute(0, None)
            return False
        # Nothing to change: mute state already matches the volume.
        return current_is_muted

    @ensure_com
    def toggle_mute_state(self, current_is_muted):
        """Set mute to the opposite of *current_is_muted* and return the new state."""
        endpoint = _get_volume_interface_for_default()
        target_muted = not current_is_muted
        if target_muted:
            endpoint.SetMute(1, None)
        else:
            endpoint.SetMute(0, None)
        return target_muted

class VideoProcessor(threading.Thread):
    """Background thread that reads webcam frames and counts extended fingers.

    Each frame is mirrored, run through MediaPipe Hands, annotated and then
    published through plain attributes that the GUI polls:

    * ``current_frame``  - latest annotated BGR frame (``None`` until the
      first frame has been processed).
    * ``volume_pct``     - volume derived from the finger count (20 per finger).
    * ``distance_text``  - status string for the UI.
    * ``volume_changed`` - ``True`` when the latest frame contained a hand.
    """

    FINGER_TIP_IDS = [mp.solutions.hands.HandLandmark.INDEX_FINGER_TIP,
                      mp.solutions.hands.HandLandmark.MIDDLE_FINGER_TIP,
                      mp.solutions.hands.HandLandmark.RING_FINGER_TIP,
                      mp.solutions.hands.HandLandmark.PINKY_TIP]

    FINGER_PIP_IDS = [mp.solutions.hands.HandLandmark.INDEX_FINGER_PIP,
                      mp.solutions.hands.HandLandmark.MIDDLE_FINGER_PIP,
                      mp.solutions.hands.HandLandmark.RING_FINGER_PIP,
                      mp.solutions.hands.HandLandmark.PINKY_PIP]

    def __init__(self, *args, **kwargs):
        """Create the thread and the MediaPipe hand tracker.

        Positional and keyword arguments are forwarded to
        ``threading.Thread.__init__`` (e.g. ``name=`` or ``daemon=``).
        """
        super().__init__(*args, **kwargs)
        self.running = True

        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            model_complexity=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            max_num_hands=1
        )
        self.mp_drawing = mp.solutions.drawing_utils

        self.current_frame = None
        self.volume_pct = 0
        self.distance_text = "Fingers: N/A"
        self.volume_changed = False

    def _count_extended_fingers(self, hand_landmarks):
        """Return how many of the five digits look extended (0-5).

        The four fingers count as extended when the tip has a smaller ``y``
        than its PIP joint.  The thumb is judged horizontally: it counts as
        extended when its tip lies further out than its IP joint, on the side
        of the hand where the thumb sits (decided by comparing the thumb MCP
        with the index MCP), so the check works for either hand orientation.
        """
        points = hand_landmarks.landmark
        ids = mp.solutions.hands.HandLandmark

        thumb_tip = points[ids.THUMB_TIP]
        thumb_ip = points[ids.THUMB_IP]
        thumb_mcp = points[ids.THUMB_MCP]
        index_mcp = points[ids.INDEX_FINGER_MCP]

        # Which side of the hand (in image x) the thumb is attached on.
        thumb_on_right_side = thumb_mcp.x > index_mcp.x
        if thumb_on_right_side:
            thumb_extended = thumb_tip.x > thumb_ip.x
        else:
            thumb_extended = thumb_tip.x < thumb_ip.x

        finger_count = 1 if thumb_extended else 0
        finger_count += sum(
            1
            for tip_id, pip_id in zip(self.FINGER_TIP_IDS, self.FINGER_PIP_IDS)
            if points[tip_id].y < points[pip_id].y
        )
        return finger_count

    def run(self):
        """Capture, analyse and publish frames until ``running`` is cleared.

        The camera and the MediaPipe graph are released here, on the thread
        that uses them, once the loop ends (normally or through an exception).
        """
        self.cap = cv2.VideoCapture(0)
        try:
            self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
            self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

            # Read the size back: the driver may not honour the request.
            frame_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            while self.running:
                success, img = self.cap.read()
                if not success:
                    time.sleep(0.01)
                    continue

                img = cv2.flip(img, 1)
                results = self.hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

                new_volume_pct = self.volume_pct
                hand_found = bool(results.multi_hand_landmarks)

                if hand_found:
                    hand_landmarks = results.multi_hand_landmarks[0]
                    finger_count = self._count_extended_fingers(hand_landmarks)
                    new_volume_pct = int(finger_count * 20)

                    # Landmarks are normalised; scale to pixels for drawing.
                    lm_wrist = hand_landmarks.landmark[mp.solutions.hands.HandLandmark.WRIST]
                    wx, wy = int(lm_wrist.x * frame_width), int(lm_wrist.y * frame_height)
                    cv2.circle(img, (wx, wy), 10, (0, 255, 0), cv2.FILLED)

                    self.mp_drawing.draw_landmarks(img, hand_landmarks, self.mp_hands.HAND_CONNECTIONS)
                    cv2.putText(img, f'VOL: {new_volume_pct}%', (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                    status_text = f"Fingers: {finger_count}/5"
                else:
                    status_text = "Fingers: No Hand Detected"

                # Publish the data first and raise the flag last so a reader
                # that sees volume_changed also sees the matching volume_pct.
                self.current_frame = img
                self.distance_text = status_text
                self.volume_pct = new_volume_pct
                self.volume_changed = hand_found

                time.sleep(0.02)
        finally:
            self.cap.release()
            self.hands.close()

    def stop(self):
        """Ask the capture loop to finish.

        While the thread is alive the camera is released by ``run()`` itself,
        so it is never closed underneath an in-flight ``cap.read()``.  The
        capture is released here only when the thread is not running.
        """
        self.running = False
        if not self.is_alive() and hasattr(self, 'cap') and self.cap.isOpened():
            self.cap.release()

class GestureVolumeApp:
    """Tkinter window showing the camera preview and applying gesture volume.

    The window polls the ``VideoProcessor`` thread every ``delay``
    milliseconds and forwards finger-count volume changes to
    ``PycawVolumeHandler``.
    """

    def __init__(self, window, window_title):
        """Build the UI, start the video thread and run the Tk main loop.

        This call blocks until the window is closed because it invokes
        ``window.mainloop()`` itself.
        """
        self.window = window
        self.window.title(window_title)

        self.volume_handler = PycawVolumeHandler()
        # The handler returns None when the COM call failed (the failure is
        # logged by the handler's decorator), so fall back to a neutral state
        # instead of crashing on tuple unpacking.
        initial_state = self.volume_handler.get_initial_state()
        if initial_state is None:
            print("Warning: could not read the initial volume state; assuming 0% and unmuted.", file=sys.stderr)
            initial_state = (0, False)
        self.current_volume_pct, self.is_muted = initial_state

        # Last gesture volume successfully pushed to the system; None means
        # nothing has been applied yet, so the first gesture always applies.
        self._last_applied_pct = None

        self.processor = VideoProcessor()
        self.processor.start()

        self.setup_gui()

        self.delay = 30
        self.update()

        self.window.protocol("WM_DELETE_WINDOW", self.on_closing)
        self.window.mainloop()

    def setup_gui(self):
        """Create the Tk variables and widgets."""
        self.distance_var = tk.StringVar(self.window, value="Fingers: N/A")
        self.volume_var = tk.StringVar(self.window, value=str(self.current_volume_pct))
        self.mute_text_var = tk.StringVar(self.window)

        style = ttk.Style()
        style.theme_use('clam')

        main_frame = ttk.Frame(self.window, padding="10 10 10 10")
        main_frame.pack(fill='both', expand=True)

        self.video_label = ttk.Label(main_frame, borderwidth=2, relief="groove")
        self.video_label.pack(pady=10)

        Label(main_frame, text="System Volume:").pack(pady=(5, 0))
        # Display-only slider: it is disabled and driven through volume_var.
        self.volume_scale = Scale(
            main_frame,
            from_=0, to=100, orient=HORIZONTAL, length=300, resolution=1,
            variable=self.volume_var, state='disabled',
            troughcolor="#A0E0FF", highlightbackground="#CCCCCC"
        )
        self.volume_scale.pack(pady=5)

        distance_frame = ttk.Frame(main_frame)
        distance_frame.pack(fill='x', pady=5)
        ttk.Label(distance_frame, text="Finger Count:", font=('Arial', 12)).pack(side='left', padx=(20, 5))
        self.distance_info_label = ttk.Label(
            distance_frame, textvariable=self.distance_var,
            font=('Arial', 14, 'bold'), foreground='#1E88E5'
        )
        self.distance_info_label.pack(side='left', fill='x', expand=True)

        self.mute_button = Button(
            main_frame, textvariable=self.mute_text_var, command=self.toggle_mute,
            fg="white", relief="raised", font=('Arial', 10, 'bold')
        )
        self.mute_button.pack(fill='x', pady=10)

        self._update_mute_button_text()

        ttk.Label(
            main_frame,
            text="Control volume by the number of fingers extended (0=0%, 5=100%).",
            font=('Arial', 10, 'italic')
        ).pack(pady=5)

    def _update_mute_button_text(self):
        """Refresh the mute button caption and colours from ``self.is_muted``."""
        if self.is_muted:
            self.mute_text_var.set("UNMUTE (Muted)")
            self.mute_button.config(bg="#FF6666", activebackground="#DD4444")
        else:
            self.mute_text_var.set("MUTE (Active)")
            self.mute_button.config(bg="#66FF66", activebackground="#44DD44")

    def toggle_mute(self):
        """Button callback: flip the mute state."""
        result = self.volume_handler.toggle_mute_state(self.is_muted)
        # None means the COM call failed; keep the previously known state.
        if result is not None:
            self.is_muted = result
        self._update_mute_button_text()

    def update(self):
        """Periodic tick: refresh the preview and apply a changed gesture volume.

        The volume is pushed to the system only when the gesture volume
        differs from the last value that was applied.  This avoids one COM
        round-trip per tick while a hand is visible and lets a manual mute
        stay in effect until the finger count changes.
        """
        # Read the attribute once; the worker thread may replace it meanwhile.
        frame = self.processor.current_frame
        if frame is not None:
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Keep a reference on self so the image is not garbage-collected.
            self.photo = ImageTk.PhotoImage(image=Image.fromarray(rgb_frame))
            self.video_label.config(image=self.photo)

            self.distance_var.set(self.processor.distance_text)

        if self.processor.volume_changed:
            new_volume_pct = self.processor.volume_pct
            self.processor.volume_changed = False

            self.volume_var.set(str(new_volume_pct))

            if new_volume_pct != self._last_applied_pct:
                result = self.volume_handler.set_volume_and_handle_mute(
                    new_volume_pct, self.is_muted
                )
                # None means the COM call failed: leave the state untouched so
                # the same value is retried on the next tick.
                if result is not None:
                    self.is_muted = result
                    self.current_volume_pct = new_volume_pct
                    self._last_applied_pct = new_volume_pct
                    self._update_mute_button_text()

        self.window.after(self.delay, self.update)

    def on_closing(self):
        """Window-close handler: stop the video thread, then destroy the window."""
        print("Stopping video processor thread and closing application...")
        self.processor.stop()
        self.processor.join()
        cv2.destroyAllWindows()
        self.window.destroy()

if __name__ == "__main__":
    # Create the Tk root and hand it to the app.  The constructor runs
    # window.mainloop() itself, so this line blocks until the window closes.
    root = tk.Tk()
    app = GestureVolumeApp(root, "Gesture Volume Control (Finger Count)")



Stopping video processor thread and closing application...


In [1]:
import sys
import platform
import threading
from ctypes import POINTER, cast
from functools import wraps
import os
import math
import time

import cv2
import mediapipe as mp
import tkinter as tk
from tkinter import ttk, Scale, Button, HORIZONTAL, StringVar, W, E, Label
from PIL import Image, ImageTk

try:
    if platform.system() != "Windows":
        raise SystemExit("This script runs only on Windows.")

    from comtypes import CLSCTX_ALL, CoInitialize, CoUninitialize, GUID
    from comtypes.client import CreateObject
    from pycaw.pycaw import IAudioEndpointVolume, IMMDeviceEnumerator

except ImportError as e:
    print("Error: Missing pycaw, comtypes, or other required packages. Did you run 'pip install pycaw comtypes opencv-python mediapipe Pillow'?", file=sys.stderr)
    raise SystemExit(f"Required component import failed: {e}")

# EDataFlow value passed to GetDefaultAudioEndpoint below: 1 is eCapture
# (the recording/microphone endpoint) in the Windows Core Audio API.
eCapture = 1
# ERole value passed to GetDefaultAudioEndpoint below: 1 is eMultimedia
# in the Windows Core Audio API.
eMultimedia = 1

def ensure_com(func):
    """Wrap *func* so COM is initialized before it runs and released after.

    An ``Exception`` escaping *func* is printed to stderr together with its
    traceback and then suppressed, in which case the wrapper yields ``None``.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        CoInitialize()
        try:
            value = func(*args, **kwargs)
        except Exception as err:
            print(f"Exception in COM handler ({func.__name__}): {err}", file=sys.stderr)
            import traceback
            traceback.print_exc()
            value = None
        finally:
            # Pairs with the CoInitialize() call above on every path.
            CoUninitialize()
        return value
    return wrapper

def _create_mmdevice_enumerator():
    """Instantiate the MMDeviceEnumerator COM object.

    Creation by ProgID is attempted first; on any failure the hard-coded
    class GUID is used instead.  Errors from that second attempt are not
    caught here.
    """
    prog_id = "MMDeviceEnumerator.MMDeviceEnumerator"
    try:
        return CreateObject(prog_id, interface=IMMDeviceEnumerator)
    except Exception:
        pass
    class_id = GUID("{BCDE0395-E52F-467C-8E3D-C4579291692E}")
    return CreateObject(class_id, interface=IMMDeviceEnumerator)

@ensure_com
def _get_volume_interface_for_default():
    """Return an IAudioEndpointVolume pointer for the default capture endpoint.

    Because of ``@ensure_com`` the result is ``None`` when any step fails.
    """
    device_enumerator = _create_mmdevice_enumerator()
    endpoint = device_enumerator.GetDefaultAudioEndpoint(eCapture, eMultimedia)
    activated = endpoint.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume_pointer_type = POINTER(IAudioEndpointVolume)
    return cast(activated, volume_pointer_type)

def _percent_to_scalar(p):
    return max(0.0, min(1.0, p / 100.0))

def _scalar_to_percent(s):
    return max(0.0, min(100.0, s * 100.0))

class PycawVolumeHandler:
    """Query and adjust level/mute on the default audio endpoint.

    All methods are decorated with ``ensure_com``; a failed COM call is
    logged by that decorator and the method returns ``None``.
    """

    @ensure_com
    def get_initial_state(self):
        """Return the current ``(level_percent, is_muted)`` pair."""
        iface = _get_volume_interface_for_default()
        level_scalar = float(iface.GetMasterVolumeLevelScalar())
        level_pct = round(_scalar_to_percent(level_scalar))
        return level_pct, bool(iface.GetMute())

    @ensure_com
    def set_volume_and_handle_mute(self, new_pct, current_is_muted):
        """Apply *new_pct* and align the mute flag with it.

        Level 0 mutes an unmuted endpoint; a positive level unmutes a muted
        one.  In every other case the mute flag is left alone.

        Returns the resulting mute state.
        """
        iface = _get_volume_interface_for_default()
        iface.SetMasterVolumeLevelScalar(_percent_to_scalar(new_pct), None)

        should_mute = new_pct == 0 and not current_is_muted
        if should_mute:
            iface.SetMute(1, None)
            return True

        should_unmute = new_pct > 0 and current_is_muted
        if should_unmute:
            iface.SetMute(0, None)
            return False

        return current_is_muted

    @ensure_com
    def toggle_mute_state(self, current_is_muted):
        """Invert the mute flag relative to *current_is_muted*; return the new flag."""
        iface = _get_volume_interface_for_default()
        flipped = not current_is_muted
        mute_value = 1 if flipped else 0
        iface.SetMute(mute_value, None)
        return flipped

class VideoProcessor(threading.Thread):
    """Background thread that reads webcam frames and counts extended fingers.

    Each frame is mirrored, run through MediaPipe Hands, annotated and then
    published through plain attributes that the GUI polls:

    * ``current_frame``  - latest annotated BGR frame (``None`` until the
      first frame has been processed).
    * ``volume_pct``     - microphone level derived from the finger count
      (20 per finger).
    * ``distance_text``  - status string for the UI.
    * ``volume_changed`` - ``True`` when the latest frame contained a hand.
    """

    FINGER_TIP_IDS = [mp.solutions.hands.HandLandmark.INDEX_FINGER_TIP,
                      mp.solutions.hands.HandLandmark.MIDDLE_FINGER_TIP,
                      mp.solutions.hands.HandLandmark.RING_FINGER_TIP,
                      mp.solutions.hands.HandLandmark.PINKY_TIP]

    FINGER_PIP_IDS = [mp.solutions.hands.HandLandmark.INDEX_FINGER_PIP,
                      mp.solutions.hands.HandLandmark.MIDDLE_FINGER_PIP,
                      mp.solutions.hands.HandLandmark.RING_FINGER_PIP,
                      mp.solutions.hands.HandLandmark.PINKY_PIP]

    def __init__(self, *args, **kwargs):
        """Create the thread and the MediaPipe hand tracker.

        Positional and keyword arguments are forwarded to
        ``threading.Thread.__init__`` (e.g. ``name=`` or ``daemon=``).
        """
        super().__init__(*args, **kwargs)
        self.running = True

        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            model_complexity=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            max_num_hands=1
        )
        self.mp_drawing = mp.solutions.drawing_utils

        self.current_frame = None
        self.volume_pct = 0
        self.distance_text = "Fingers: N/A"
        self.volume_changed = False

    def _count_extended_fingers(self, hand_landmarks):
        """Return how many of the five digits look extended (0-5).

        The four fingers count as extended when the tip has a smaller ``y``
        than its PIP joint.  The thumb is judged horizontally: it counts as
        extended when its tip lies further out than its IP joint, on the side
        of the hand where the thumb sits (decided by comparing the thumb MCP
        with the index MCP).
        """
        points = hand_landmarks.landmark
        ids = mp.solutions.hands.HandLandmark

        thumb_tip = points[ids.THUMB_TIP]
        thumb_ip = points[ids.THUMB_IP]
        thumb_mcp = points[ids.THUMB_MCP]
        index_mcp = points[ids.INDEX_FINGER_MCP]

        # Side of the hand (in image x) the thumb is attached on.  This is a
        # geometric test on the mirrored frame, not a true left/right-hand
        # classification.
        thumb_on_right_side = thumb_mcp.x > index_mcp.x
        if thumb_on_right_side:
            thumb_extended = thumb_tip.x > thumb_ip.x
        else:
            thumb_extended = thumb_tip.x < thumb_ip.x

        finger_count = 1 if thumb_extended else 0
        finger_count += sum(
            1
            for tip_id, pip_id in zip(self.FINGER_TIP_IDS, self.FINGER_PIP_IDS)
            if points[tip_id].y < points[pip_id].y
        )
        return finger_count

    def run(self):
        """Capture, analyse and publish frames until ``running`` is cleared.

        The camera and the MediaPipe graph are released here, on the thread
        that uses them, once the loop ends (normally or through an exception).
        """
        self.cap = cv2.VideoCapture(0)
        try:
            self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
            self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

            # Read the size back: the driver may not honour the request.
            frame_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            while self.running:
                success, img = self.cap.read()
                if not success:
                    time.sleep(0.01)
                    continue

                img = cv2.flip(img, 1)
                results = self.hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

                new_volume_pct = self.volume_pct
                hand_found = bool(results.multi_hand_landmarks)

                if hand_found:
                    hand_landmarks = results.multi_hand_landmarks[0]
                    finger_count = self._count_extended_fingers(hand_landmarks)
                    new_volume_pct = int(finger_count * 20)

                    # Landmarks are normalised; scale to pixels for drawing.
                    lm_wrist = hand_landmarks.landmark[mp.solutions.hands.HandLandmark.WRIST]
                    wx, wy = int(lm_wrist.x * frame_width), int(lm_wrist.y * frame_height)
                    cv2.circle(img, (wx, wy), 10, (0, 255, 0), cv2.FILLED)

                    self.mp_drawing.draw_landmarks(img, hand_landmarks, self.mp_hands.HAND_CONNECTIONS)
                    cv2.putText(img, f'MIC VOL: {new_volume_pct}%', (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                    status_text = f"Fingers: {finger_count}/5"
                else:
                    status_text = "Fingers: No Hand Detected"

                # Publish the data first and raise the flag last so a reader
                # that sees volume_changed also sees the matching volume_pct.
                self.current_frame = img
                self.distance_text = status_text
                self.volume_pct = new_volume_pct
                self.volume_changed = hand_found

                time.sleep(0.02)
        finally:
            self.cap.release()
            self.hands.close()

    def stop(self):
        """Ask the capture loop to finish.

        While the thread is alive the camera is released by ``run()`` itself,
        so it is never closed underneath an in-flight ``cap.read()``.  The
        capture is released here only when the thread is not running.
        """
        self.running = False
        if not self.is_alive() and hasattr(self, 'cap') and self.cap.isOpened():
            self.cap.release()

class GestureVolumeApp:
    """Tkinter window showing the camera preview and applying gesture mic level.

    The window polls the ``VideoProcessor`` thread every ``delay``
    milliseconds and forwards finger-count level changes to
    ``PycawVolumeHandler``.
    """

    def __init__(self, window, window_title):
        """Build the UI, start the video thread and run the Tk main loop.

        This call blocks until the window is closed because it invokes
        ``window.mainloop()`` itself.
        """
        self.window = window
        self.window.title(window_title)

        self.volume_handler = PycawVolumeHandler()
        # The handler returns None when the COM call failed (the failure is
        # logged by the handler's decorator), so fall back to a neutral state
        # instead of crashing on tuple unpacking.
        initial_state = self.volume_handler.get_initial_state()
        if initial_state is None:
            print("Warning: could not read the initial volume state; assuming 0% and unmuted.", file=sys.stderr)
            initial_state = (0, False)
        self.current_volume_pct, self.is_muted = initial_state

        # Last gesture level successfully pushed to the system; None means
        # nothing has been applied yet, so the first gesture always applies.
        self._last_applied_pct = None

        self.processor = VideoProcessor()
        self.processor.start()

        self.setup_gui()

        self.delay = 30
        self.update()

        self.window.protocol("WM_DELETE_WINDOW", self.on_closing)
        self.window.mainloop()

    def setup_gui(self):
        """Create the Tk variables and widgets."""
        self.distance_var = tk.StringVar(self.window, value="Fingers: N/A")
        self.volume_var = tk.StringVar(self.window, value=str(self.current_volume_pct))
        self.mute_text_var = tk.StringVar(self.window)

        style = ttk.Style()
        style.theme_use('clam')

        main_frame = ttk.Frame(self.window, padding="10 10 10 10")
        main_frame.pack(fill='both', expand=True)

        self.video_label = ttk.Label(main_frame, borderwidth=2, relief="groove")
        self.video_label.pack(pady=10)

        Label(main_frame, text="Microphone Volume:").pack(pady=(5, 0))
        # Display-only slider: it is disabled and driven through volume_var.
        self.volume_scale = Scale(
            main_frame,
            from_=0, to=100, orient=HORIZONTAL, length=300, resolution=1,
            variable=self.volume_var, state='disabled',
            troughcolor="#A0E0FF", highlightbackground="#CCCCCC"
        )
        self.volume_scale.pack(pady=5)

        distance_frame = ttk.Frame(main_frame)
        distance_frame.pack(fill='x', pady=5)
        ttk.Label(distance_frame, text="Finger Count:", font=('Arial', 12)).pack(side='left', padx=(20, 5))
        self.distance_info_label = ttk.Label(
            distance_frame, textvariable=self.distance_var,
            font=('Arial', 14, 'bold'), foreground='#1E88E5'
        )
        self.distance_info_label.pack(side='left', fill='x', expand=True)

        self.mute_button = Button(
            main_frame, textvariable=self.mute_text_var, command=self.toggle_mute,
            fg="white", relief="raised", font=('Arial', 10, 'bold')
        )
        self.mute_button.pack(fill='x', pady=10)

        self._update_mute_button_text()

        ttk.Label(
            main_frame,
            text="Control MIC volume by the number of fingers extended (0=0%, 5=100%).",
            font=('Arial', 10, 'italic')
        ).pack(pady=5)

    def _update_mute_button_text(self):
        """Refresh the mute button caption and colours from ``self.is_muted``."""
        if self.is_muted:
            self.mute_text_var.set("UNMUTE (Muted)")
            self.mute_button.config(bg="#FF6666", activebackground="#DD4444")
        else:
            self.mute_text_var.set("MUTE (Active)")
            self.mute_button.config(bg="#66FF66", activebackground="#44DD44")

    def toggle_mute(self):
        """Button callback: flip the mute state."""
        result = self.volume_handler.toggle_mute_state(self.is_muted)
        # None means the COM call failed; keep the previously known state.
        if result is not None:
            self.is_muted = result
        self._update_mute_button_text()

    def update(self):
        """Periodic tick: refresh the preview and apply a changed gesture level.

        The level is pushed to the system only when the gesture level differs
        from the last value that was applied.  This avoids one COM round-trip
        per tick while a hand is visible and lets a manual mute stay in effect
        until the finger count changes.
        """
        # Read the attribute once; the worker thread may replace it meanwhile.
        frame = self.processor.current_frame
        if frame is not None:
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Keep a reference on self so the image is not garbage-collected.
            self.photo = ImageTk.PhotoImage(image=Image.fromarray(rgb_frame))
            self.video_label.config(image=self.photo)

            self.distance_var.set(self.processor.distance_text)

        if self.processor.volume_changed:
            new_volume_pct = self.processor.volume_pct
            self.processor.volume_changed = False

            self.volume_var.set(str(new_volume_pct))

            if new_volume_pct != self._last_applied_pct:
                result = self.volume_handler.set_volume_and_handle_mute(
                    new_volume_pct, self.is_muted
                )
                # None means the COM call failed: leave the state untouched so
                # the same value is retried on the next tick.
                if result is not None:
                    self.is_muted = result
                    self.current_volume_pct = new_volume_pct
                    self._last_applied_pct = new_volume_pct
                    self._update_mute_button_text()

        self.window.after(self.delay, self.update)

    def on_closing(self):
        """Window-close handler: stop the video thread, then destroy the window."""
        print("Stopping video processor thread and closing application...")
        self.processor.stop()
        self.processor.join()
        cv2.destroyAllWindows()
        self.window.destroy()

if __name__ == "__main__":
    # Create the Tk root and hand it to the app.  The constructor runs
    # window.mainloop() itself, so this line blocks until the window closes.
    root = tk.Tk()
    app = GestureVolumeApp(root, "Gesture Microphone Control (Finger Count)")



Stopping video processor thread and closing application...
