In [1]:
pip install opencv-python mediapipe numpy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import cv2
import mediapipe as mp
import math
import numpy as np
import subprocess # For macOS audio control
import time 

In [5]:
mp_solutions = mp.solutions
mp_hands = mp_solutions.hands
mp_draw = mp_solutions.drawing_utils
hands = mp_hands.Hands(min_detection_confidence=0.7, max_num_hands=1)

I0000 00:00:1751163399.737446  140327 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M2


In [7]:
# import cv2
# def list_cameras():
#     i = 0
#     while True:
#         cap = cv2.VideoCapture(i)
#         if not cap.read()[0]: 
#             break
#         else:
#             print(f"Camera found at index {i}")
#             cap.release() 
#         i += 1
#     if i == 0:
#         print("No cameras found. Check connections and permissions.")

# if __name__ == "__main__":
#     list_cameras()

In [9]:
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    print("Error: Could not open video stream. Make sure your webcam is connected and accessible.")
    print("If you are on macOS, you might need to grant camera permissions in System Settings > Privacy & Security > Camera.")
    exit()



In [11]:
current_volume_percentage = 0 
prev_volume_percentage = 0 
smoothing_factor = 0.7

In [13]:
# Mute Control Variables
last_mute_action_time = 0
mute_cooldown_duration = 2

In [15]:
# Gesture Confirmation Visuals Variables
confirmation_center = None
confirmation_color = None
confirmation_end_time = 0
confirmation_duration = 0.3  # seconds

In [17]:
def finger_extended(tip_y, pip_y, threshold=0.03):
    # Tip is "higher" (smaller y-value) than PIP for an extended finger
    return tip_y < pip_y - threshold

In [19]:
def finger_curled(tip_y, pip_y, threshold=0.03):
    # Tip is "lower" (larger y-value) than PIP for a curled finger
    return tip_y > pip_y + threshold

In [21]:
def get_system_mute_status_macos():
    try:
        output = subprocess.check_output(["osascript", "-e", "output muted of (get volume settings)"]).decode("utf-8").strip()
        return output == "true"
    except Exception as e:
        # print(f"DEBUG: Error getting mute status: {e}") 
        return False

In [23]:
def set_system_mute_status_macos(mute_state):
    try:
        subprocess.call(["osascript", "-e", f"set volume output muted {str(mute_state).lower()}"])
        print(f"DEBUG: osascript mute command sent: {mute_state}") # Confirm command was sent
    except Exception as e:
        print(f"ERROR: Failed to send mute command via osascript: {e}")


In [27]:
while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to grab frame.")
        break

    frame = cv2.flip(frame, 1)
    h, w, c = frame.shape

    # --- Instructional Text (Top Right) ---
    instruction1 = "Open Palm to Mute"
    instruction2 = "Peace Sign to Unmute"
    font = cv2.FONT_HERSHEY_PLAIN
    font_scale = 2
    color = (0, 0, 0)
    thickness = 2
    (w1, h1), _ = cv2.getTextSize(instruction1, font, font_scale, thickness)
    (w2, h2), _ = cv2.getTextSize(instruction2, font, font_scale, thickness)
    margin = 30
    pos1 = (w - w1 - margin, margin + h1)
    pos2 = (w - w2 - margin, margin + h1 + h2 + 20)
    cv2.putText(frame, instruction1, pos1, font, font_scale, color, thickness)
    cv2.putText(frame, instruction2, pos2, font, font_scale, color, thickness)

    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_frame)

    is_muted = get_system_mute_status_macos()
    mute_status_text = "Muted" if is_muted else "Unmuted"
    cv2.putText(frame, f'Status: {mute_status_text}', (50, 80), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 0) if is_muted else (0, 0, 0), 2)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # drawing a line between thumb and index finger
            thumb_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_MCP]
            index_finger_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP]
            thumb_point = (int(thumb_mcp.x * w), int(thumb_mcp.y * h))
            index_point = (int(index_finger_mcp.x * w), int(index_finger_mcp.y * h))
            cv2.line(frame, thumb_point, index_point, (224, 224, 224), 2)
          

            # Volume Control Logic
            thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
            index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            x1, y1 = int(thumb_tip.x * w), int(thumb_tip.y * h)
            x2, y2 = int(index_tip.x * w), int(index_tip.y * h)
            length = math.hypot(x2 - x1, y2 - y1)
            min_hand_length, max_hand_length = 30, 200

            target_volume_percentage = np.interp(length, [min_hand_length, max_hand_length], [0, 100])
            current_volume_percentage = (smoothing_factor * current_volume_percentage) + ((1 - smoothing_factor) * target_volume_percentage)

            if abs(current_volume_percentage - prev_volume_percentage) > 2:
                if not is_muted:
                    subprocess.call(["osascript", "-e", f"set volume output volume {int(current_volume_percentage)}"])
                prev_volume_percentage = current_volume_percentage

            cv2.putText(frame, f'Vol: {int(current_volume_percentage)}%', (50, 50), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 0), 2)

            # Customizable Volume Bar
            bar_height = int(np.interp(current_volume_percentage, [0, 100], [400, 150]))
            cv2.rectangle(frame, (50, 150), (85, 400), (0, 0, 0), 3) # Outline
            cv2.rectangle(frame, (50, bar_height), (85, 400), (255, 255, 255), -1) # Fill
            cv2.putText(frame, "100%", (40, 140), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.putText(frame, "0%", (45, 415), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            # Mute/Unmute Gesture Detection Logic
            current_time = time.time()
            if current_time - last_mute_action_time > mute_cooldown_duration:
                thumb_tip_y = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y
                thumb_ip_y = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_IP].y
                index_tip_y = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y
                index_pip_y = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].y
                middle_tip_y = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP].y
                middle_pip_y = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y
                ring_tip_y = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP].y
                ring_pip_y = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_PIP].y
                pinky_tip_y = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP].y
                pinky_pip_y = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_PIP].y

                is_full_open_palm = (finger_extended(thumb_tip_y, thumb_ip_y) and
                                     finger_extended(index_tip_y, index_pip_y) and
                                     finger_extended(middle_tip_y, middle_pip_y) and
                                     finger_extended(ring_tip_y, ring_pip_y) and
                                     finger_extended(pinky_tip_y, pinky_pip_y))
                is_peace_sign = (finger_extended(index_tip_y, index_pip_y) and
                                 finger_extended(middle_tip_y, middle_pip_y) and
                                 finger_curled(ring_tip_y, ring_pip_y) and
                                 finger_curled(pinky_tip_y, pinky_pip_y))

                if is_full_open_palm and not is_muted:
                    set_system_mute_status_macos(True)
                    last_mute_action_time = time.time()
                    confirmation_center = (int(np.mean([lm.x * w for lm in hand_landmarks.landmark])), int(np.mean([lm.y * h for lm in hand_landmarks.landmark])))
                    confirmation_color = (0, 0, 255) # Red for mute
                    confirmation_end_time = time.time() + confirmation_duration

                elif is_peace_sign and is_muted:
                    set_system_mute_status_macos(False)
                    last_mute_action_time = time.time()
                    confirmation_center = (int(np.mean([lm.x * w for lm in hand_landmarks.landmark])), int(np.mean([lm.y * h for lm in hand_landmarks.landmark])))
                    confirmation_color = (0, 255, 0) # Green for unmute
                    confirmation_end_time = time.time() + confirmation_duration

    # Draw Gesture Confirmation Circle
    if confirmation_center and time.time() < confirmation_end_time:
        cv2.circle(frame, confirmation_center, 20, confirmation_color, -1)

    cv2.imshow("Gesture Volume & Mute Control (macOS)", frame)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('q') or key == ord('Q') or key == 27:
        break

KeyboardInterrupt: 

In [27]:
cap.release()
cv2.destroyAllWindows()