In [2]:
import cv2
import mediapipe as mp
import pyautogui

# MediaPipe setup: module aliases plus the hand detector used by the loop below
# (at most one hand, static_image_mode disabled, 0.7 detection confidence).
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    min_detection_confidence=0.7,
    max_num_hands=1,
    static_image_mode=False,
)

# Utility: finger-raised check
def is_finger_up(landmarks, tip_id, pip_id):
    """Return True when the tip landmark's y is smaller than the joint landmark's y.

    Args:
        landmarks: Indexable collection whose items expose a ``.y`` attribute.
        tip_id: Index of the fingertip landmark.
        pip_id: Index of the joint landmark the tip is compared against.
    """
    tip_y = landmarks[tip_id].y
    joint_y = landmarks[pip_id].y
    return joint_y > tip_y

# Gesture detection
def classify_gesture(landmarks):
    """Map a set of hand landmarks to a gesture name.

    Builds a five-element state (thumb, index, middle, ring, pinky) and looks
    it up in a table of known poses. Any state not in the table yields
    "Unknown".

    Args:
        landmarks: Indexable collection of landmarks exposing ``.x`` and ``.y``.

    Returns:
        One of "One Finger", "Fist", "Thumbs Up", "Open Palm" or "Unknown".
    """
    # (tip, pip) landmark index pairs for index, middle, ring and pinky.
    finger_joints = ((8, 6), (12, 10), (16, 14), (20, 18))

    # Thumb is judged horizontally (tip left of the joint below it) - left hand assumption.
    thumb_state = landmarks[4].x < landmarks[3].x
    state = (thumb_state,) + tuple(
        is_finger_up(landmarks, tip, pip) for tip, pip in finger_joints
    )

    known_poses = {
        (False, True, False, False, False): "One Finger",
        (False, False, False, False, False): "Fist",
        (True, False, False, False, False): "Thumbs Up",
        (True, True, True, True, True): "Open Palm",
    }
    return known_poses.get(state, "Unknown")

# Webcam
cap = cv2.VideoCapture(0)

# Key sent to the focused application when each gesture first appears.
gesture_keys = {
    "One Finger": "space",  # Play/Pause
    "Fist": "s",            # Stop
    "Thumbs Up": "up",      # Like/Volume up
    "Open Palm": "r",       # Reset
}
last_gesture = None  # gesture classified on the previous frame (None = no hand seen)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; cv2.flip would raise on it, so stop cleanly.
            break

        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb)

        gesture = None
        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                gesture = classify_gesture(hand_landmarks.landmark)

                # Show gesture on screen
                cv2.putText(frame, f"Gesture: {gesture}", (10, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 255, 0), 2)

        # Trigger the action only on the frame where the gesture changes.
        # Pressing on every frame would, for example, toggle play/pause many
        # times per second for as long as the finger is held up.
        if gesture != last_gesture and gesture in gesture_keys:
            pyautogui.press(gesture_keys[gesture])
        last_gesture = gesture

        cv2.imshow("HandyPanda 🐼", frame)
        if cv2.waitKey(1) & 0xFF == 27:  # Esc quits
            break
finally:
    # Always free the camera and close the preview window, even when the loop
    # is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()


In [3]:
import cv2
import mediapipe as mp
import pyautogui

# Initialize MediaPipe: module aliases and a detector limited to one hand
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

hands = mp_hands.Hands(
    min_detection_confidence=0.7,
    max_num_hands=1,
)

# Utility Functions
def is_finger_up(landmarks, tip_id, pip_id):
    """Return True when landmark ``tip_id`` has a smaller y than landmark ``pip_id``."""
    tip_point = landmarks[tip_id]
    joint_point = landmarks[pip_id]
    return tip_point.y < joint_point.y

def is_finger_folded(landmarks, tip_id, pip_id):
    """Return True when landmark ``tip_id`` has a larger y than landmark ``pip_id``."""
    tip_y = landmarks[tip_id].y
    joint_y = landmarks[pip_id].y
    return joint_y < tip_y

def is_thumb_up(landmarks):
    """Return True when thumb landmarks 4, 3 and 2 have strictly increasing y.

    Landmark 2 is only read when landmark 4 is already above landmark 3.
    """
    tip_y = landmarks[4].y
    mid_y = landmarks[3].y
    if not tip_y < mid_y:
        return False
    return mid_y < landmarks[2].y

# Gesture Classifier
def classify_gesture(landmarks):
    """Label the hand pose as thumbs-up, index-pointing, or unknown.

    Both recognised poses require middle, ring and pinky to be folded. With
    those folded, a raised thumb plus a folded index is a thumbs-up, and a
    raised index is a pointing gesture. Thumbs-up is checked first.

    Args:
        landmarks: Indexable collection of landmarks exposing ``.y``.

    Returns:
        "Thumbs Up 👍", "Index Pointing 👉" or "Unknown".
    """
    thumb_raised = is_thumb_up(landmarks)
    index_raised = is_finger_up(landmarks, 8, 6)
    # Middle, ring and pinky, evaluated eagerly in that order.
    others_curled = all([
        is_finger_folded(landmarks, 12, 10),
        is_finger_folded(landmarks, 16, 14),
        is_finger_folded(landmarks, 20, 18),
    ])
    index_curled = is_finger_folded(landmarks, 8, 6)

    if others_curled:
        if thumb_raised and index_curled:
            return "Thumbs Up 👍"
        if index_raised:
            return "Index Pointing 👉"

    return "Unknown"

# OpenCV Webcam Capture
cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Flip and convert color
        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process frame
        result = hands.process(rgb)

        # Label shown when no hand is detected in this frame.
        gesture = "Waiting..."

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                landmarks = hand_landmarks.landmark
                gesture = classify_gesture(landmarks)

        # Display gesture. Drawn outside the hand check so the "Waiting..."
        # label is visible when no hand is in view.
        cv2.putText(frame, f'Gesture: {gesture}', (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 100), 2)

        # Display output
        cv2.imshow("HandyPanda 🐼", frame)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release everything, even when the loop is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()


In [8]:
import cv2
import mediapipe as mp
import pyautogui
import numpy as np

# Initialize MediaPipe (module aliases + single-hand detector)
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    min_detection_confidence=0.7,
    max_num_hands=1,
)

# Screen size for mapping normalized landmark coordinates to pixels
screen_w, screen_h = pyautogui.size()

# Gesture tuning
click_threshold = 0.05     # thumb-index distance below which a click fires
scroll_sensitivity = 0.01  # minimum wrist y change per frame to scroll: higher = less sensitive

# Wrist y from the previous frame (None until a hand has been seen)
prev_y = None
# Distance utility
def get_distance(p1, p2):
    """Return the straight-line distance between two points in the x/y plane.

    Both arguments only need ``.x`` and ``.y`` attributes.
    """
    dx = p1.x - p2.x
    dy = p1.y - p2.y
    return np.sqrt(dx**2 + dy**2)

# Webcam feed
cap = cv2.VideoCapture(0)
pinch_held = False  # True while thumb and index stay together (one click per pinch)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                landmarks = hand_landmarks.landmark

                # --- Mouse Movement ---
                # Clamp to the screen so an out-of-range landmark cannot
                # request an off-screen position.
                index_tip = landmarks[8]
                x = min(max(int(index_tip.x * screen_w), 0), screen_w - 1)
                y = min(max(int(index_tip.y * screen_h), 0), screen_h - 1)
                pyautogui.moveTo(x, y, duration=0.01)

                # --- Click Gesture (thumb & index touch) ---
                thumb_tip = landmarks[4]
                distance = get_distance(thumb_tip, index_tip)
                pinching = distance < click_threshold

                if pinching:
                    # Click once when the pinch starts, not on every frame it is held.
                    if not pinch_held:
                        pyautogui.click()
                    cv2.putText(frame, "Click!", (10, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 100, 255), 3)
                pinch_held = pinching

                # --- Scroll Gesture (wrist movement) ---
                wrist_y = landmarks[0].y
                if prev_y is not None:
                    delta = wrist_y - prev_y
                    if abs(delta) > scroll_sensitivity:
                        direction = int(-delta * 100)
                        pyautogui.scroll(direction)
                prev_y = wrist_y
        else:
            # Hand left the frame: forget per-hand state so a reappearing hand
            # does not scroll by the jump from its old position or skip a click.
            pinch_held = False
            prev_y = None

        cv2.imshow("HandyPanda 🐼 Mouse Control", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Free the camera and close the window even when the loop is interrupted
    # or a call inside it raises.
    cap.release()
    cv2.destroyAllWindows()



In [9]:
import cv2
import mediapipe as mp
import pyautogui
import numpy as np

# Initialize MediaPipe (module aliases + single-hand detector)
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    min_detection_confidence=0.7,
    max_num_hands=1,
)

# Screen size
screen_w, screen_h = pyautogui.size()

# Gesture tuning
click_threshold = 0.05     # fingertip distance that counts as a pinch
scroll_sensitivity = 0.03  # minimum wrist y change per frame to scroll

# Wrist y from the previous scroll frame (None when not tracking)
prev_y = None

# Distance utility
def get_distance(p1, p2):
    """Return the 2-D distance between ``p1`` and ``p2`` using their ``.x``/``.y``."""
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    squared = delta_x**2 + delta_y**2
    return np.sqrt(squared)

# Finger state checkers
def is_finger_up(landmarks, tip_id, pip_id):
    """Return True when landmark ``tip_id`` has a smaller y than landmark ``pip_id``."""
    tip_y = landmarks[tip_id].y
    joint_y = landmarks[pip_id].y
    return joint_y > tip_y

def is_finger_folded(landmarks, tip_id, pip_id):
    """Return True when landmark ``tip_id`` has a larger y than landmark ``pip_id``."""
    tip_point = landmarks[tip_id]
    joint_point = landmarks[pip_id]
    return tip_point.y > joint_point.y

def is_fist(landmarks):
    """Return True when index, middle, ring and pinky are all folded.

    Fingers are checked in that order and the check stops at the first one
    that is not folded. The thumb is not considered.
    """
    for tip, pip in ((8, 6), (12, 10), (16, 14), (20, 18)):
        if not is_finger_folded(landmarks, tip, pip):
            return False
    return True

def is_index_pointing(landmarks):
    """Return True when the index finger is up and middle, ring and pinky are folded.

    The other fingers are only examined once the index finger is found up.
    """
    if not is_finger_up(landmarks, 8, 6):
        return False
    for tip, pip in ((12, 10), (16, 14), (20, 18)):
        if not is_finger_folded(landmarks, tip, pip):
            return False
    return True

# Webcam
cap = cv2.VideoCapture(0)
held_click = None  # "left" / "right" while that pinch is being held, else None

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb)

        action_text = "Idle"

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                landmarks = hand_landmarks.landmark

                index_tip = landmarks[8]
                thumb_tip = landmarks[4]
                middle_tip = landmarks[12]

                # Click Gestures
                distance_thumb_index = get_distance(index_tip, thumb_tip)
                distance_thumb_middle = get_distance(thumb_tip, middle_tip)

                if distance_thumb_index < click_threshold:
                    # Click once when the pinch starts, not on every frame it is held.
                    if held_click != "left":
                        pyautogui.click()
                    held_click = "left"
                    action_text = "Left Click"

                elif distance_thumb_middle < click_threshold:
                    if held_click != "right":
                        pyautogui.rightClick()
                    held_click = "right"
                    action_text = "Right Click"

                else:
                    held_click = None

                    if is_fist(landmarks):
                        # --- Scroll Mode ---
                        wrist_y = landmarks[0].y
                        if prev_y is not None:
                            delta = wrist_y - prev_y
                            if abs(delta) > scroll_sensitivity:
                                direction = int(-delta * 100)
                                pyautogui.scroll(direction)
                                action_text = "Scrolling"
                        prev_y = wrist_y

                    elif is_index_pointing(landmarks):
                        # --- Mouse Movement Mode ---
                        x = int(index_tip.x * screen_w)
                        y = int(index_tip.y * screen_h)
                        pyautogui.moveTo(x, y, duration=0.01)
                        action_text = "Moving Mouse"
                        prev_y = None  # reset scroll tracking

                    else:
                        action_text = "Idle"
                        prev_y = None
        else:
            # Hand left the frame: drop per-hand state so a reappearing fist
            # does not scroll by the jump from its old position, and a new
            # pinch is treated as a fresh click.
            held_click = None
            prev_y = None

        # Show action label
        cv2.putText(frame, f'Mode: {action_text}', (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 150), 2)

        cv2.imshow("HandyPanda 🐼", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Free the camera and close the window even when the loop is interrupted
    # or a call inside it raises.
    cap.release()
    cv2.destroyAllWindows()


In [11]:
import cv2
import mediapipe as mp
import pyautogui
import numpy as np

# Initialize MediaPipe (module aliases + single-hand detector)
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    min_detection_confidence=0.7,
    max_num_hands=1,
)

screen_w, screen_h = pyautogui.size()

# Gesture tuning
scroll_sensitivity = 0.02  # minimum wrist y change before a scroll is sent
click_threshold = 0.05     # fingertip distance that counts as a pinch

# State: whether scroll mode is active, and the wrist y it last scrolled from
scroll_mode = False
prev_y = None

# Distance helper
def get_distance(p1, p2):
    """Return the distance between two points, using only their ``.x`` and ``.y``."""
    x_gap = p1.x - p2.x
    y_gap = p1.y - p2.y
    return np.sqrt(x_gap**2 + y_gap**2)

# Finger logic
def is_finger_folded(landmarks, tip_id, pip_id):
    """Return True when landmark ``tip_id`` has a larger y than landmark ``pip_id``."""
    tip_y = landmarks[tip_id].y
    joint_y = landmarks[pip_id].y
    return joint_y < tip_y

def all_fingers_folded(landmarks):
    """Return True when index, middle, ring and pinky are all folded.

    Every finger is evaluated before the result is combined. The thumb is not
    considered.
    """
    finger_joints = ((8, 6), (12, 10), (16, 14), (20, 18))
    states = []
    for tip, pip in finger_joints:
        states.append(is_finger_folded(landmarks, tip, pip))
    return all(states)

def is_index_only_up(landmarks):
    """Return True when the index tip is above its joint and the other three fingers are folded.

    Middle, ring and pinky are only examined once the index check passes.
    """
    index_raised = landmarks[8].y < landmarks[6].y
    if not index_raised:
        return index_raised
    for tip, pip in ((12, 10), (16, 14), (20, 18)):
        if not is_finger_folded(landmarks, tip, pip):
            return False
    return True

cap = cv2.VideoCapture(0)
held_click = None  # "left" / "right" while that pinch is being held, else None

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb)

        action_text = "Idle"

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                landmarks = hand_landmarks.landmark

                index_tip = landmarks[8]
                thumb_tip = landmarks[4]
                middle_tip = landmarks[12]

                # Gesture distances
                distance_thumb_index = get_distance(index_tip, thumb_tip)
                distance_thumb_middle = get_distance(thumb_tip, middle_tip)

                # Clicks: fire once when the pinch starts, not on every frame it is held.
                if distance_thumb_index < click_threshold:
                    if held_click != "left":
                        pyautogui.click()
                    held_click = "left"
                    action_text = "Left Click"
                    scroll_mode = False
                    continue

                if distance_thumb_middle < click_threshold:
                    if held_click != "right":
                        pyautogui.rightClick()
                    held_click = "right"
                    action_text = "Right Click"
                    scroll_mode = False
                    continue

                held_click = None

                # Enter/exit scroll mode
                if all_fingers_folded(landmarks):
                    if not scroll_mode:
                        # First fist frame only records the reference position.
                        scroll_mode = True
                        prev_y = landmarks[0].y
                    else:
                        wrist_y = landmarks[0].y
                        delta = wrist_y - prev_y
                        if abs(delta) > scroll_sensitivity:
                            pyautogui.scroll(int(-delta * 100))
                            action_text = "Scrolling"
                            prev_y = wrist_y
                else:
                    scroll_mode = False
                    prev_y = None

                # Mouse movement
                if is_index_only_up(landmarks) and not scroll_mode:
                    x = int(index_tip.x * screen_w)
                    y = int(index_tip.y * screen_h)
                    pyautogui.moveTo(x, y, duration=0.01)
                    action_text = "Moving Mouse"
        else:
            # Hand left the frame: leave scroll mode and forget the pinch so a
            # reappearing fist starts from a fresh reference position instead
            # of scrolling by the jump from its old one.
            held_click = None
            scroll_mode = False
            prev_y = None

        # Display state
        cv2.putText(frame, f'Mode: {action_text}', (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 150), 2)

        cv2.imshow("HandyPanda 🐼", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Free the camera and close the window even when the loop is interrupted
    # or a call inside it raises.
    cap.release()
    cv2.destroyAllWindows()


In [12]:
import cv2
import mediapipe as mp
import pyautogui
import numpy as np

# MediaPipe module aliases + single-hand detector
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    min_detection_confidence=0.7,
    max_num_hands=1,
)

screen_w, screen_h = pyautogui.size()

# Gesture tuning
click_threshold = 0.05     # fingertip distance that counts as a pinch
scroll_sensitivity = 0.02  # minimum wrist y change before a scroll is sent

# Scroll state: whether scroll mode is active, and the wrist y it last scrolled from
scroll_mode = False
prev_y = None

# Calibration Box (portion of webcam frame to use), as fractions of the frame
calib_left, calib_top = 0.2, 0.2
calib_right, calib_bottom = 0.8, 0.8

# Smoothing factor and the last smoothed cursor position
smooth_factor = 5
prev_x = 0
prev_y_mouse = 0

def get_distance(p1, p2):
    """Return the distance between two points, using only their ``.x`` and ``.y``."""
    offset_x = p1.x - p2.x
    offset_y = p1.y - p2.y
    return np.sqrt(offset_x**2 + offset_y**2)

def is_finger_folded(landmarks, tip, pip):
    """Return True when landmark ``tip`` has a larger y than landmark ``pip``."""
    tip_y = landmarks[tip].y
    joint_y = landmarks[pip].y
    return joint_y < tip_y

def all_fingers_folded(landmarks):
    """Return True when index, middle, ring and pinky are all folded.

    Checking stops at the first finger that is not folded. The thumb is not
    considered.
    """
    for tip, pip in ((8, 6), (12, 10), (16, 14), (20, 18)):
        if not is_finger_folded(landmarks, tip, pip):
            return False
    return True

def is_index_only_up(landmarks):
    """Return True when the index tip is above its joint and the other three fingers are folded.

    Middle, ring and pinky are only examined once the index check passes.
    """
    index_raised = landmarks[8].y < landmarks[6].y
    if not index_raised:
        return index_raised
    remaining = ((12, 10), (16, 14), (20, 18))
    return all(is_finger_folded(landmarks, tip, pip) for tip, pip in remaining)

cap = cv2.VideoCapture(0)
held_click = None  # "left" / "right" while that pinch is being held, else None

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        frame_h, frame_w, _ = frame.shape
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb)

        action_text = "Idle"

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                landmarks = hand_landmarks.landmark

                index_tip = landmarks[8]
                thumb_tip = landmarks[4]
                middle_tip = landmarks[12]

                dist_thumb_index = get_distance(index_tip, thumb_tip)
                dist_thumb_middle = get_distance(thumb_tip, middle_tip)

                # Clicks: fire once when the pinch starts, not on every frame it is held.
                if dist_thumb_index < click_threshold:
                    if held_click != "left":
                        pyautogui.click()
                    held_click = "left"
                    action_text = "Left Click"
                    scroll_mode = False
                    continue

                if dist_thumb_middle < click_threshold:
                    if held_click != "right":
                        pyautogui.rightClick()
                    held_click = "right"
                    action_text = "Right Click"
                    scroll_mode = False
                    continue

                held_click = None

                if all_fingers_folded(landmarks):
                    if not scroll_mode:
                        # First fist frame only records the reference position.
                        scroll_mode = True
                        prev_y = landmarks[0].y
                    else:
                        wrist_y = landmarks[0].y
                        delta = wrist_y - prev_y
                        if abs(delta) > scroll_sensitivity:
                            pyautogui.scroll(int(-delta * 100))
                            action_text = "Scrolling"
                            prev_y = wrist_y
                else:
                    scroll_mode = False
                    prev_y = None

                if is_index_only_up(landmarks) and not scroll_mode:
                    # Map the calibration box onto the full screen.
                    x_norm = np.clip((index_tip.x - calib_left) / (calib_right - calib_left), 0, 1)
                    y_norm = np.clip((index_tip.y - calib_top) / (calib_bottom - calib_top), 0, 1)

                    # x_norm == 1 would map to pixel screen_w, one past the
                    # last column, so cap at the final on-screen pixel.
                    x = min(int(x_norm * screen_w), screen_w - 1)
                    y = min(int(y_norm * screen_h), screen_h - 1)

                    # Smooth movement. The smoothed position is kept as a float:
                    # integer floor division rounds small positive steps to 0
                    # (cursor stalls short of the target) and small negative
                    # steps to -1, which makes the motion asymmetric.
                    smooth_x = prev_x + (x - prev_x) / smooth_factor
                    smooth_y = prev_y_mouse + (y - prev_y_mouse) / smooth_factor
                    pyautogui.moveTo(int(round(smooth_x)), int(round(smooth_y)), duration=0.01)
                    prev_x, prev_y_mouse = smooth_x, smooth_y

                    action_text = "Moving Mouse"
        else:
            # Hand left the frame: leave scroll mode and forget the pinch so a
            # reappearing fist starts from a fresh reference position instead
            # of scrolling by the jump from its old one.
            held_click = None
            scroll_mode = False
            prev_y = None

        # Draw calibration rectangle
        cv2.rectangle(frame,
                      (int(calib_left * frame_w), int(calib_top * frame_h)),
                      (int(calib_right * frame_w), int(calib_bottom * frame_h)),
                      (255, 0, 255), 2)

        cv2.putText(frame, f'Mode: {action_text}', (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 150), 2)

        cv2.imshow("HandyPanda 🐼", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Free the camera and close the window even when the loop is interrupted
    # or a call inside it raises.
    cap.release()
    cv2.destroyAllWindows()
