
# 🖐️ MediaPipe Hands — 손가락 카운트 + 제스처 라벨링 (화면맞춤)

**손가락 펴짐 개수**를 계산하고, 단순 규칙 기반으로 **제스처 라벨**을 붙입니다.  
카메라 재시도 초기화, 레터박싱(Screen-Fit), FPS 오버레이, 단축키, 스냅샷 저장 등 기존 UX 유지.

**제스처 규칙 예시 (간단 휴리스틱)**
- **Fist (Zero)**: 펴진 손가락 없음
- **Thumbs Up**: 엄지만 펴짐
- **Point (One)**: 검지만 펴짐
- **Peace (Two)**: 검지+중지만 펴짐 (V 사인)
- **Three**: 검지+중지+약지만 펴짐
- **Rock**: 검지+새끼만 펴짐 (Rock’n’Roll)
- **Open Palm (Five)**: 5개 모두 펴짐
- 그 외: **Unknown (n)** — n은 펴진 손가락 개수  
> 필요 시 언제든 규칙을 추가/수정하세요!



## 1) 설치 (필요 시만 실행)


In [None]:

# !pip install --upgrade pip
# !pip install mediapipe opencv-python screeninfo



## 2) 모듈 임포트


In [None]:

import os
import time
from datetime import datetime

import cv2
import numpy as np

# MediaPipe is required by every cell below. Only a genuine import failure is
# translated into the "not installed" hint; any other error raised while the
# package initializes propagates unchanged so its real cause stays visible.
try:
    import mediapipe as mp
except ImportError as e:
    raise RuntimeError("mediapipe가 설치되어 있지 않습니다. 위 설치 셀을 실행하세요.") from e

# Short aliases for the MediaPipe solution modules used in this notebook.
mp_drawing = mp.solutions.drawing_utils
mp_styles   = mp.solutions.drawing_styles
mp_hands    = mp.solutions.hands



## 3) 화면 해상도 탐지


In [None]:

def _get_screen_size():
    try:
        from screeninfo import get_monitors
        m = get_monitors()[0]
        return int(m.width), int(m.height)
    except Exception:
        pass
    try:
        import tkinter as tk
        root = tk.Tk()
        root.withdraw()
        w = root.winfo_screenwidth()
        h = root.winfo_screenheight()
        root.destroy()
        return int(w), int(h)
    except Exception:
        pass
    return 1280, 720

SCREEN_W, SCREEN_H = _get_screen_size()
print(f"[INFO] Screen size detected: {SCREEN_W}x{SCREEN_H}")



## 4) 설정값


In [None]:

# Input source selection: when USE_CAMERA is True the camera at CAP_INDEX is
# opened; otherwise the video file at VIDEO_SOURCE is read.
USE_CAMERA   = True
CAP_INDEX    = 0
VIDEO_SOURCE = "./sample.mp4"

# Title of the OpenCV display window, and the directory snapshots are saved to
# (created here if it does not exist yet).
WINDOW_NAME = "MediaPipe Hands — Gestures"
SAVE_DIR = "./mp_gesture_snaps"
os.makedirs(SAVE_DIR, exist_ok=True)

# Warning threshold: total number of open fingers across the detected hands
COUNT_THRESHOLD = 8

# MediaPipe Hands parameters (max hands, detection / tracking confidence)
HANDS_MAX_NUM = 2
HANDS_DET_CONF = 0.5
HANDS_TRK_CONF = 0.5



## 5) 카메라 초기화 (재시도)


In [None]:

def setup_camera_with_retry(index=0):
    """Open a camera, trying several backend / pixel-format combinations.

    The attempts are, in order: V4L2 with YUYV, V4L2 with MJPEG, then
    OpenCV's default backend. Each attempt requests 640x480 @ 30 fps with a
    one-frame buffer, waits for the device to settle, and reads five warm-up
    frames; the attempt is accepted when at least three reads succeed.

    Args:
        index: Camera device index passed to ``cv2.VideoCapture``.

    Returns:
        ``(cap, method_name)`` with an opened ``cv2.VideoCapture`` and the name
        of the attempt that worked, or ``(None, None)`` if every attempt
        failed. The caller owns ``cap`` and must release it.
    """
    base_settings = {'width': 640, 'height': 480, 'fps': 30, 'buffersize': 1}
    methods = [
        {
            'name': 'V4L2_YUYV',
            'backend': cv2.CAP_V4L2,
            'settings': {'fourcc': cv2.VideoWriter_fourcc('Y', 'U', 'Y', 'V'), **base_settings},
        },
        {
            'name': 'V4L2_MJPEG',
            'backend': cv2.CAP_V4L2,
            'settings': {'fourcc': cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), **base_settings},
        },
        {
            'name': 'DEFAULT',
            'backend': None,
            'settings': dict(base_settings),
        },
    ]

    for method in methods:
        name = method['name']
        print(f"[CAM] Trying {name}...")

        cap = None
        ready = False
        try:
            if method['backend'] is None:
                cap = cv2.VideoCapture(index)
            else:
                cap = cv2.VideoCapture(index, method['backend'])

            if not cap.isOpened():
                print(f"[CAM] Open failed with {name}")
                continue  # the finally clause releases the handle

            s = method['settings']
            if 'fourcc' in s:
                cap.set(cv2.CAP_PROP_FOURCC, s['fourcc'])
            cap.set(cv2.CAP_PROP_FRAME_WIDTH,  s['width'])
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, s['height'])
            cap.set(cv2.CAP_PROP_FPS,          s['fps'])
            cap.set(cv2.CAP_PROP_BUFFERSIZE,   s['buffersize'])

            # Give the driver time to apply the requested settings.
            time.sleep(1.0)

            # Warm-up: count how many of five consecutive reads succeed.
            ok_cnt = 0
            for _ in range(5):
                ret, f = cap.read()
                if ret and f is not None:
                    ok_cnt += 1
                time.sleep(0.1)

            if ok_cnt >= 3:
                ready = True
            else:
                print(f"[CAM] Unstable with {name}")

        except Exception as e:
            # Best-effort probing: report and move on to the next method.
            print(f"[CAM] Error on {name}: {e}")
        finally:
            # Release every handle that is not being handed to the caller,
            # including ones left open by a failed open or an exception.
            if cap is not None and not ready:
                cap.release()

        if ready:
            print(f"[CAM] Ready with {name}")
            return cap, name

    return None, None



## 6) 화면맞춤 & 손가락 카운트 & 제스처 분류 유틸
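- 엄지: handedness 라벨에 따라 비교 방향이 반대 — 오른손은 `TIP.x < IP.x`, 그 외(왼손)는 `TIP.x > IP.x`면 펴짐 (좌우 반전된 셀피 프레임 기준)
- 나머지 네 손가락: `TIP.y < PIP.y`면 펴짐
- `classify_gesture(opens)`로 제스처 문자열 반환 (`opens`는 `[엄지, 검지, 중지, 약지, 새끼]` 순서의 bool 리스트)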


In [None]:

def letterbox_fit_to_screen(frame, screen_w, screen_h, color=(0,0,0)):
    """Scale ``frame`` to fit a ``screen_w`` x ``screen_h`` canvas, keeping aspect ratio.

    The frame is resized by the largest uniform factor that still fits the
    canvas, then pasted centered; the remaining border is filled with ``color``.

    Returns:
        ``(canvas, scale, x_off, y_off)``: the 3-channel uint8 canvas, the
        scale factor applied, and the top-left offset of the pasted frame.
    """
    src_h, src_w = frame.shape[:2]

    # Limiting ratio of the two axes, so the scaled frame fits both ways.
    scale = min(screen_w / src_w, screen_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)

    # Center the scaled frame on a canvas pre-filled with the border color.
    canvas = np.full((screen_h, screen_w, 3), color, dtype=np.uint8)
    x_off = (screen_w - fit_w) // 2
    y_off = (screen_h - fit_h) // 2
    canvas[y_off:y_off + fit_h, x_off:x_off + fit_w] = cv2.resize(
        frame, (fit_w, fit_h), interpolation=cv2.INTER_LINEAR
    )
    return canvas, scale, x_off, y_off

# MediaPipe Hands landmark indices: each fingertip and the joint it is compared to
THUMB_TIP, THUMB_IP = 4, 3
INDEX_TIP, INDEX_PIP = 8, 6
MIDDLE_TIP, MIDDLE_PIP = 12, 10
RING_TIP, RING_PIP = 16, 14
PINKY_TIP, PINKY_PIP = 20, 18

def count_fingers_one_hand(hand_landmarks, handed_label):
    """Decide which fingers of one hand are extended.

    Args:
        hand_landmarks: Object whose ``landmark`` sequence holds points with
            ``x`` and ``y`` attributes, indexed by the constants above.
        handed_label: Handedness string; a label starting with "right"
            (case-insensitive) uses the right-hand thumb rule, anything else
            uses the opposite one.

    Returns:
        ``(count, opens)`` where ``opens`` is a list of five bools ordered
        ``[thumb, index, middle, ring, pinky]`` and ``count`` is how many are True.
    """
    points = hand_landmarks.landmark

    # Thumb: compared along x, with the direction flipped by handedness.
    tip_x = points[THUMB_TIP].x
    ip_x = points[THUMB_IP].x
    is_right = handed_label.lower().startswith("right")
    thumb_open = (tip_x < ip_x) if is_right else (tip_x > ip_x)

    # Other fingers: open when the tip's y is smaller than the PIP joint's y.
    tip_pip_pairs = (
        (INDEX_TIP, INDEX_PIP),
        (MIDDLE_TIP, MIDDLE_PIP),
        (RING_TIP, RING_PIP),
        (PINKY_TIP, PINKY_PIP),
    )
    opens = [thumb_open]
    opens.extend(points[tip].y < points[pip].y for tip, pip in tip_pip_pairs)

    return sum(int(flag) for flag in opens), opens

def classify_gesture(opens):
    """Map a five-finger open/closed pattern to a gesture label.

    Args:
        opens: Five flags ordered ``[thumb, index, middle, ring, pinky]``.

    Returns:
        One of the known labels ("Open Palm (Five)", "Fist (Zero)",
        "Thumbs Up", "Point (One)", "Peace (Two)", "Rock", "Three"), or
        ``"Unknown (<count>)"`` for any other combination.
    """
    thumb, index, middle, ring, pinky = opens
    flags = (thumb, index, middle, ring, pinky)
    n_open = sum(int(flag) for flag in flags)

    # All-open and all-closed depend only on the count.
    if n_open == 5:
        return "Open Palm (Five)"
    if n_open == 0:
        return "Fist (Zero)"

    # Remaining gestures need both the right count and the exact finger pattern.
    known_shapes = {
        (1, (True, False, False, False, False)): "Thumbs Up",
        (1, (False, True, False, False, False)): "Point (One)",
        (2, (False, True, True, False, False)): "Peace (Two)",
        (2, (False, True, False, False, True)): "Rock",
        (3, (False, True, True, True, False)): "Three",
    }
    shape = tuple(bool(flag) for flag in flags)
    return known_shapes.get((n_open, shape), f"Unknown ({n_open})")

def put_label_near_wrist(bgr, hand_landmarks, handed_label, gesture, count):
    """Draw ``"<handed_label>: <gesture> [<count>]"`` next to landmark 0, in place on ``bgr``.

    The landmark's ``x``/``y`` are scaled by the image width/height to get
    pixel coordinates, and the text is offset 10 px right and 10 px up from it.
    """
    img_h, img_w = bgr.shape[:2]
    wrist = hand_landmarks.landmark[0]
    anchor = (int(wrist.x * img_w) + 10, int(wrist.y * img_h) - 10)
    label = f"{handed_label}: {gesture} [{count}]"
    cv2.putText(
        bgr,
        label,
        anchor,
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (0, 255, 255),
        2,
    )



## 7) 실시간 추론 루프 (제스처 라벨 포함)
- 각 손마다 제스처 라벨을 손목(wrist) 근처에 표기
- 총 펴진 손가락 수가 `COUNT_THRESHOLD` 이상이면 콘솔 경고
- 단축키: `q`/`ESC` 종료, `f` 전체화면 전환, `s` 스냅샷 저장


In [None]:

# Real-time inference loop: read frames, run MediaPipe Hands, label each hand
# with its gesture, and show the letterboxed result with a HUD.
# Keys: q / ESC quit, f toggles fullscreen, s saves a snapshot to SAVE_DIR.
print("[INFO] Starting MediaPipe Hands — Gestures...")

if USE_CAMERA:
    cap, cam_method = setup_camera_with_retry(CAP_INDEX)
    if cap is None:
        raise SystemExit("[FATAL] 카메라 초기화 실패")
    src_desc = f"camera:{CAP_INDEX} ({cam_method})"
else:
    cap = cv2.VideoCapture(VIDEO_SOURCE)
    if not cap.isOpened():
        cap.release()
        raise SystemExit(f"[FATAL] 비디오 파일 열기 실패: {VIDEO_SOURCE}")
    src_desc = f"video:{VIDEO_SOURCE}"

print(f"[INFO] Source: {src_desc}")

fullscreen = False
fps = 0.0
frame_count = 0
last_time = time.perf_counter()

# Give up on a camera after this many consecutive failed reads (~3 s at the
# 50 ms retry interval); without a bound a dead camera would spin forever,
# because the key handler is never reached on the failure path.
MAX_READ_FAILURES = 60
read_failures = 0

# Last (left, right, total) tuple written to the console; used to log only
# when the counts change instead of once per frame.
last_counts = None

hands_ctx = None
try:
    # Window and detector are created inside the try so that the capture is
    # still released by the finally clause if either of them fails.
    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(WINDOW_NAME, SCREEN_W, SCREEN_H)

    hands_ctx = mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=HANDS_MAX_NUM,
        min_detection_confidence=HANDS_DET_CONF,
        min_tracking_confidence=HANDS_TRK_CONF,
    )

    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            if not USE_CAMERA:
                # A failed read on a file source means the video is finished.
                print("[INFO] End of video.")
                break
            read_failures += 1
            if read_failures >= MAX_READ_FAILURES:
                print(f"[ERROR] Camera read failed {read_failures} times in a row; stopping.")
                break
            print("[WARN] Frame read failed")
            time.sleep(0.05)
            continue
        read_failures = 0

        # Mirror horizontally for a selfie-style view.
        frame = cv2.flip(frame, 1)

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands_ctx.process(rgb)

        vis = frame.copy()
        total_open = 0
        left_count = 0
        right_count = 0

        if results.multi_hand_landmarks:
            # Pair each hand's landmarks with its handedness label (Left/Right).
            if results.multi_handedness:
                handedness_list = [
                    hlabel.classification[0].label
                    for hlabel in results.multi_handedness
                ]
            else:
                handedness_list = ["Unknown"] * len(results.multi_hand_landmarks)

            for hand_lm, handed_label in zip(results.multi_hand_landmarks, handedness_list):
                # Draw the landmarks and their connections.
                mp_drawing.draw_landmarks(
                    vis, hand_lm, mp_hands.HAND_CONNECTIONS,
                    mp_styles.get_default_hand_landmarks_style(),
                    mp_styles.get_default_hand_connections_style(),
                )
                # Finger count + gesture label.
                cnt, opens = count_fingers_one_hand(hand_lm, handed_label)
                gesture = classify_gesture(opens)
                put_label_near_wrist(vis, hand_lm, handed_label, gesture, cnt)

                total_open += cnt
                # Accumulate rather than overwrite: two hands can receive the
                # same label, and the per-side counts should add up to the total.
                side = handed_label.lower()
                if side.startswith("left"):
                    left_count += cnt
                elif side.startswith("right"):
                    right_count += cnt

        # Fit to the screen (letterboxing).
        disp, scale, x_off, y_off = letterbox_fit_to_screen(vis, SCREEN_W, SCREEN_H, color=(0,0,0))

        # FPS, refreshed every 30 frames; guard against a zero interval.
        frame_count += 1
        if frame_count % 30 == 0:
            now = time.perf_counter()
            elapsed = now - last_time
            if elapsed > 0:
                fps = 30.0 / elapsed
            last_time = now

        # HUD
        hud = f"Left: {left_count} | Right: {right_count} | Total open: {total_open} | FPS: {fps:.1f}"
        cv2.putText(disp, hud, (10, SCREEN_H-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,0), 2)

        # Console output and threshold warning, emitted only when the counts
        # change so the notebook output is not flooded on every frame.
        counts = (left_count, right_count, total_open)
        if counts != last_counts:
            print(f"[INFO] Open fingers — Left:{left_count} Right:{right_count} Total:{total_open}")
            if total_open >= COUNT_THRESHOLD:
                print(f"⚠️ Total finger open count >= {COUNT_THRESHOLD}!")
            last_counts = counts

        cv2.imshow(WINDOW_NAME, disp)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            print("[INFO] Exit requested.")
            break
        elif key == ord('f'):
            fullscreen = not fullscreen
            prop = cv2.WND_PROP_FULLSCREEN
            cv2.setWindowProperty(WINDOW_NAME, prop, cv2.WINDOW_FULLSCREEN if fullscreen else cv2.WINDOW_NORMAL)
            if not fullscreen:
                cv2.resizeWindow(WINDOW_NAME, SCREEN_W, SCREEN_H)
        elif key == ord('s'):
            ts = datetime.now().strftime("%Y%m%d_%H%M%S")
            path = os.path.join(SAVE_DIR, f"mp_gesture_{ts}.jpg")
            # imwrite reports failure through its return value, not an exception.
            if cv2.imwrite(path, vis):
                print(f"[SAVE] Snapshot: {path}")
            else:
                print(f"[WARN] Snapshot save failed: {path}")

except KeyboardInterrupt:
    print("[INFO] Interrupted by user.")

finally:
    if cap is not None:
        cap.release()
    cv2.destroyAllWindows()
    if hands_ctx is not None:
        hands_ctx.close()
    print("[CLEANUP] Released resources.")



## 8) 참고 & 확장 아이디어
- 규칙은 간단 휴리스틱입니다. 오차 감소를 원하면 각 관절 벡터의 각도를 이용해 **정규화된 포즈 분류기**로 확장하세요.
- 특정 제스처에서만 **이벤트 트리거**(예: "Thumbs Up이면 촬영", "Peace이면 저장")도 쉽게 추가 가능합니다.
- 멀티프레임 **디바운싱/안정화**가 필요하면 최근 N프레임의 제스처를 `deque`로 모아 **최빈값**을 표시해보세요.
