In [6]:
import cv2
import numpy as np

points = []  # clicked calibration points, stored as [x, y] in click order


def get_points(event, x, y, flags, param):
    """Mouse callback for the "Calibracion" window.

    On a left-button press the click is appended to the module-level
    ``points`` list as ``[x, y]``, a filled green dot is drawn in place on
    ``param["img"]``, the window is refreshed, and the points collected so
    far are printed. Every other mouse event is ignored.

    Args:
        event: OpenCV mouse event code.
        x, y: Cursor position reported with the event.
        flags: Unused; present because the callback signature requires it.
        param: Dict holding the image being annotated under the key "img".
    """
    global points
    canvas = param["img"]

    # Only left-button presses register a point.
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    points.append([x, y])

    marker_center = (x, y)
    cv2.circle(canvas, marker_center, 6, (0, 255, 0), -1)
    cv2.imshow("Calibracion", canvas)

    print(points)

# Grab one frame from the default camera to calibrate on. The camera is
# released even if the read raises, so it is not left locked in the kernel.
cap = cv2.VideoCapture(0)
try:
    ret, frame = cap.read()
finally:
    cap.release()

# A failed capture returns ret=False / frame=None; fail here with a clear
# message instead of an AttributeError on frame.copy() below.
if not ret or frame is None:
    raise RuntimeError("No se pudo capturar un fotograma de la camara 0")

# Draw on a copy so the captured frame itself stays untouched.
clone = frame.copy()
cv2.imshow("Calibracion", clone)
cv2.setMouseCallback("Calibracion", get_points, {"img": clone})
try:
    # Block until any key is pressed; clicks are collected by get_points.
    cv2.waitKey(0)
finally:
    cv2.destroyAllWindows()

points = np.array(points, dtype=np.float32)

# The homography cell feeds these points to cv2.getPerspectiveTransform and
# pairs them with top-left, top-right, bottom-right, bottom-left of the flat
# piano image, so exactly four (x, y) clicks in that order are required.
# Refuse to save anything else rather than fail later with an opaque error.
if points.shape != (4, 2):
    raise ValueError(
        "Se necesitan exactamente 4 puntos (x, y); se obtuvo un arreglo "
        f"de forma {points.shape}"
    )

np.save("piano_points.npy", points)
print("Puntos guardados:", points)

[[149, 196]]
[[149, 196], [527, 183]]
[[149, 196], [527, 183], [576, 403]]
[[149, 196], [527, 183], [576, 403], [138, 424]]
Puntos guardados: [[149. 196.]
 [527. 183.]
 [576. 403.]
 [138. 424.]]


In [None]:
import cv2
import numpy as np
import mediapipe as mp
import pygame

# ---------- CONFIGURATION ----------
# Key labels from left to right, and the audio clip played for each key
# (same index in both lists).
NOTES = ["DO", "RE", "MI", "FA", "SOL", "LA", "SI", "DO"]
SOUNDS = ["sounds/do.mp3", "sounds/re.mp3", "sounds/mi.mp3",
          "sounds/fa.mp3", "sounds/sol.mp3", "sounds/la.mp3",
          "sounds/si.mp3", "sounds/do.mp3"]

pygame.mixer.init()
note_sounds = [pygame.mixer.Sound(s) for s in SOUNDS]

touch_threshold = -0.035      # tunable: fingertip z below this counts as a press
prev_fy = None               # fingertip y (pixels) in the previous frame, for vertical velocity
finger_was_down = False      # press state in the previous frame

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1)
mp_draw = mp.solutions.drawing_utils

# ---------- LOAD POINTS AND BUILD HOMOGRAPHY ----------
img_size = (800, 300)  # (width, height) of the flattened piano image

# Calibration corners saved by the calibration cell. Cast to float32 and
# check the shape up front: cv2.getPerspectiveTransform needs exactly four
# 2-D float32 points and otherwise fails with an opaque OpenCV assertion.
src_pts = np.asarray(np.load("piano_points.npy"), dtype=np.float32)
if src_pts.shape != (4, 2):
    raise ValueError(
        "piano_points.npy debe contener exactamente 4 puntos (x, y); "
        f"forma encontrada: {src_pts.shape}"
    )

# Destination corners in the same order as the clicks are expected:
# top-left, top-right, bottom-right, bottom-left.
dst_pts = np.array([
    [0, 0],
    [img_size[0], 0],
    [img_size[0], img_size[1]],
    [0, img_size[1]]
], dtype=np.float32)

# Maps camera pixels onto the flat piano image.
H = cv2.getPerspectiveTransform(src_pts, dst_pts)

cap = cv2.VideoCapture(0)

# Key geometry does not change between frames, so compute it once.
num_keys = len(NOTES)
key_width = img_size[0] // num_keys

# Smoothed fingertip depth. None means "no previous sample", so the filter
# is seeded with the first raw value. The original tested
# hasattr(globals(), "fz_smooth"), which looks for an attribute on the dict
# object rather than a key and is therefore always False: the filter was
# re-seeded on every frame and never smoothed anything.
fz_smooth = None

# ---------- MAIN LOOP ----------
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        h_img, w_img, _ = frame.shape

        # ---- APPLY HOMOGRAPHY ----
        piano_flat = cv2.warpPerspective(frame, H, img_size)

        # ---- MEDIAPIPE ----
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb)

        fx, fy, fz = None, None, None
        fx_p, fy_p = None, None

        if result.multi_hand_landmarks:
            hand_landmarks = result.multi_hand_landmarks[0]
            # Landmark 8 is the index fingertip in MediaPipe's hand model.
            index_finger = hand_landmarks.landmark[8]

            # Landmark x/y are scaled by the frame size to get pixels.
            fx = int(index_finger.x * w_img)
            fy = int(index_finger.y * h_img)

            # --------- Z SMOOTHING (noise filter) ----------
            # Exponential moving average: 70% history, 30% new sample.
            if fz_smooth is None:
                fz_smooth = index_finger.z
            else:
                fz_smooth = 0.7 * fz_smooth + 0.3 * index_finger.z
            fz = fz_smooth

            cv2.circle(frame, (fx, fy), 8, (0, 255, 255), -1)

            # Project the fingertip into the flat piano plane.
            p = np.array([[[fx, fy]]], dtype=np.float32)
            p_flat = cv2.perspectiveTransform(p, H)
            fx_p, fy_p = int(p_flat[0][0][0]), int(p_flat[0][0][1])
        else:
            # Hand lost: drop the filter history so a stale depth is not
            # blended into the next detection (prev_fy is reset the same way
            # below, because fy is None on this frame).
            fz_smooth = None

        # --------- PRESS DETECTION ----------
        # A press needs both: depth past the threshold and the fingertip
        # moving down the image by more than 4 px since the previous frame.
        finger_is_down = False
        if fz is not None and prev_fy is not None:
            velocity = fy - prev_fy
            if fz < touch_threshold and velocity > 4:
                finger_is_down = True

        # ---- KEYS + SOUND ----
        current_key = -1
        for i in range(num_keys):
            x1 = i * key_width
            x2 = x1 + key_width

            # Half-open ranges so a fingertip exactly on a key border (or on
            # row/column 0) belongs to exactly one key instead of none.
            inside = (
                fx_p is not None
                and x1 <= fx_p < x2
                and 0 <= fy_p < img_size[1]
            )
            if inside:
                current_key = i

            color = (0, 255, 255) if (inside and finger_is_down) else (255, 0, 0)

            cv2.rectangle(piano_flat, (x1, 0), (x2, img_size[1]), color, 2)
            cv2.putText(piano_flat, NOTES[i], (x1+20, img_size[1]-30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)

        # Play only on the up-to-down transition so a held press sounds once.
        if current_key != -1 and finger_is_down and not finger_was_down:
            note_sounds[current_key].play()

        finger_was_down = finger_is_down
        prev_fy = fy

        cv2.imshow("Vista Camara", frame)
        cv2.imshow("Piano Plano", piano_flat)

        # ESC quits.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Always free the camera, windows, hand tracker and audio, even if the
    # loop raised; otherwise the device stays locked until kernel restart.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    pygame.quit()
