In [None]:
import cv2
import time
import mediapipe as mp
import pygame


# Start the pygame mixer; pygame.mixer.music is used further down to play
# the song that belongs to the active gesture.
pygame.mixer.init()

# Gesture name -> audio file that is looped while that gesture is active.
songs = {
    "rock": "acdc.mp3",
    "reggae": "bob_marley.mp3",
    "surf": "surf.mp3"
}


# Overlay images. IMREAD_UNCHANGED keeps the alpha channel if the file has
# one. cv2.imread does not raise on a missing or unreadable file; it
# returns None instead.
rasta_filter = cv2.imread("rasta.png", cv2.IMREAD_UNCHANGED)
rock_filter = cv2.imread("rock.png", cv2.IMREAD_UNCHANGED)
surf_filter = cv2.imread("surf.png", cv2.IMREAD_UNCHANGED)

# Gesture name -> overlay image. Images that failed to load are reported
# and left out of the mapping, so a lookup by gesture never yields None
# (which would otherwise crash later inside cv2.resize).
filters = {}
for _gesture_name, _filter_img in (
    ("reggae", rasta_filter),
    ("rock", rock_filter),
    ("surf", surf_filter),
):
    if _filter_img is None:
        print(f"Warning: overlay image for '{_gesture_name}' could not be "
              "loaded; that overlay is disabled.")
    else:
        filters[_gesture_name] = _filter_img


# MediaPipe hand tracker plus the drawing helpers used to render the
# detected hand landmarks on the frame.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(min_detection_confidence=0.7,
                       min_tracking_confidence=0.5)


# MediaPipe face mesh, limited to a single face; its landmarks are used to
# position the overlay image.
mp_face = mp.solutions.face_mesh
face_mesh = mp_face.FaceMesh(
    max_num_faces=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)


# Capture device 0 (presumably the default webcam).
cap = cv2.VideoCapture(0)
# Gesture whose song/tint/overlay is currently active; None means no gesture.
current_gesture = None


# FUNCTIONS

def detect_gesture(hand_landmarks):
    """Classify a hand pose as "rock", "reggae" or "surf".

    A five-element pattern of 0/1 flags is built from the landmarks:

    * thumb: 1 when landmark 4 has a smaller x than landmark 3;
    * the four finger tips 8, 12, 16 and 20 (index, middle, ring and pinky
      in MediaPipe's hand model): 1 when the tip has a smaller y than the
      landmark two indices before it, i.e. the tip is higher in the image.

    Args:
        hand_landmarks: object whose ``landmark`` sequence holds at least
            21 points exposing ``x`` and ``y`` attributes.

    Returns:
        The gesture name for a recognised pattern, otherwise ``None``.
    """
    points = hand_landmarks.landmark

    # Thumb flag comes first, followed by one flag per finger tip.
    thumb_flag = int(points[4].x < points[3].x)
    finger_flags = tuple(
        int(points[tip].y < points[tip - 2].y) for tip in (8, 12, 16, 20)
    )
    pattern = (thumb_flag,) + finger_flags

    known_patterns = {
        (0, 1, 0, 0, 1): "rock",    # index + pinky
        (0, 1, 1, 0, 0): "reggae",  # index + middle
        (1, 0, 0, 0, 1): "surf",    # thumb + pinky
    }
    return known_patterns.get(pattern)


def overlay_filter(frame, filter_img, x, y, w, h):
    """Draw a BGRA overlay image onto ``frame`` (in place) and return it.

    The overlay is resized to ``w`` x ``h`` pixels and placed with its
    top-left corner at ``(x, y)``. Parts that fall outside the frame are
    clipped. The alpha channel is binarised (alpha > 1 counts as opaque),
    so each pixel is either replaced by the overlay or left untouched.

    Args:
        frame: image to draw on; it is modified in place. Only its first
            three channels are written.
        filter_img: 4-channel overlay image (e.g. a PNG read with
            ``cv2.IMREAD_UNCHANGED``), or ``None``.
        x: column of the overlay's left edge in frame pixels; may be
            negative.
        y: row of the overlay's top edge in frame pixels; may be negative.
        w: target overlay width in pixels.
        h: target overlay height in pixels.

    Returns:
        The same ``frame`` object. It is returned unchanged when the
        overlay is ``None``, has no alpha channel, has a non-positive
        target size, or lies completely outside the frame.
    """
    # cv2.resize rejects an empty target size and a None image; skip the
    # overlay for this frame rather than aborting the caller.
    if filter_img is None or w <= 0 or h <= 0:
        return frame
    # Without a fourth (alpha) channel there is no mask to blend with.
    if filter_img.ndim != 3 or filter_img.shape[2] != 4:
        return frame

    # Clip the target rectangle to the frame before doing any pixel work.
    h_frame, w_frame = frame.shape[:2]
    x1 = max(0, x)
    y1 = max(0, y)
    x2 = min(w_frame, x + w)
    y2 = min(h_frame, y + h)
    if x1 >= x2 or y1 >= y2:
        return frame

    filter_resized = cv2.resize(filter_img, (w, h), interpolation=cv2.INTER_NEAREST)

    b, g, r, a = cv2.split(filter_resized)
    a = cv2.threshold(a, 1, 255, cv2.THRESH_BINARY)[1]
    overlay_color = cv2.merge((b, g, r))
    mask = a / 255.0

    # Matching window inside the resized overlay.
    filter_x1 = x1 - x
    filter_y1 = y1 - y
    filter_x2 = filter_x1 + (x2 - x1)
    filter_y2 = filter_y1 + (y2 - y1)

    overlay_crop = overlay_color[filter_y1:filter_y2, filter_x1:filter_x2]
    # Trailing axis lets the single-channel mask broadcast over B, G and R.
    alpha = mask[filter_y1:filter_y2, filter_x1:filter_x2][:, :, None]

    roi = frame[y1:y2, x1:x2, :3]
    # The right-hand side is evaluated fully before being written back, so
    # reading and writing the same frame region is safe.
    frame[y1:y2, x1:x2, :3] = overlay_crop * alpha + roi * (1 - alpha)
    return frame




# Main loop: read webcam frames, detect the hand gesture, switch the music
# when the gesture changes, tint the frame and draw the face overlay.
# Pressing ESC (key code 27) ends the loop.

# Gesture name -> index of the BGR channel boosted while it is active
# (reggae -> green, rock -> red, surf -> blue).
tint_channels = {"reggae": 1, "rock": 2, "surf": 0}

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR frames.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)

        gesture = None
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                # Keep the first recognised gesture so that another hand
                # showing no gesture cannot reset it to None.
                if gesture is None:
                    gesture = detect_gesture(hand_landmarks)

        # Only touch the music player when the gesture actually changes.
        if gesture != current_gesture:
            pygame.mixer.music.stop()
            current_gesture = gesture

            if gesture in songs:
                pygame.mixer.music.load(songs[gesture])
                pygame.mixer.music.play(-1)  # -1 = loop indefinitely

        # Colour tint: add 80 to one channel of a copy, then blend that copy
        # back into the frame at 40 % weight.
        channel = tint_channels.get(current_gesture)
        if channel is not None:
            tinted = frame.copy()
            tinted[:, :, channel] = cv2.add(tinted[:, :, channel], 80)
            frame = cv2.addWeighted(frame, 0.6, tinted, 0.4, 0)

        # Face overlay. The face mesh runs only when an overlay image is
        # available, and on the untouched RGB frame rather than on the
        # tinted frame with hand landmarks drawn over it.
        filter_img = filters.get(current_gesture)
        if filter_img is not None:
            result_face = face_mesh.process(rgb)

            if result_face.multi_face_landmarks:
                face = result_face.multi_face_landmarks[0]
                h, w, _ = frame.shape

                # Landmarks 33 and 263 serve as the two eye reference points.
                left_eye = face.landmark[33]
                right_eye = face.landmark[263]

                # Landmark coordinates are scaled by the frame size to get
                # pixel positions.
                x1, y1 = int(left_eye.x * w), int(left_eye.y * h)
                x2, y2 = int(right_eye.x * w), int(right_eye.y * h)

                eye_dist = int(((x2 - x1)**2 + (y2 - y1)**2)**0.5)

                # Overlay size scales with the distance between the eyes.
                filter_w = int(5.2 * eye_dist)
                filter_h = int(5.6 * eye_dist)

                # Centre horizontally on the eye midpoint; the 0.85 factor
                # shifts the overlay slightly downwards relative to it.
                x = int((x1 + x2) / 2 - filter_w/2)
                y = int((y1 + y2) / 2 - filter_h/2 * 0.85)

                # A zero eye distance would give an empty overlay, which
                # cv2.resize rejects.
                if filter_w > 0 and filter_h > 0:
                    frame = overlay_filter(frame, filter_img, x, y, filter_w, filter_h)

        cv2.imshow("Demo Mano Musical + Filtro", frame)
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Always release the camera, window and audio, even when the loop is
    # left through an exception or a keyboard interrupt.
    cap.release()
    cv2.destroyAllWindows()
    pygame.mixer.music.stop()
