In [5]:
import mediapipe as mp #needs python 3.10 or less!!
import cv2
import pysynth #generates music notes

#to store the previous note and then delete it; potential alternative : storing individual notes as files
import simpleaudio as sa
import os
import tempfile

In [6]:
# MediaPipe hand tracker shared by the capture loop further down.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode = False,          #frames come from a continuous video stream, not independent photos
    max_num_hands = 2,                  #track at most two hands
    min_detection_confidence = 0.5,     #starting value - TODO: experiment with this threshold
    min_tracking_confidence = 0.5       #starting value - TODO: experiment with this threshold
)

#drawing helper used to overlay the detected hand landmarks on each frame (a visual aid while developing)
mp_draw = mp.solutions.drawing_utils
#open question: could this also be used to draw the piano lines?

In [7]:
#CONSTANTS.

#FINGER TRACKING
#landmark indices of the five fingertips: thumb, index, middle, ring, pinky
finger_tip_ids = [4, 8, 12, 16, 20]

#PIANO SOUNDS
#white keys, numbered left to right (c4 up to c6, so three Cs)
white_key_dictionary = {0: 'c4', 1: 'd4', 2: 'e4', 3: 'f4', 4: 'g4', 5: 'a4', 6: 'b4', 7: 'c5', 8: 'd5', 9: 'e5', 10: 'f5', 11: 'g5', 12: 'a5', 13: 'b5', 14: 'c6'}
#black keys, keyed by the white-key boundary they are centred on
#(position p is the edge between white key p - 1 and white key p)
black_key_dictionary = {1: "c#4", 2: "d#4", 4: "f#4", 5: "g#4", 6: "a#4", 8: "c#5", 9: "d#5", 11: 'f#5', 12: "g#5", 13: "a#5"}

#PIANO LINES
#the counts and positions are derived from the dictionaries so they can never disagree with them
white_keys = len(white_key_dictionary)
black_keys = len(black_key_dictionary)
black_pos = list(black_key_dictionary) #boundaries that carry a black key
white_width = 42 #taken from frame.shape[1] // white_keys, will need to change this based on app size
top_y, bottom_y = 240, 480 #the limits of the piano set up
#black keys start on the same top line as the white ones but are narrower and shorter
black_width = int(white_width * 0.6)
black_height = int((bottom_y - top_y) * 0.6) #manually identified proportions
black_bottom = top_y + black_height

#Connecting rectangles to keys
#every entry is (note name, (x1, y1, x2, y2))
white_rectangles = [
    (white_key_dictionary[index], (index * white_width, top_y, (index + 1) * white_width, bottom_y))
    for index in range(white_keys)
]
#a black key straddles its boundary: half of its width on either side
black_rectangles = [
    (note, (boundary * white_width - black_width // 2, top_y, boundary * white_width + black_width // 2, black_bottom))
    for boundary, note in black_key_dictionary.items()
]


#checking if the finger is in the piano key
def is_point_in_rect(point, rect):
    """Tell whether `point` lies inside `rect`, edges included.

    point: an (x, y) pair.
    rect:  an (x1, y1, x2, y2) tuple with x1/y1 the lower bounds and
           x2/y2 the upper bounds.
    """
    px, py = point
    left, top, right, bottom = rect
    # Reject on the horizontal axis first, then decide on the vertical one.
    if not (left <= px <= right):
        return False
    return top <= py <= bottom

def preload_notes(note_dicts):
    """Synthesize every note once and return the sounds ready to be played.

    note_dicts: an iterable of dictionaries whose *values* are note names
                (for example 'c4' or 'c#4'); the keys are ignored.

    Returns a dict mapping each note name to the WaveObject loaded from its
    synthesized .wav file. A note that appears more than once is only
    synthesized the first time it is seen.

    Each note goes through a temporary .wav file that is removed again even
    when synthesis or loading fails; the error itself still propagates.
    """
    loaded_notes = {}
    for note_dict in note_dicts:
        for note in note_dict.values():
            if note in loaded_notes:
                continue  # already synthesized, no need to do the work twice
            # Only the unique path is needed; the handle is closed right away
            # so the file can be reopened by name for writing and reading.
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
            temp_path = temp_file.name
            temp_file.close()
            try:
                tune = [(note, 4)]  # a single note with duration value 4
                pysynth.make_wav(tune, fn=temp_path, bpm=120)
                loaded_notes[note] = sa.WaveObject.from_wave_file(temp_path)
            finally:
                # delete=False above means nobody else will clean this up
                os.remove(temp_path)
    return loaded_notes

# Synthesize every white- and black-key note once, up front, so that
# play_note() only has to look the finished sound up while the camera loop runs.
loaded_sounds = preload_notes([white_key_dictionary, black_key_dictionary])

def play_note(note):
    """Start playback of the preloaded sound for `note`.

    Names that are not present in `loaded_sounds` are ignored silently.
    """
    if note not in loaded_sounds:
        return
    loaded_sounds[note].play()

Writing to file C:\Users\sinha\AppData\Local\Temp\tmps5nw5bx_.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmpmezivmet.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmppudaho23.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmprmae673o.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmpaweew8nk.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmpucaqmmvi.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmpaaigzal5.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmpcoethd10.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmpchz4mytj.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmpn5l8hii8.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmpvb1m5rxh.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmp6xkn__oe.wav
[1/1]	

Writing to file C:\Users\sinha\AppData\Local\Temp\tmprg6g63g8.wav
[1/1]	

Writing to file C:\Users\sinha\AppData

In [4]:
def _key_under_point(point):
    """Return the note name of the piano key that contains `point`, or None.

    Black keys are checked before white keys because they overlap the white
    ones and should take priority where they do.
    """
    for rectangles in (black_rectangles, white_rectangles):
        for label, rect in rectangles:
            if is_point_in_rect(point, rect):
                return label
    return None


capture = cv2.VideoCapture(0) #starts camera - if more than one camera, the index can vary
camera_ready = capture.isOpened()
if not camera_ready:
    #report the problem and fall through to the clean-up below; calling exit()
    #from a notebook cell would shut the kernel down and lose the preloaded sounds
    print("Error: Could not open your camera")

#fingers that are currently below the piano line, stored as (hand index, fingertip id).
#this has to live OUTSIDE the frame loop: a note must sound once when a finger crosses
#the line, not again on every frame the finger stays down. Each finger is tracked on
#its own so one finger resting on a key cannot block or re-arm the others.
#NOTE: hands are told apart by their position in multi_hand_landmarks, so a hand
#leaving the picture can renumber the remaining one for a frame.
fingers_below_line = set()

try:
    while camera_ready: #goes endlessly until a read fails or 'i' is pressed
        ret, frame = capture.read() #ret is a boolean value indicating if read correctly, frame is the Image
        if not ret:
            print("Error: Could not read the frame")
            break
        frame = cv2.flip(frame, 1)  # Flip horizontally for selfie-view
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)
        h, w, _ = frame.shape

        #drawing the middle/piano line
        cv2.line(img = frame, pt1 = (0, round(h / 2)), pt2 = (w, round(h / 2)), color = (255, 0, 0), thickness = 5)

        #the keys are drawn from the same rectangles that are used for hit-testing,
        #so what is on screen is exactly what a finger can press
        #WHITE KEYS (-1 for a solid fill, 2 for the black outline):
        for _, (x1, y1, x2, y2) in white_rectangles:
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 255), -1)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 2)
        #BLACK KEYS (solid black, drawn last so they sit on top):
        for _, (x1, y1, x2, y2) in black_rectangles:
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), -1)

        detected_hands = result.multi_hand_landmarks or []
        #forget the fingers of hands that are no longer detected
        fingers_below_line = {finger for finger in fingers_below_line if finger[0] < len(detected_hands)}

        for hand_index, hand_landmarks in enumerate(detected_hands):
            # Draw hand landmarks on the frame
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS) #the circles
            landmarks = hand_landmarks.landmark

            for tip_id in finger_tip_ids:
                #landmark coordinates are scaled by the frame size to get pixels
                x = int(landmarks[tip_id].x * w)
                y = int(landmarks[tip_id].y * h)
                cv2.circle(frame, (x, y), 5, (0, 255, 0), cv2.FILLED)

                finger = (hand_index, tip_id)
                if y > top_y:
                    if finger not in fingers_below_line:  # finger just crossed below line
                        fingers_below_line.add(finger)
                        pressed_key = _key_under_point((x, y))
                        if pressed_key:
                            print(f"pressed {pressed_key}")
                            play_note(pressed_key)
                else:
                    # finger moved back above the line: it may press again
                    fingers_below_line.discard(finger)

        cv2.imshow('Finger Tracking', frame) #shows every frame on the screen, Finger Tracking is the title, can be a cool other name as well.

        if (cv2.waitKey(5) & 0xFF) == ord('i'): #if i (for Illinois, lol) is clicked, it terminates the window
            #this is read every 5 ms - may increase this time because taking a reading every 5ms is a lot
            break
finally:
    #always runs, even if the loop raised, so the camera is never left locked
    capture.release() #frees up the resources/camera - camera won't be allowed to be used otherwise
    cv2.destroyAllWindows() #closes the window automatically

pressed b4
pressed b4
pressed b4
pressed b4
pressed b4
pressed c5
pressed c#5
pressed b4
pressed b4
pressed b4
pressed e5
pressed e5
pressed c5
pressed c5
pressed c5
pressed b4
pressed a#4
pressed d5
pressed c#5
pressed d5
pressed b4
pressed a4
pressed g#4
pressed g#4
pressed b4
pressed g#4
pressed b4
pressed f4
pressed a#4
pressed f4
pressed a4
pressed f4
pressed g#4
pressed a4
pressed e4
pressed a4
pressed e4
pressed g#4
pressed e4
pressed a4
pressed e4
pressed a4
pressed a4
pressed e4
pressed a4
pressed a4
pressed a4
pressed a4
pressed g#4
pressed g4
pressed f#4
pressed a#4
pressed g#4
pressed g4
pressed c#5
pressed c#5
pressed c5
pressed b4
pressed b4
pressed d#5
pressed d#5
pressed e5
pressed g5
pressed f5
pressed f#5
pressed f5
pressed f5
pressed f5
pressed f5
pressed g#5
pressed f#5
pressed a#5
pressed e5
pressed f5
pressed e5
pressed e5
pressed d#5
pressed d#5
pressed c#5
pressed c#5
pressed c#5
pressed c#5
pressed c#5
pressed a#4
pressed a#4
pressed a#4
pressed a#4
pressed a4
