In [1]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pretty_midi
import pickle
import os

In [2]:
def LoD_midi_spectrogram(mp3_file_path, midi_file_path, n_fft=2048, hop_length=512):
    """Collect one spectrogram slice per MIDI note for an audio/MIDI pair.

    The audio file is converted to an amplitude spectrogram (magnitude of the
    STFT). For every note of the first instrument in the MIDI file, the
    frequency bin closest to the note's pitch is selected and the frames
    between the note's start and end times are cut out of that bin.

    Parameters
    ----------
    mp3_file_path : str
        Path of the audio file, passed to ``librosa.load``.
    midi_file_path : str
        Path of the MIDI file, passed to ``pretty_midi.PrettyMIDI``.
    n_fft : int, optional
        FFT size, used for both the STFT and the bin-frequency table.
    hop_length : int, optional
        Hop size in samples, used for both the STFT and the time-to-frame
        conversion so that frame indices line up with spectrogram columns.

    Returns
    -------
    list of dict
        One ``{'pitch': int, 'spectrogram': 1-D array}`` entry per note whose
        slice has a non-zero sum. Notes with an empty or all-zero slice are
        skipped.

    Notes
    -----
    Only ``instruments[0]`` of the MIDI file is read; a MIDI file without
    instruments raises ``IndexError``.
    """
    # Load audio file; sr is the sampling rate reported by librosa
    y, sr = librosa.load(mp3_file_path)
    # Compute the complex spectrogram and keep only its magnitude
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    D_amp = np.abs(D)

    # Load midi and take the notes of the first instrument
    midi_data = pretty_midi.PrettyMIDI(midi_file_path)
    notes = midi_data.instruments[0].notes

    # Centre frequency of every spectrogram row; identical for all notes,
    # so it is computed once instead of once per note
    bin_frequencies = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Build the list of note + spectrogram dictionaries
    note_data = []
    for note in notes:
        pitch = note.pitch
        # Convert note times (seconds) to spectrogram frame indices
        start_frame = librosa.time_to_frames(note.start, sr=sr, hop_length=hop_length)
        end_frame = librosa.time_to_frames(note.end, sr=sr, hop_length=hop_length)
        # Convert pitch to frequency, then to the nearest spectrogram row
        freq = librosa.midi_to_hz(pitch)
        bin_index = np.argmin(np.abs(bin_frequencies - freq))
        # Copy the slice: a plain slice is a view that would keep the whole
        # spectrogram of this track alive for as long as the result is stored
        note_spectrogram = D_amp[bin_index, start_frame:end_frame].copy()
        if note_spectrogram.sum()!=0:
            note_data.append({'pitch': pitch,'spectrogram': note_spectrogram})
    print('Success: gathered data for: ', mp3_file_path)
    return note_data

def get_file_path_audio_and_midi (audio_dir = "piano_mp3", midi_dir = "piano_midi"):
    """Return index-aligned lists of audio and MIDI file paths.

    Files are paired by stem (file name without extension), so
    ``audio_paths[i]`` and ``midi_paths[i]`` always refer to the same piece.
    Files present in only one directory are reported and left out; without
    this, a single stray file would shift every later pair when the two
    lists are zipped together.

    Parameters
    ----------
    audio_dir : str
        Directory holding the audio files.
    midi_dir : str
        Directory holding the MIDI files.

    Returns
    -------
    (list of str, list of str)
        Audio paths in sorted order, and the matching MIDI paths in the
        same order. Sub-directories are ignored. If a directory holds
        several files with the same stem, the first one in sorted name
        order is used.
    """
    def _files_by_stem(directory):
        # Map stem -> path for the regular files directly inside `directory`
        by_stem = {}
        for filename in sorted(os.listdir(directory)):
            path = os.path.join(directory, filename)
            if os.path.isfile(path):
                by_stem.setdefault(os.path.splitext(filename)[0], path)
        return by_stem

    audio_by_stem = _files_by_stem(audio_dir)
    midi_by_stem = _files_by_stem(midi_dir)

    # Report files that have no partner instead of silently mis-pairing them
    for stem in sorted(set(audio_by_stem) - set(midi_by_stem)):
        print("Warning: no midi file found for: ", audio_by_stem[stem])
    for stem in sorted(set(midi_by_stem) - set(audio_by_stem)):
        print("Warning: no audio file found for: ", midi_by_stem[stem])

    # Keep audio paths in sorted-path order; look up each partner by stem
    audio_paths = sorted(path for stem, path in audio_by_stem.items() if stem in midi_by_stem)
    midi_paths = [
        midi_by_stem[os.path.splitext(os.path.basename(path))[0]]
        for path in audio_paths
    ]

    return audio_paths, midi_paths

In [3]:
# Gather note/spectrogram data for every audio + MIDI pair in the local folders
audio_paths, midi_paths = get_file_path_audio_and_midi()

# zip() pairs the lists by position and stops at the shorter one, so a count
# mismatch means some files are dropped and the remaining pairs may be shifted
if len(audio_paths) != len(midi_paths):
    print("Warning: number of audio files and midi files differ: ", len(audio_paths), len(midi_paths))

data = []
# Loop variables get their own names so the path lists are not overwritten
# while they are being iterated
for audio_path, midi_path in zip(audio_paths, midi_paths):
    try:
        music_data = LoD_midi_spectrogram(audio_path, midi_path)
        data.append(music_data)
    except OSError:
        # Best effort: report the file and continue with the next pair
        print("Error: failed to gather data for: ", audio_path)

Success: gathered data for:  piano_mp3\alb_esp1.mp3
Success: gathered data for:  piano_mp3\alb_esp2.mp3
Success: gathered data for:  piano_mp3\alb_esp3.mp3
Success: gathered data for:  piano_mp3\alb_esp4.mp3
Success: gathered data for:  piano_mp3\alb_esp5.mp3
Success: gathered data for:  piano_mp3\alb_esp6.mp3
Success: gathered data for:  piano_mp3\alb_se1.mp3
Success: gathered data for:  piano_mp3\alb_se2.mp3
Success: gathered data for:  piano_mp3\alb_se3.mp3
Success: gathered data for:  piano_mp3\alb_se4.mp3
Success: gathered data for:  piano_mp3\alb_se5.mp3
Success: gathered data for:  piano_mp3\alb_se6.mp3
Success: gathered data for:  piano_mp3\alb_se7.mp3
Success: gathered data for:  piano_mp3\alb_se8.mp3
Success: gathered data for:  piano_mp3\appass_1.mp3
Success: gathered data for:  piano_mp3\appass_2.mp3
Success: gathered data for:  piano_mp3\appass_3.mp3
Success: gathered data for:  piano_mp3\bach_846.mp3
Success: gathered data for:  piano_mp3\bach_847.mp3
Success: gathered da

Success: gathered data for:  piano_mp3\grieg_album.mp3
Success: gathered data for:  piano_mp3\grieg_berceuse.mp3
Success: gathered data for:  piano_mp3\grieg_brooklet.mp3
Success: gathered data for:  piano_mp3\grieg_butterfly.mp3
Success: gathered data for:  piano_mp3\grieg_elfentanz.mp3
Success: gathered data for:  piano_mp3\grieg_halling.mp3
Success: gathered data for:  piano_mp3\grieg_kobold.mp3
Success: gathered data for:  piano_mp3\grieg_march.mp3
Success: gathered data for:  piano_mp3\grieg_once_upon_a_time.mp3
Success: gathered data for:  piano_mp3\grieg_spring.mp3
Success: gathered data for:  piano_mp3\grieg_voeglein.mp3
Success: gathered data for:  piano_mp3\grieg_waechter.mp3
Success: gathered data for:  piano_mp3\grieg_walzer.mp3
Success: gathered data for:  piano_mp3\grieg_wanderer.mp3
Success: gathered data for:  piano_mp3\grieg_wedding.mp3
Success: gathered data for:  piano_mp3\grieg_zwerge.mp3
Success: gathered data for:  piano_mp3\hay_40_1.mp3
Success: gathered data for

Success: gathered data for:  piano_mp3\scn15_10.mp3
Success: gathered data for:  piano_mp3\scn15_11.mp3
Success: gathered data for:  piano_mp3\scn15_12.mp3
Success: gathered data for:  piano_mp3\scn15_13.mp3
Success: gathered data for:  piano_mp3\scn15_2.mp3
Success: gathered data for:  piano_mp3\scn15_3.mp3
Success: gathered data for:  piano_mp3\scn15_4.mp3
Success: gathered data for:  piano_mp3\scn15_5.mp3
Success: gathered data for:  piano_mp3\scn15_6.mp3
Success: gathered data for:  piano_mp3\scn15_7.mp3
Success: gathered data for:  piano_mp3\scn15_8.mp3
Success: gathered data for:  piano_mp3\scn15_9.mp3
Success: gathered data for:  piano_mp3\scn16_1.mp3
Success: gathered data for:  piano_mp3\scn16_2.mp3
Success: gathered data for:  piano_mp3\scn16_3.mp3
Success: gathered data for:  piano_mp3\scn16_4.mp3
Success: gathered data for:  piano_mp3\scn16_5.mp3
Success: gathered data for:  piano_mp3\scn16_6.mp3
Success: gathered data for:  piano_mp3\scn16_7.mp3
Success: gathered data for:

In [4]:
# Persist the gathered note/spectrogram records as a pickle file
# in the current working directory
with open("piano_data_spectrogram.pkl", "wb") as pickle_file:
    pickle.dump(data, pickle_file)