In [30]:
import pretty_midi
import os
import numpy as np
import heapq
import pickle

# See https://jazz-soft.net/demo/GeneralMidi.html for which instrument each number represents
#instruments = [0, 6, 40, 41, 42, 43, 45, 60, 68, 70, 71, 73]

num_notes = 128 # Number of pitches in MIDI

# Number of time steps per second (this is what NES-MDB uses)
steps_per_sec = 24

# At each time step, an instrument can turn on notes and turn off notes. For each note it's turning on, it picks the note's velocity
action_dim = 3*num_notes
    
base_path = 'musicnet_midis/'

fnum = 0 # Which file are we writing currently?

data_fnames = [] # Save file name corresponding to each numpy array
for composer in os.listdir(base_path):
    print('Starting ' + composer)
    for fname in os.listdir(base_path + composer):
        try:
            mid = pretty_midi.PrettyMIDI(base_path + composer + '/' + fname)
        except:
            # There are 7 files that cause an IO error, both with mido and pretty_midi. Haven't looked into why
            continue
            
        # Number of time steps needed to encode the file. +1 because of the 0 step
        tsteps = int(round(mid.get_end_time()*steps_per_sec) + 1)
        
        data = np.zeros((len(mid.instruments), tsteps, action_dim), dtype=np.float32)
        
        for i, instrument in enumerate(mid.instruments):
            for n, note in enumerate(instrument.notes):
                start = int(round(note.start*steps_per_sec))
                end = int(round(note.end*steps_per_sec))
                
                data[i, start, note.pitch] = 1 # Turn on the note
                data[i, end, num_notes + note.pitch] = 1 # Turn off the note
                data[i, start, 2*num_notes + note.pitch] = note.velocity/127               
                
        np.save('preprocessed_data_sync/recording' + str(fnum) + '.npy', data)
        
        # Also save a numpy array containing the MIDI number for each instrument
        instruments = np.array([instrument.program for instrument in mid.instruments])
        np.save('preprocessed_data_sync/instruments' + str(fnum) + '.npy', instruments)
        
        data_fnames.append(composer + '/' + fname)
        fnum += 1
        
pickle.dump(data_fnames, open( "preprocessed_data_sync_fnames.p", "wb" ) )

Starting Faure
Starting Schubert
Starting Cambini
Starting Ravel
Starting Bach
Starting Dvorak
Starting Brahms
Starting Mozart
Starting Beethoven
Starting Haydn


In [218]:
mid = pretty_midi.PrettyMIDI(base_path + 'Haydn/2104_op64n5_1.mid')
mid.instruments

[Instrument(program=40, is_drum=False, name="Violin 1"),
 Instrument(program=40, is_drum=False, name="Violin 2"),
 Instrument(program=41, is_drum=False, name="Viola"),
 Instrument(program=42, is_drum=False, name="Violoncello")]

674


In [255]:
print(mid.instruments[0].notes[674]) # 674 and 675 are the same note

Note(start=276.217216, end=276.309321, pitch=78, velocity=49)


In [None]:
mid = pretty_midi.PrettyMIDI('musicnet_midis/Brahms/2112_jb38_1.mid')

# Number of time steps needed to encode the file. +1 because of the 0 step
tsteps = int(round(mid.get_end_time()*steps_per_sec) + 1)

data = np.zeros((len(mid.instruments), tsteps, action_dim))

for i, instrument in enumerate(mid.instruments):
    for n, note in enumerate(instrument.notes):
        start = int(round(note.start*steps_per_sec))
        end = int(round(note.end*steps_per_sec))

        data[i, start, note.pitch] = 1 # Turn on the note
        data[i, end, num_notes + note.pitch] = 1 # Turn off the note
        data[i, start, 2*num_notes + note.pitch] = note.velocity/127