In [34]:
import glob
import pretty_midi

def get_piano_rolls(midi_files, fs):
    """Load MIDI files and collect the piano roll of each accepted file.

    A file is accepted only when it has at least one instrument and the name
    of its *first* instrument contains the (case-sensitive) substring 'Piano'.
    Rejected files are reported on stdout and skipped.

    Args:
        midi_files: iterable of MIDI file paths, each opened with
            ``pretty_midi.PrettyMIDI``.
        fs: forwarded unchanged as ``get_piano_roll(fs=fs)``.

    Returns:
        list with one piano roll per accepted file, in input order.
    """
    piano_rolls = []
    for path in midi_files:
        print(f'Processing {path}')

        song = pretty_midi.PrettyMIDI(path)
        tracks = song.instruments

        # Substring match on purpose: variants such as "Grand Piano" must pass too.
        leads_with_piano = len(tracks) > 0 and 'Piano' in tracks[0].name
        if leads_with_piano:
            piano_rolls.append(song.get_piano_roll(fs=fs))
        else:
            print('\tCannot process file as the first instrument is not a Piano.')
        #end
    #end
    print('Done!')
    return piano_rolls
#end

# Every MIDI file found under ./data (collected here, but not all converted yet).
all_midi_files = glob.glob('./data/*.mid')

# For now only one hand-picked file is converted, with fs=100.
all_piano_rolls = get_piano_rolls(midi_files=['./data/C_mapleaf.mid'], fs=100)

Processing ./data/C_mapleaf.mid
Done!




In [35]:
import numpy as np

# Passed as `seq_len` to process_piano_roll, which currently does not use it.
# Presumably the intended number of time steps per training sequence -- TODO confirm.
SEQUENCE_LENGTH = 50

def process_piano_roll(roll, seq_len):
    """Group the active notes of a piano roll by time step.

    Args:
        roll: 2-D array indexed as ``roll[note, time]``; a cell counts as
            active when its value is greater than zero.
        seq_len: accepted for interface compatibility; not used by this
            function.

    Returns:
        dict mapping every time index that has at least one active note to a
        1-D array of the note indices active at that time, in ascending
        order. Silent time steps are absent; an all-silent roll yields ``{}``.
    """
    # Locate the active cells once and reuse the result.
    active = np.nonzero(roll > 0)
    note_idx, time_idx = active[0], active[1]

    # A stable sort by time keeps the ascending note order inside each time
    # step, so one sort + split replaces a full rescan per time step.
    order = np.argsort(time_idx, kind='stable')
    times, first_pos = np.unique(time_idx[order], return_index=True)

    # first_pos[k] is where the k-th distinct time starts in the sorted view;
    # splitting at every start but the first yields one chunk per time.
    notes_per_time = np.split(note_idx[order], first_pos[1:])
    return dict(zip(times, notes_per_time))
#end

# Build the time -> active-notes mapping for each loaded piano roll.
# NOTE(review): `notes` is reassigned on every iteration, so only the mapping
# of the last roll survives the loop -- confirm that this is intended.
for roll in all_piano_rolls:
    notes = process_piano_roll(roll, SEQUENCE_LENGTH)
#end

In [36]:
class NoteTokenizer(object):
    """Vocabulary mapping note tokens to integer indices and back.

    ``partial_fit`` turns every item into a string token by joining the
    item's elements with commas (e.g. ``[60, 64]`` -> ``'60,64'``). Indices
    are handed out in order of first registration, starting at 1.
    """

    def __init__(self):
        self.notes_to_index = {}  # token -> index
        self.index_to_notes = {}  # index -> token
        self.num_words = 0        # total number of items seen by partial_fit
        self.unique_words = 0     # number of indices handed out so far
        self.notes_freq = {}      # token -> occurrences seen by partial_fit
    #end

    def transform(self, array):
        """Convert sequences of tokens into an int32 array of indices.

        Args:
            array: iterable of token sequences; every token must already be
                registered (via ``partial_fit`` or ``add_new_note``).

        Returns:
            ``np.ndarray`` of dtype int32 with one row per input sequence.

        Raises:
            KeyError: if a token has no index.
        """
        rows = [[self.notes_to_index[note] for note in el] for el in array]
        return np.array(rows, dtype=np.int32)
    #end

    def partial_fit(self, notes):
        """Update counts and vocabulary from a batch of items.

        Args:
            notes: iterable of items; each item is itself an iterable whose
                elements are stringified and comma-joined into one token.

        Tokens that already own an index (including ones registered through
        ``add_new_note``) keep it; only unseen tokens receive a new index.
        """
        for note in notes:
            note_str = ','.join(str(a) for a in note)
            self.notes_freq[note_str] = self.notes_freq.get(note_str, 0) + 1
            self.num_words += 1
            # Key registration on the index table, not on notes_freq: a token
            # added via add_new_note has an index but no frequency entry yet,
            # and must not be given a second index.
            if note_str not in self.notes_to_index:
                self._register(note_str)
            #end
        #end
    #end

    def add_new_note(self, note):
        """Register `note` as a new token without touching the counters.

        `note` must not already have an index (checked with ``assert``).
        """
        assert note not in self.notes_to_index
        self._register(note)
    #end

    def _register(self, token):
        """Give `token` the next free index in both lookup tables."""
        self.unique_words += 1
        self.notes_to_index[token] = self.unique_words
        self.index_to_notes[self.unique_words] = token
    #end
#end

In [37]:
def piano_roll_to_midi(roll, fs):
    """Rebuild a single-instrument PrettyMIDI object from a piano roll.

    Args:
        roll: 2-D array indexed as ``roll[pitch, frame]`` holding velocities;
            a value greater than zero means the pitch is sounding.
        fs: divisor turning a frame index into a note start/end time
            (presumably frames per second -- confirm against the fs used to
            build the roll).

    Returns:
        A ``pretty_midi.PrettyMIDI`` with a 2/4 time signature at time 0 and
        one instrument (program 0) holding the reconstructed notes. A note
        keeps the velocity it started with; later changes between two
        positive velocities neither split nor re-trigger it.
    """
    pitch_count, _ = roll.shape

    # Pad one silent column on each side so that notes sounding in the very
    # first or very last frame still produce an on / off transition.
    padded = np.pad(roll, [(0, 0), (1, 1)], 'constant')

    # Any non-zero difference between neighbouring frames is a velocity change.
    # Transposing first makes nonzero() report changes frame by frame.
    change_frames, change_pitches = np.nonzero(np.diff(padded).T)

    midi = pretty_midi.PrettyMIDI()
    midi.time_signature_changes = [pretty_midi.TimeSignature(2, 4, 0)]
    track = pretty_midi.Instrument(program=0)

    # Per-pitch state: velocity of the currently held note (0 = silent; stored
    # as int, so fractional velocities are truncated) and its onset time.
    held_velocity = np.zeros(pitch_count, dtype=int)
    onset = np.zeros(pitch_count)

    for frame, pitch in zip(change_frames, change_pitches):
        # +1 compensates for the padding column added on the left.
        level = padded[pitch, frame + 1]
        moment = frame / fs

        if level > 0:
            # Note-on only when the pitch was silent before.
            if held_velocity[pitch] == 0:
                onset[pitch] = moment
                held_velocity[pitch] = level
            #end
            continue
        #end

        # Velocity is no longer positive: close the held note.
        track.notes.append(
            pretty_midi.Note(
                velocity=held_velocity[pitch],
                pitch=pitch,
                start=onset[pitch],
                end=moment
            )
        )
        held_velocity[pitch] = 0
    #end

    midi.instruments.append(track)
    return midi
#end

# Round-trip check: rebuild a MIDI object from the first piano roll, using the
# same fs value (100) that the roll was extracted with.
midi_out = piano_roll_to_midi(all_piano_rolls[0], 100)
# Overwrite the velocity of every reconstructed note with a fixed value of 100.
for note in midi_out.instruments[0].notes:
    note.velocity = 100
midi_out.write('./output.mid')