In [98]:
from music21 import *
import numpy as np

In [202]:
#test_path = "../dataset/combined/alb_esp1.mid"
#test_path = "../dataset/combined/DEB_CLAI.mid"
test_path = "../dataset/combined/CDL.mid"

In [213]:
# Function to open a midi file for further manipulation
# Params: string path to a midi file
def open_midi_file(midi_file_path):
    midifile = converter.parse(midi_file_path)
    return midifile

# Function to open a midi file's sheet variant in MuseScore, requires MuseScore to be installed
# PS: If at first it doesn't work, try following http://web.mit.edu/music21/doc/installing/installWindows.html#install-music21
# If it still doesn't work, try running the function a second time for the heck of it. Environment variables be crazy.
# Params: string path to a midi file
def open_midi_file_musescore(midi_file_path):
    midi_file = open_midi_file(midi_file_path)
    midi_file.show()
    
# Function to play a midi file
# Params: string path to a midi file
def play_midi_file(midi_file_path):
    midi_file = open_midi_file(midi_file_path)
    midi_file.show("midi")
    
# Function to make sure that a list of tokens contains a stop_note event for every note starting event
# Works similarly to adding a note in the tokens to midi conversion function
# Params: a list of tokens
# Returns true if all notes end properly, false if not
def validate_tokens(tokens):
    
    # Keeps track of the current token
    current_token_index = 0
    
    for token in tokens:
        token_type = token.split(":")[0]
        token_value = token.split(":")[1]
        
        if token_type == "note":
            found_corresponding_end_note = False
            note_midi_pitch = token_value
            
            for following_token in tokens[current_token_index + 1:]:
                following_token_type = following_token.split(":")[0]
                following_token_value = following_token.split(":")[1]
                    
                if following_token_type == "stop_note":
                    stopped_note_pitch = following_token_value
                    if (note_midi_pitch == stopped_note_pitch):
                        found_corresponding_end_note = True
                        break
            
            if not found_corresponding_end_note:
                return False
                
    return True
    
# Function to turn a midi file into text tokens
# Params: string path to a midi file
def convert_midi_to_tokens(midi_file_path):
    midi_file = open_midi_file(midi_file_path)
    
    # A list to hold tokens
    tokens = list()
    
    # A list to keep track of which note events await a corresponding note off event
    notes_to_stop = list()
    
    # Keeps track of time since start of the midi piece
    current_offset = 0
    
    # Keeps track of the offset of the previously handled midi event
    previous_offset = None
    
    # Iterate over all midi events, sorted by offset (ascending)
    # and handle which tokens will be added to list of tokens
    for midi_event in midi_file.flat.elements:
        
        current_offset = round(float(midi_event.offset), 3)
        
        # At the end of the current loop, tokens in this list will be added to the final tokens list
        tokens_to_add = list()
        
        # Check if there are notes that should have ended between the last and current offset (included) 
        if len(notes_to_stop) != 0:
            for note_to_stop, when_to_stop in notes_to_stop.copy():
                if when_to_stop <= current_offset:
                    time_since_prev_offset = round(float(when_to_stop - previous_offset), 3)
                    if time_since_prev_offset > 0:
                        tokens_to_add.append("wait:" + str(time_since_prev_offset))
                        previous_offset = when_to_stop
                    tokens_to_add.append("stop_note:" + note_to_stop)
                    notes_to_stop.remove([note_to_stop, when_to_stop])
        
        # If the offset has changed by >0, account for it by adding a waiting token
        if (previous_offset != None and isinstance(midi_event, (note.Note, chord.Chord))):
            offset_change = round(float(current_offset - previous_offset), 3)
            if offset_change > 0:
                tokens_to_add.append("wait:" + str(offset_change))
        
        if isinstance(midi_event, meter.TimeSignature):
            time_signature_value = str(midi_event.ratioString)
            tokens_to_add.append("timesignature:" + time_signature_value)
        
        if isinstance(midi_event, tempo.MetronomeMark):
            tempo_value = str(midi_event.number)
            tokens_to_add.append("tempo:" + tempo_value)
        
        # If the current midi event is a note, add a note token along with its midi pitch number
        # And remember when the note needs to be stopped
        if isinstance(midi_event, note.Note):
            midi_pitch = str(midi_event.pitch.midi)
            token_string = "note:" + midi_pitch
            note_end_offset = round(float(current_offset + midi_event.duration.quarterLength), 3)
            
            tokens_to_add.append(token_string)
            notes_to_stop.append([midi_pitch, note_end_offset])
        
        # If the current midi event is a chord, do the same as before for every note in the chord
        if isinstance(midi_event, chord.Chord):
            for individual_note in midi_event:
                midi_pitch = str(individual_note.pitch.midi)
                token_string = "note:" + midi_pitch
                note_end_offset = round(float(current_offset + individual_note.duration.quarterLength), 3)

                tokens_to_add.append(token_string)
                notes_to_stop.append([midi_pitch, note_end_offset])
        
        tokens.extend(tokens_to_add)
        previous_offset = current_offset
    
    # After iterating through all midi events, it is necessary to check for note stopping events one more time,
    # since the last midi event could have been a note starting event
    tokens_to_add = list()
    if len(notes_to_stop) != 0:
        for note_to_stop, when_to_stop in notes_to_stop.copy():
            tokens_to_add.append("stop_note:" + note_to_stop)
            notes_to_stop.remove([note_to_stop, when_to_stop])
    tokens.extend(tokens_to_add)
    
    return tokens

# Function to convert list of text tokens to a Music21 midi stream
# Params: a list of tokens
def convert_tokens_to_midi(tokens):
    
    # A midi stream that will hold midi events converted from tokens
    midi_stream = stream.Stream()
    
    # Keeps track of the current token
    current_token_index = 0
    
    # Keeps track of offset
    current_offset = 0
    
    for token in tokens:
        token_type = token.split(":")[0]
        token_value = token.split(":")[1]
        
        if token_type == "timesignature":
            timesignature_value = token_value
            midi_stream.append(meter.TimeSignature(timesignature_value))
        
        if token_type == "tempo":
            tempo_value = float(token_value)
            midi_stream.append(tempo.MetronomeMark(number=tempo_value))
        
        # Converting a note-starting token to a midi event, we need to know its duration.
        # To find the duration, we look at the following tokens until we find a corresponding stop_note token.
        # While searching for the stop_note token, we add up the values of intermediate wait tokens, denoting duration.
        # We identify the corresponding stop_note by the note's midi pitch number. 
        if token_type == "note":
            note_duration = 0
            note_midi_pitch = token_value

            for following_token in tokens[current_token_index + 1:]:
                following_token_type = following_token.split(":")[0]
                following_token_value = following_token.split(":")[1]
                
                if following_token_type == "wait":
                    wait_duration = following_token_value
                    note_duration += float(wait_duration)
                    
                if following_token_type == "stop_note":
                    stopped_note_pitch = following_token_value
                    if (note_midi_pitch == stopped_note_pitch):
                        new_note = note.Note(int(note_midi_pitch))  
                        new_note.quarterLength = round(float(note_duration), 3)
                        midi_stream.insert(current_offset, new_note)
                        break

        if token_type == "wait":
            wait_duration = token_value
            current_offset += float(wait_duration)

        current_token_index += 1

    return midi_stream

In [20]:
midi_file = open_midi_file(test_path)

In [201]:
open_midi_file_musescore(test_path)

In [204]:
play_midi_file(test_path)

In [214]:
validate_tokens(convert_midi_to_tokens(test_path))

True

In [216]:
" ".join(convert_midi_to_tokens(test_path))

'tempo:48.0 timesignature:9/8 wait:0.5 note:65 note:68 wait:0.5 note:77 note:80 wait:0.5 stop_note:65 stop_note:68 wait:0.5 stop_note:77 stop_note:80 wait:1.0 note:73 note:77 wait:1.0 stop_note:73 stop_note:77 wait:0.25 note:66 note:69 wait:0.5 note:72 note:75 wait:0.5 stop_note:66 stop_note:69 note:73 note:77 wait:0.5 stop_note:72 stop_note:75 note:72 note:75 wait:0.5 stop_note:73 stop_note:77 wait:0.5 stop_note:72 stop_note:75 wait:0.25 note:65 note:68 wait:0.167 note:70 note:73 wait:0.5 stop_note:65 stop_note:68 note:72 note:75 wait:0.5 stop_note:70 stop_note:73 note:73 note:70 wait:0.5 stop_note:72 stop_note:75 wait:0.25 stop_note:73 note:77 wait:1.5 stop_note:77 note:73 wait:0.25 note:63 note:66 wait:0.5 stop_note:70 stop_note:73 note:68 note:72 wait:0.5 stop_note:63 stop_note:66 note:70 note:73 wait:0.5 stop_note:68 stop_note:72 note:68 note:72 wait:0.5 stop_note:70 stop_note:73 wait:2.25 stop_note:68 tempo:49.0 note:66 note:63 note:61 wait:0.333 stop_note:72 wait:0.167 note:70 w