In [1]:
from music21 import *
import os 

In [2]:
def open_midi(midi_path):
    """Parse the MIDI file at ``midi_path`` and return the parsed result.

    Thin wrapper around ``converter.parse``: whatever that call returns is
    handed back to the caller unchanged.
    """
    return converter.parse(midi_path)

In [13]:
# Parse a single example file; `mf` is used by the playback and tokenizing cells below.
# NOTE(review): this is an absolute, machine-specific path, whereas the batch
# conversion cell at the end of the notebook reads from the relative
# '../dataset/' directory — consider pointing this cell there as well.
mf = open_midi('C:/Users/PC/Desktop/Music Generation/dataset/10_Aion_Song_of_Moonlight.mid')

In [14]:
# Play the parsed example file (`mf`) using the "midi" show format.
mf.show("midi")

In [16]:
#mf.show() # Display sheet music in musescore

In [17]:
# Check if there are notes which should have ended before given offset
def checkForNoteOffEvent(currentOffset, noteOffEvents):
    """Return the pending note-off events that are due at ``currentOffset``.

    Each event is a sequence whose item 0 is the note name and whose item 1 is
    the offset at which the note ends. Every event ending at or before
    ``currentOffset`` is returned, in the order it appears in
    ``noteOffEvents``; the input list itself is not modified.
    """
    return [pending for pending in noteOffEvents if pending[1] <= currentOffset]

In [18]:
# Restricts possible velocities to 8 values, keeping the number of unique note events smaller
# Resembles ppp, pp, p, mp, mf, f, ff, fff dynamics 
def vModifier(velocity):
    """Quantize a velocity into a small set of levels.

    A velocity of 0 is returned as 0. Any other value is moved up to the next
    multiple of 16 (an exact multiple moves up to the following one) and then
    capped at 127.
    """
    if velocity == 0:
        return 0

    nextStep = (velocity // 16 + 1) * 16
    return min(127, nextStep)

def tModifier(tempo):
    """Quantize a tempo value to a multiple of 10.

    A tempo of 0 is returned as 0. Any other value is moved up to the next
    multiple of 10 (an exact multiple moves up to the following one).
    """
    return 0 if tempo == 0 else (tempo // 10 + 1) * 10

In [19]:
# Access midifile with Parts merged together with correct offsets

def _emitNoteOffs(tokens, notesToEnd, previousElementOffset):
    """Append wait/OFF tokens for ``notesToEnd`` and return the updated time cursor.

    ``notesToEnd`` holds (pitch name, ending offset) pairs. They are processed
    in order of ending offset so that every wait is measured from the previous
    ending; processing them in insertion order would push the cursor past the
    end of a shorter note that was started after a longer one, and that shorter
    note would then be switched off too late.
    """
    for noteToEnd in sorted(notesToEnd, key=lambda event: event[1]):
        difference = float(noteToEnd[1]) - float(previousElementOffset)
        # Differences of 0.01 or less are treated as "same time": no wait token
        # is written and the cursor is left where it was.
        if difference > 0.01:
            tokens.append("wait:" + str(round(difference, 5)))
            previousElementOffset = noteToEnd[1]
        tokens.append("note:" + str(noteToEnd[0]) + ":OFF")
    return previousElementOffset


def midi2text(midifile):
    """Convert a parsed music21 score into a flat list of text tokens.

    The flattened elements of ``midifile`` are walked in order and turned into:

    * ``"START"`` / ``"END"`` - first and last token
    * ``"tempo:<n>"`` - the first MetronomeMark only, quantized by ``tModifier``
    * ``"timesig:<ratio>"`` - the first TimeSignature only
    * ``"velocity:<v>"`` - written whenever the quantized velocity
      (``vModifier``) differs from the previous one
    * ``"note:<pitch>"`` - a note starts (one token per pitch of a chord)
    * ``"note:<pitch>:OFF"`` - a previously started note ends
    * ``"wait:<d>"`` - time advances by ``d`` (difference of element offsets,
      rounded to 5 decimals) before the tokens that follow

    Parameters:
        midifile: object exposing ``.flat.elements``, e.g. the result of
            ``open_midi``.

    Returns:
        list of str: the token sequence.
    """
    previousElementOffset = 0.0  # time of the most recently emitted event

    tempoRetrieved = False
    timeSigRetrieved = False

    currentVelocity = 0

    tokens = ["START"]
    noteOffEvents = []  # pending (pitch name, ending offset) pairs

    # NOTE(review): `.flat` is reported as deprecated in recent music21
    # releases in favour of `.flatten()` - confirm against the installed version.
    for element in midifile.flat.elements:
        currentElementOffset = element.offset
        startsSound = isinstance(element, (note.Note, chord.Chord))

        # First close every note that should already have ended by now.
        notesToEnd = checkForNoteOffEvent(currentElementOffset, noteOffEvents)
        if notesToEnd:
            previousElementOffset = _emitNoteOffs(tokens, notesToEnd, previousElementOffset)
            for noteToEnd in notesToEnd:
                noteOffEvents.remove(noteToEnd)

        # If time has advanced and new notes start here, write a wait first.
        if startsSound and float(currentElementOffset) > float(previousElementOffset + 0.01):
            difference = float(currentElementOffset - previousElementOffset)
            tokens.append("wait:" + str(round(difference, 5)))
            previousElementOffset = currentElementOffset

        if isinstance(element, tempo.MetronomeMark) and not tempoRetrieved:
            tempoRetrieved = True
            tokens.append("tempo:" + str(tModifier(element.number)))

        if isinstance(element, meter.TimeSignature) and not timeSigRetrieved:
            timeSigRetrieved = True
            tokens.append("timesig:" + str(element.ratioString))

        if isinstance(element, note.Note):
            # Single note: optional velocity change, then the note-on token.
            velocity = vModifier(element.volume.velocity)
            if currentVelocity != velocity:
                currentVelocity = velocity
                tokens.append("velocity:" + str(currentVelocity))
            tokens.append("note:" + str(element.pitch))
            noteOffEvents.append(
                (str(element.pitch), float(currentElementOffset + element.duration.quarterLength))
            )

        if isinstance(element, chord.Chord):
            # Chord: one note-on token per member, all sharing the chord's
            # velocity and duration.
            for chordnote in element:
                velocity = vModifier(element.volume.velocity)
                if currentVelocity != velocity:
                    currentVelocity = velocity
                    tokens.append("velocity:" + str(currentVelocity))
                tokens.append("note:" + str(chordnote.pitch))
                noteOffEvents.append(
                    (str(chordnote.pitch), float(currentElementOffset + element.duration.quarterLength))
                )

    # Notes still sounding after the last element get their OFF tokens here.
    _emitNoteOffs(tokens, noteOffEvents, previousElementOffset)
    noteOffEvents.clear()

    tokens.append("END")
    return tokens

# Tokenize the example file parsed earlier; `midiTokens` is reused by the
# text2midi playback cell further down.
midiTokens = midi2text(mf)

In [20]:
#midiTokens = []
#path = '../dataset/'
#filenames = os.listdir(path)
#for filename in filenames:
#    midifile = open_midi(path + filename)
#    midiTokens.append(midi2text(midifile))

In [24]:
#from tensorflow.keras.preprocessing.text import Tokenizer
#from fractions import Fraction
#
#tokenizer = Tokenizer() # token -> int
#tokenizer.fit_on_texts(midiTokens)
#
#toknotes = []
#
#for i, j in tokenizer.word_index.items():
#    if (i.startswith("note")):
#        toknotes.append(i)
#
#
#toknotes = sorted(list(set([toknote.split(":")[1] for toknote in toknotes])))
#for i in toknotes:
#    print(i)

In [22]:
def _durationUntilNoteOff(tokens, startIndex, noteName):
    """Return the summed waits between ``startIndex`` and the note's OFF token.

    Scans the tokens after ``startIndex``, adding up every ``wait:<d>`` value,
    and stops at the first ``note:<noteName>:OFF`` token. Returns ``None`` when
    no such OFF token follows.
    """
    noteDuration = 0
    # Index-based scan: avoids copying the tail of the token list per note.
    for lookahead in range(startIndex + 1, len(tokens)):
        candidate = tokens[lookahead]
        fields = candidate.split(":")
        if candidate.startswith("wait"):
            noteDuration += float(fields[1])
        if candidate.startswith("note") and candidate.lower().endswith("off"):
            if noteName == fields[1]:
                return noteDuration
    return None


def text2midi(tokens):
    """Rebuild a music21 stream from a token sequence such as ``midi2text`` returns.

    Recognized tokens: ``tempo:<n>``, ``timesig:<ratio>``, ``velocity:<v>``,
    ``note:<pitch>``, ``note:<pitch>:OFF`` and ``wait:<d>``; anything else
    (e.g. ``START`` / ``END``) is ignored. A note's length is the sum of the
    waits between its ``note:<pitch>`` token and the next matching OFF token;
    a note with no matching OFF token is left out of the result.

    Parameters:
        tokens: sequence of token strings.

    Returns:
        stream.Stream: the reconstructed stream.
    """
    s = stream.Stream()

    currentVelocity = 0
    currentOffset = 0  # running time, advanced by wait tokens

    for index, token in enumerate(tokens):
        splitToken = token.split(":")

        # Tempo and time signature are inserted at the current time. Appending
        # them would place them at the end of the stream, i.e. after any notes
        # that have already been inserted.
        if token.startswith("tempo"):
            s.insert(currentOffset, tempo.MetronomeMark(number=float(splitToken[1])))

        if token.startswith("timesig"):
            s.insert(currentOffset, meter.TimeSignature(splitToken[1]))

        if token.startswith("velocity"):
            currentVelocity = int(splitToken[1])

        if token.startswith("note") and not token.lower().endswith("off"):
            noteDuration = _durationUntilNoteOff(tokens, index, splitToken[1])
            if noteDuration is not None:
                newNote = note.Note(nameWithOctave=splitToken[1],
                                    quarterLength=round(float(noteDuration), 5))
                newNote.volume.velocity = currentVelocity
                s.insert(currentOffset, newNote)

        if token.startswith("wait"):
            currentOffset += float(splitToken[1])

    return s

# Round trip: rebuild a stream from the example file's tokens and play it.
text2midi(midiTokens).show("midi")

In [12]:
# Convert every file in the dataset directory into one line of tokens.
path = '../dataset/'
# os.listdir returns entries in arbitrary order; sorting keeps the line order
# of the output file reproducible between runs and machines.
filenames = sorted(os.listdir(path))

# NOTE(review): the output file is opened in append mode, so re-running this
# cell adds the whole dataset to the file a second time.
with open("../dataset_text/miditokens_waitFix.txt", "a") as f:
    progress = 0
    for filename in filenames:
        midifile = open_midi(os.path.join(path, filename))
        f.write(' '.join(midi2text(midifile)) + '\n')
        progress += 1

        # Print one dot per 20 converted files as a progress indicator.
        if (progress % 20 == 0):
            print('.', end='')

....................................