In [14]:
import mido
import os
import random
from mido import MidiFile

# The MIDI files are expected in a directory named "midis" located in the same folder as this notebook.

In [17]:
# for i, track in enumerate(mid.tracks):
#     print('Track {}: {}'.format(i, track.name))
#     for msg in track:
#         if msg.type == 'key_signature':
#             print(msg)
            

In [18]:
# This function will take in a folder and convert each MIDI file into a list of tokens where each token is a note value of the piece
# It will return a 2D list where each row is a piece and each column represents a token/note of the piece
def tokenize_pieces(midi_folder):
    midi_pieces = []
    for midi_file_name in os.listdir(midi_folder):
        midsource = MidiFile(midi_folder + "/" + midi_file_name)
        note_token_sequence = []
        for i, track in enumerate(midsource.tracks):
            if track.name == 'Piano right':
                for msg in track:
                    if msg.type == 'note_on':
                         note_token_sequence.append(msg.note)
        midi_pieces.append(note_token_sequence)
    return midi_pieces

In [19]:
# This function will generate n grams given a list of token sequences
def create_n_grams(n, token_sequences):
    
    paddings = n - 1
    start_padding = "<start>"
    end_padding = "<end>"
    n_grams_count = {}

    # Add the start padding (<start>) n-1 number of times
    if paddings > 0:
        for i in range(0, len(token_sequences)):
            for j in range(0, paddings):
                token_sequences[i].insert(0, start_padding)

    # Add the end padding (<end>) once to the end of each sequence
    for i in range(0, len(token_sequences)):
        token_sequences[i].append(end_padding)

    # Iterate through each word in each sequence and using slicing to get the n gram, then add to dictionary/update count
    for sequence in token_sequences:
        for i in range(len(sequence)-n+1): # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python
            gram = sequence[i:i+n] # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python
            gram = tuple(gram)
            if gram in n_grams_count:
                n_grams_count[gram] += 1
            else:
                n_grams_count[gram] = 1

    return n_grams_count


In [20]:
def generate_song(n_gram_frequencies, k):
    
    # We need to first determine what the "n" is from the input frequency dictionary
    n = len(tuple(n_gram_frequencies.keys())[0])

    # Start with an empty tweet
    song_sequence = []

    # For bigrams and up...
    if n - 1 > 0:
        for i in range(0, n-1):
            song_sequence.insert(0, "<start>")
    
        while song_sequence[len(song_sequence)-1] != "<end>":
            
            # Slice the song so we can get the previous n-1 gram that comes before the predicted token
            previous_token_sequence = song_sequence[len(song_sequence)-(n-1):len(song_sequence)]

            # Using dictionary comprehension, create a dictionary containing all n-grams that contain the previous token sequence
            matched_dictionary = {k:v for k,v in n_gram_frequencies.items() if k[0:n - 1] == tuple(previous_token_sequence)}

            # Prepare a list of possible choices along with their corresponding weights
            choices = []
            choices_weights = []

            # Populate the choices and weights by iterating through the keys in the matched dictionary 
            matched_dictionary_keylist = list(matched_dictionary.keys())
            for j in range(0, len(matched_dictionary_keylist)):
                # insert the last word of the tuple key insert it as a choice
                choices.append(matched_dictionary_keylist[j][len(matched_dictionary_keylist[j])-1])

                # Weights are calculated by taking the actual count of the key and dividing it by all that is found
                choices_weights.append(matched_dictionary[matched_dictionary_keylist[j]] / len(matched_dictionary) + k)
            
            chosen = random.choices(choices, weights=choices_weights, k=1)
            song_sequence.append(chosen[0])
    else:
        # Special case for unigrams only
        choices = []
        choices_weights = []
        n_gram_frequencies_keylist = list(n_gram_frequencies.keys())
        for j in range(0, len(n_gram_frequencies_keylist)):
            # insert the last word of the tuple key as a choice
            choices.append(n_gram_frequencies_keylist[j][0])
            choices_weights.append(n_gram_frequencies[n_gram_frequencies_keylist[j]] / len(n_gram_frequencies) + k)
        
        # Adding the first item so tweet isn't empty
        song_sequence.append(random.choices(choices, weights=choices_weights, k=1)[0])
        while song_sequence[len(song_sequence)-1] != "<end>":
            chosen = random.choices(choices, weights=choices_weights, k=1)
            song_sequence.append(chosen[0])
    
    # Removing the paddings and converting the grams of the tweet into a string
    # return ' '.join([word for word in tweet if word != "<start>" and word != "<end>"])
    for x in range(n-1):
        song_sequence.pop(0)
    song_sequence.pop(-1)
    return song_sequence

In [21]:
# Build 4-gram counts (n = 4, not bigrams) from the note tokens of every piece in ./midis.
# NOTE: despite its name, song_token_sequences holds the n-gram count dictionary
# returned by create_n_grams, not the token sequences themselves.
song_token_sequences = create_n_grams(4, tokenize_pieces("./midis"))
# Generate one song from those counts; the second argument is the constant k added to every weight.
song = generate_song(song_token_sequences, 0)

In [22]:
# Show the generated tokens (MIDI note numbers) of the song.
print(song)

[88, 100, 88, 100, 88, 76, 88, 100, 88, 100, 101, 100, 101, 100, 101, 100, 101, 100, 86, 98, 100, 98, 84, 96, 79, 91, 86, 91, 88, 87, 90, 93, 89, 88, 84, 83, 84, 82, 81, 80, 80, 62, 69, 68, 64, 76, 78, 80, 81, 85, 81, 84, 85, 80, 92, 85, 90, 89, 93, 92, 85, 90, 85, 81, 78, 73, 69, 66, 62, 69, 66, 62, 62, 65, 63, 62, 60, 66, 54, 55, 60, 67, 72, 60, 67, 77, 60, 67, 72, 60, 71, 67, 59, 71, 67, 59, 71, 65, 70, 71, 60, 65, 69, 60, 67, 64, 60, 57, 57, 69, 64, 52, 69, 57, 81, 69, 72, 77, 76, 72, 76, 72, 76, 69, 81, 70, 82, 74, 73, 74, 76, 77, 77, 79, 81, 83, 84, 84, 84, 81, 77, 89, 77, 82, 89, 74, 77, 67, 73, 67, 73, 77, 67, 73, 75, 72, 81, 81, 89, 89, 88, 88, 87, 67, 66, 65, 64, 63, 64, 67, 65, 62, 60, 64, 72, 71, 59, 65, 55, 59, 62, 67, 55, 55, 59, 62, 67, 65, 67, 65, 67, 64, 67, 67, 79, 77, 81, 73, 74, 76, 74, 68, 67, 73, 76, 67, 72, 78, 79, 71, 79, 73, 70, 67, 73, 76, 79, 76, 84, 83, 72, 81, 79, 83, 81, 78, 87, 82, 83, 76, 77, 75, 76, 79, 77, 76, 74, 81, 76, 73, 69, 64, 64, 57, 53, 65, 53

In [None]:
def create_midi(song, output_path='new_song89.mid'):
    """Write a generated note sequence to a single-track MIDI file.

    For every note of ``song`` the track receives the note itself (on, then
    off after 256 ticks) followed by a fixed three-note chord (67, 59, 79).

    Args:
        song: Iterable of MIDI note numbers, e.g. the result of
            ``generate_song``.
        output_path: Path of the MIDI file to write. Defaults to
            'new_song89.mid', the file name that used to be hard-coded.
    """
    # Fixed accompaniment played after every melody note.
    chord_notes = (67, 59, 79)

    mid = MidiFile()
    # MidiTrack is not imported by name in this notebook, so reach it via the mido module.
    track = mido.MidiTrack()
    mid.tracks.append(track)

    for note in song:
        track.append(mido.Message('note_on', note = note, time = 0))
        track.append(mido.Message('note_off', note = note, time = 256))

        # All chord notes start together (time = 0 after the previous message).
        for chord_note in chord_notes:
            track.append(mido.Message('note_on', note = chord_note, time = 0))
        # NOTE(review): each note_off carries time = 256, so the chord notes are
        # released one after another rather than together - confirm this is intended.
        for chord_note in chord_notes:
            track.append(mido.Message('note_off', note = chord_note, time = 256))

    mid.save(output_path)
    

In [None]:
# Write the generated song to a MIDI file on disk.
create_midi(song)

In [17]:
def create_list_of_messages(midi_folder):
    """Collect the ``note_on`` messages of every MIDI file in a folder.

    Only tracks whose name is exactly 'Piano right' are read. The messages of
    all files are gathered into one flat list, file after file in sorted
    file-name order.

    Args:
        midi_folder: Path of the directory that holds the MIDI files.

    Returns:
        A flat list of the ``note_on`` message objects found.
    """
    list_of_messages = []
    # Sort so the result does not depend on the OS-specific order of os.listdir.
    for midi_file_name in sorted(os.listdir(midi_folder)):
        midi_path = os.path.join(midi_folder, midi_file_name)
        # Skip sub-directories and stray non-MIDI files (e.g. .DS_Store) that
        # would otherwise make the MIDI parser fail.
        if not os.path.isfile(midi_path):
            continue
        if not midi_file_name.lower().endswith((".mid", ".midi")):
            continue
        midsource = MidiFile(midi_path)
        for track in midsource.tracks:
            if track.name != 'Piano right':
                continue
            list_of_messages.extend(msg for msg in track if msg.type == 'note_on')
    return list_of_messages

In [None]:
def create_times_list(list_of_messages):
    """Turn the per-message ``time`` values into cumulative times.

    Args:
        list_of_messages: Iterable of objects with a numeric ``time``
            attribute (presumably the delta time of a mido message - confirm
            against the caller).

    Returns:
        A list that starts with 0 and then holds, for each message in order,
        the running total of all ``time`` values up to and including it.
    """
    times_list = [0]
    elapsed = 0
    for msg in list_of_messages:
        # Add to the running total only. Summing the whole list of cumulative
        # values here would count earlier times repeatedly and make the
        # numbers explode.
        elapsed += msg.time
        times_list.append(elapsed)
    return times_list

In [None]:
# Collect the note_on messages from ./midis and print only how many there are,
# not the messages themselves.
list_of_messages = create_list_of_messages("./midis")
print(len(list_of_messages))


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)






IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [None]:
# As the last expression of the cell, the complete returned list is displayed as the cell output.
create_times_list(list_of_messages)