In [51]:
from mido import MidiFile, MidiTrack
from mido import Message, MidiFile, MidiTrack
import os
from mido import MetaMessage
import random

# The MIDI files are expected in a directory named `midis`, located next to this notebook.

In [53]:
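# Debugging aid (disabled): for a loaded MidiFile bound to `mid`, print each track's
# index and name followed by any key_signature messages found in that track.
# for i, track in enumerate(mid.tracks):
#     print('Track {}: {}'.format(i, track.name))
#     for msg in track:
#         if msg.type == 'key_signature':
#             print(msg)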
            

In [54]:
def tokenize_pieces(midi_folder):
    """Convert every MIDI file in a folder into a list of note tokens.

    Only tracks named 'Piano right' are read. Each sounding note of such a
    track contributes one token: its MIDI note number.

    Args:
        midi_folder: Path of the directory that holds the MIDI files.

    Returns:
        A 2D list with one row per MIDI file, in sorted file-name order. Each
        row is the list of note numbers of that piece; a file without a
        'Piano right' track yields an empty row.
    """
    midi_pieces = []
    # Sorted so the piece order does not depend on the directory listing order.
    for midi_file_name in sorted(os.listdir(midi_folder)):
        midi_path = os.path.join(midi_folder, midi_file_name)
        # Skip sub-directories and stray non-MIDI files (e.g. .DS_Store,
        # notebook checkpoints) instead of handing them to the MIDI parser.
        if not os.path.isfile(midi_path):
            continue
        if not midi_file_name.lower().endswith(('.mid', '.midi')):
            continue

        midsource = MidiFile(midi_path)
        note_token_sequence = []
        for track in midsource.tracks:
            if track.name != 'Piano right':
                continue
            for msg in track:
                # In MIDI a note_on with velocity 0 is a note release, not a
                # new note, so it must not be counted as a token.
                if msg.type == 'note_on' and msg.velocity > 0:
                    note_token_sequence.append(msg.note)
        midi_pieces.append(note_token_sequence)
    return midi_pieces

In [55]:
def create_n_grams(n, token_sequences):
    """Count the n-grams that occur in a list of token sequences.

    Before counting, each sequence is padded with n-1 "<start>" tokens at the
    front and a single "<end>" token at the back. Padding is applied to a
    copy, so the sequences passed in are left unmodified and can be reused
    (for example to build models for several values of n).

    Args:
        n: Size of the n-grams; must be at least 1.
        token_sequences: List of token lists, one per piece.

    Returns:
        A dict mapping each n-gram (a tuple of n tokens) to the number of
        times it occurs across all sequences.

    Raises:
        ValueError: If n is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    start_padding = "<start>"
    end_padding = "<end>"
    n_grams_count = {}

    for sequence in token_sequences:
        # Build a padded copy rather than inserting into the caller's list.
        padded = [start_padding] * (n - 1) + list(sequence) + [end_padding]

        # Sliding window over the padded sequence.
        # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python
        for i in range(len(padded) - n + 1):
            gram = tuple(padded[i:i + n])
            n_grams_count[gram] = n_grams_count.get(gram, 0) + 1

    return n_grams_count


In [56]:
def generate_song(n_gram_frequencies, k):
    """Randomly generate a token sequence from an n-gram frequency table.

    Generation starts from n-1 "<start>" tokens. At every step the next token
    is drawn from the n-grams whose first n-1 tokens equal the last n-1
    generated tokens, and generation stops once "<end>" is drawn. For
    unigrams (n == 1) the context is empty, so every token is drawn from the
    whole table.

    Args:
        n_gram_frequencies: Non-empty dict mapping n-gram tuples to their
            counts, such as the dict produced by create_n_grams. The n-gram
            size is taken from the first key.
        k: Constant added to the weight of every candidate. A candidate's
            weight is its count divided by the number of distinct candidates
            for the current context, plus k. random.choices treats the
            weights as relative, so they do not need to sum to 1.

    Returns:
        The generated tokens as a list, with the "<start>" and "<end>"
        padding removed.

    Raises:
        ValueError: If n_gram_frequencies is empty or its keys are empty tuples.
    """
    if not n_gram_frequencies:
        raise ValueError("n_gram_frequencies must contain at least one n-gram")

    # Determine "n" from the first key of the frequency table.
    n = len(next(iter(n_gram_frequencies)))
    if n < 1:
        raise ValueError("n-gram keys must contain at least one token")
    context_size = n - 1

    # Index the table once by context (the first n-1 tokens of each n-gram)
    # instead of rescanning every n-gram for each generated token. Candidates
    # keep the table's iteration order.
    continuations = {}
    for gram, count in n_gram_frequencies.items():
        tokens, counts = continuations.setdefault(gram[:context_size], ([], []))
        tokens.append(gram[-1])
        counts.append(count)

    weighted_continuations = {
        context: (tokens, [count / len(tokens) + k for count in counts])
        for context, (tokens, counts) in continuations.items()
    }

    song_sequence = ["<start>"] * context_size

    # `not song_sequence` covers the unigram case, where there is no start
    # padding and the first token still has to be drawn.
    while not song_sequence or song_sequence[-1] != "<end>":
        # len(...) - context_size (rather than a negative index) keeps the
        # slice empty when context_size is 0.
        context = tuple(song_sequence[len(song_sequence) - context_size:])
        # An unknown context leaves both lists empty, in which case
        # random.choices raises, as it would for any empty population.
        choices, choices_weights = weighted_continuations.get(context, ([], []))
        chosen = random.choices(choices, weights=choices_weights, k=1)
        song_sequence.append(chosen[0])

    # Drop the leading "<start>" padding and the trailing "<end>" token.
    return song_sequence[context_size:-1]

In [57]:
# Build a 4-gram frequency table from the pieces in ./midis, then sample a song from it.
# Despite its name, song_token_sequences holds the n-gram counts returned by
# create_n_grams, not token sequences. The 0 passed to generate_song is its k argument.
song_token_sequences = create_n_grams(4, tokenize_pieces("./midis"))
song = generate_song(song_token_sequences, 0)

In [58]:
# Show the generated note tokens.
print(song)

[64, 52, 57, 64, 60, 62, 65, 64, 62, 60, 64, 67, 59, 65, 64, 59, 60, 62, 65, 64, 60, 67, 72, 60, 63, 68, 72, 60, 63, 62, 59, 63, 67, 72, 75, 74, 72, 71, 69, 67, 65, 64, 61, 62, 64, 65, 67, 69, 64, 60, 65, 68, 72, 60, 63, 68, 72, 63, 68, 75, 63, 69, 76, 64, 67, 65, 64, 69, 67, 72, 76, 77, 76, 77, 76, 77, 81, 81, 71, 72, 75, 76, 68, 76, 68, 74, 68, 64, 58, 62, 61, 57, 55, 58, 54, 57, 55, 58, 54, 59, 55, 59, 62, 67, 57, 60, 62, 67, 59, 64, 67, 66, 67, 72, 67, 76, 77, 79, 77, 76, 74, 72, 80, 74, 72, 71, 74, 79, 78, 77, 76, 75, 74, 76, 80, 89, 77, 88, 76, 86, 74, 73, 85, 88, 90, 93, 91, 89, 88, 86, 84, 82, 81, 79, 78, 79, 81, 79, 77, 76, 77, 78, 79, 78, 79, 84, 79, 84, 84, 89, 84, 87, 80, 86, 80, 74, 90, 82, 78, 90, 90, 90, 91, 93, 91, 88, 84, 81, 79, 81, 84, 84, 86, 88, 90, 91, 90, 88, 86, 85, 88, 74, 78, 73, 76, 74, 71, 67, 72, 75, 74, 72, 70, 72, 69, 70, 71, 72, 74, 76, 74, 72, 71, 72, 74, 67, 67, 72, 76, 75, 76, 79, 64, 67, 72, 79, 67, 74, 72, 71, 74, 72, 71, 72, 71, 65, 59, 59, 65, 71,

In [59]:
def create_midi(song, output_path='new_song2.mid', note_duration=256):
    """Write a sequence of notes to a single-track MIDI file.

    Every note is written as a note_on message followed directly by a
    note_off message for the same note, so the notes play one after another.

    Args:
        song: Iterable of MIDI note numbers.
        output_path: Path of the MIDI file to write. Defaults to
            'new_song2.mid', the name this function always used before.
        note_duration: Value of the `time` field of each note_off message
            (presumably a delta time in ticks, per mido's track-message
            convention - confirm against the mido docs). Defaults to 256.
    """
    mid = MidiFile()
    track = MidiTrack()
    mid.tracks.append(track)
    for note in song:
        # Use the directly imported Message class: this notebook only runs
        # `from mido import ...`, so the bare module name `mido` is undefined
        # and `mido.Message` would raise NameError.
        track.append(Message('note_on', note=note, time=0))
        track.append(Message('note_off', note=note, time=note_duration))

    mid.save(output_path)
    

In [60]:
# Save the generated song through create_midi.
create_midi(song)