In [11]:
from mido import MidiFile, MidiTrack
import os
from mido import MetaMessage
import random

# The MIDI files must live in a directory named "midis", located next to this notebook.

In [1]:
# List every track by index and name, then echo the key-signature messages it contains.
# NOTE(review): `mid` is not defined in any cell visible here -- presumably a MidiFile
# loaded in an earlier session; confirm before relying on Restart & Run All.
for i, track in enumerate(mid.tracks):
    print(f'Track {i}: {track.name}')
    for msg in filter(lambda m: m.type == 'key_signature', track):
        print(msg)
            

In [12]:
def tokenize_pieces(midi_folder, track_name='Piano right'):
    """Convert every MIDI file in a folder into a list of note tokens.

    Args:
        midi_folder: Path of the directory to scan (not searched recursively).
        track_name: Name of the track whose notes are extracted. Defaults to
            'Piano right'.

    Returns:
        A 2D list with one row per MIDI file (in sorted file-name order). Each
        row holds the `note` value of every sounding `note_on` message found in
        the selected track; a file without such a track yields an empty row.
    """
    midi_extensions = ('.mid', '.midi')
    midi_pieces = []

    # os.listdir returns entries in arbitrary order; sort so the rows are reproducible.
    for midi_file_name in sorted(os.listdir(midi_folder)):
        midi_path = os.path.join(midi_folder, midi_file_name)

        # Skip sub-directories and stray files (.DS_Store, checkpoints, ...) that
        # MidiFile cannot parse.
        if not os.path.isfile(midi_path):
            continue
        if not midi_file_name.lower().endswith(midi_extensions):
            continue

        midsource = MidiFile(midi_path)
        note_token_sequence = []
        for track in midsource.tracks:
            if track.name != track_name:
                continue
            for msg in track:
                # A note_on with velocity 0 is the MIDI convention for "note off",
                # so it must not be counted as a played note.
                if msg.type == 'note_on' and msg.velocity > 0:
                    note_token_sequence.append(msg.note)
        midi_pieces.append(note_token_sequence)

    return midi_pieces

In [13]:
def create_n_grams(n, token_sequences):
    """Count the n-grams that occur in a collection of token sequences.

    Every sequence is padded with n-1 "<start>" tokens in front and a single
    "<end>" token behind before counting. The padding is applied to a copy, so
    the caller's sequences are left untouched and the function can be called
    repeatedly on the same data.

    Args:
        n: Size of each gram; must be at least 1.
        token_sequences: Iterable of token sequences (one per piece).

    Returns:
        A dict mapping each n-gram (a tuple of n tokens) to the number of times
        it occurs across all padded sequences.

    Raises:
        ValueError: If n is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    start_padding = "<start>"
    end_padding = "<end>"
    n_grams_count = {}

    for sequence in token_sequences:
        # Build a padded copy instead of inserting into the caller's list.
        padded = [start_padding] * (n - 1) + list(sequence) + [end_padding]

        # Slide a window of width n over the padded sequence.
        # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python
        for i in range(len(padded) - n + 1):
            gram = tuple(padded[i:i + n])
            n_grams_count[gram] = n_grams_count.get(gram, 0) + 1

    return n_grams_count


In [14]:
# Count the note bigrams (n = 2) across every piece found in ./midis.
# Despite its name, this variable holds the n-gram -> count dictionary
# returned by create_n_grams, not the raw token sequences.
song_token_sequences = create_n_grams(
    n=2,
    token_sequences=tokenize_pieces(midi_folder="./midis"),
)

In [21]:
def generate_song(n_gram_frequencies, k):
    """Generate a token sequence by sampling from an n-gram frequency model.

    The song starts from n-1 "<start>" paddings. Each step looks at the last
    n-1 generated tokens, collects every n-gram that begins with that context
    and draws the next token at random, weighted by the n-gram counts.
    Sampling continues until "<end>" is drawn, so the model must contain
    n-grams that end in "<end>".

    Args:
        n_gram_frequencies: Dict mapping n-gram tuples to their counts. The
            value of n is taken from the length of the first key.
        k: Constant added to the weight of every candidate token.

    Returns:
        The generated tokens as a list, with all "<start>" paddings and the
        final "<end>" removed. An empty model yields an empty list.

    Raises:
        IndexError: If the current context is not the prefix of any n-gram in
            the model, so there is nothing to sample from.
    """
    if not n_gram_frequencies:
        return []

    # Determine "n" from the first key of the frequency dictionary.
    n = len(next(iter(n_gram_frequencies)))
    context_size = n - 1

    # Group the candidate next-tokens by their (n-1)-token context once, instead
    # of rescanning the whole dictionary for every generated token. Insertion
    # order is preserved so seeded runs draw from the same candidate ordering.
    candidates_by_context = {}
    for gram, count in n_gram_frequencies.items():
        candidates_by_context.setdefault(gram[:context_size], []).append((gram[-1], count))

    # NOTE(review): the count is divided by the number of distinct candidates,
    # not by their total count. random.choices only uses relative weights, so
    # this affects the result only when k != 0 -- confirm the intended smoothing.
    weighted_by_context = {}
    for context, candidates in candidates_by_context.items():
        tokens = [token for token, _ in candidates]
        weights = [count / len(candidates) + k for _, count in candidates]
        weighted_by_context[context] = (tokens, weights)

    song = ["<start>"] * context_size

    # For unigrams the context is empty and `song` starts empty, hence the
    # `not song` guard before looking at the last token.
    while not song or song[-1] != "<end>":
        context = tuple(song[len(song) - context_size:])
        if context not in weighted_by_context:
            raise IndexError("no n-gram continues the context {!r}".format(context))
        tokens, weights = weighted_by_context[context]
        song.append(random.choices(tokens, weights=weights, k=1)[0])

    # Strip exactly the n-1 leading paddings and the trailing "<end>".
    return song[context_size:-1]

In [26]:
song = generate_song(song_token_sequences, 0)


78
76


In [60]:
# make dictionary 
# def createNoteDict(msgs):
#     note_dict = {}
#     for msg in msgs:
#         if msg.type == 'note_on':
#             #print(msg.note)
#             note = msg.note
#             if note in note_dict.keys():
#                 note_dict[note] += 1
#             else:
#                 note_dict[note] = 1    
#     sorted_note_dict = sorted(note_dict.items(), key=lambda x: x[1], reverse=True)
#     return sorted_note_dict