In [45]:
from mido import MidiFile, MidiTrack
from mido import Message, MidiFile, MidiTrack
import os
from mido import MetaMessage
import random
import mido
# directory name should be midis, at the same level as this file. 

In [46]:
# for i, track in enumerate(mid.tracks):
#     print('Track {}: {}'.format(i, track.name))
#     for msg in track:
#         if msg.type == 'key_signature':
#             print(msg)
            

In [47]:
# This function will take in a folder and convert each MIDI file into a list of tokens where each token is a note value of the piece
# It will return a 2D list where each row is a piece and each column represents a token/note of the piece
def tokenize_pieces(midi_folder, track_name):
    midi_pieces = []
    for midi_file_name in os.listdir(midi_folder):
        midsource = MidiFile(midi_folder + "/" + midi_file_name)
        note_token_sequence = []
        for i, track in enumerate(midsource.tracks):
            if track.name == track_name:
                for msg in track:
                    if msg.type == 'note_on':
                         note_token_sequence.append(msg.note)
        midi_pieces.append(note_token_sequence)
    return midi_pieces


In [48]:
tokenize_pieces("./midis","Piano right")


[[64,
  67,
  72,
  67,
  76,
  77,
  74,
  72,
  76,
  77,
  74,
  72,
  88,
  78,
  79,
  80,
  83,
  81,
  79,
  77,
  76,
  74,
  72,
  71,
  79,
  77,
  74,
  72,
  81,
  79,
  76,
  74,
  82,
  81,
  69,
  79,
  77,
  76,
  74,
  72,
  71,
  72,
  74,
  79,
  83,
  86,
  86,
  84,
  83,
  81,
  81,
  79,
  78,
  76,
  74,
  72,
  71,
  72,
  71,
  74,
  79,
  83,
  86,
  86,
  84,
  83,
  81,
  81,
  79,
  78,
  76,
  74,
  72,
  71,
  72,
  71,
  74,
  72,
  71,
  72,
  74,
  76,
  78,
  79,
  81,
  83,
  84,
  86,
  88,
  90,
  91,
  90,
  88,
  86,
  84,
  83,
  81,
  79,
  78,
  76,
  88,
  79,
  83,
  78,
  81,
  79,
  64,
  67,
  72,
  67,
  76,
  77,
  74,
  72,
  76,
  77,
  74,
  72,
  88,
  78,
  79,
  80,
  83,
  81,
  79,
  77,
  76,
  74,
  72,
  71,
  79,
  77,
  74,
  72,
  81,
  79,
  76,
  74,
  82,
  81,
  69,
  79,
  77,
  76,
  74,
  72,
  71,
  72,
  74,
  79,
  83,
  86,
  86,
  84,
  83,
  81,
  81,
  79,
  78,
  76,
  74,
  72,
  71,
  72,
  71,
  74,
  79

In [49]:
# This function will generate n grams given a list of token sequences
def create_n_grams(n, token_sequences):
    
    paddings = n - 1
    start_padding = "<start>"
    end_padding = "<end>"
    n_grams_count = {}

    # Add the start padding (<start>) n-1 number of times
    if paddings > 0:
        for i in range(0, len(token_sequences)):
            for j in range(0, paddings):
                token_sequences[i].insert(0, start_padding)

    # Add the end padding (<end>) once to the end of each sequence
    for i in range(0, len(token_sequences)):
        token_sequences[i].append(end_padding)

    # Iterate through each word in each sequence and using slicing to get the n gram, then add to dictionary/update count
    for sequence in token_sequences:
        for i in range(len(sequence)-n+1): # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python
            gram = sequence[i:i+n] # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python
            gram = tuple(gram)
            if gram in n_grams_count:
                n_grams_count[gram] += 1
            else:
                n_grams_count[gram] = 1

    return n_grams_count


In [50]:
def generate_song(n_gram_frequencies, k):
    
    # We need to first determine what the "n" is from the input frequency dictionary
    n = len(tuple(n_gram_frequencies.keys())[0])

    # Start with an empty tweet
    song_sequence = []

    # For bigrams and up...
    if n - 1 > 0:
        for i in range(0, n-1):
            song_sequence.insert(0, "<start>")
    
        while song_sequence[len(song_sequence)-1] != "<end>":
            
            # Slice the song so we can get the previous n-1 gram that comes before the predicted token
            previous_token_sequence = song_sequence[len(song_sequence)-(n-1):len(song_sequence)]

            # Using dictionary comprehension, create a dictionary containing all n-grams that contain the previous token sequence
            matched_dictionary = {k:v for k,v in n_gram_frequencies.items() if k[0:n - 1] == tuple(previous_token_sequence)}

            # Prepare a list of possible choices along with their corresponding weights
            choices = []
            choices_weights = []

            # Populate the choices and weights by iterating through the keys in the matched dictionary 
            matched_dictionary_keylist = list(matched_dictionary.keys())
            for j in range(0, len(matched_dictionary_keylist)):
                # insert the last word of the tuple key insert it as a choice
                choices.append(matched_dictionary_keylist[j][len(matched_dictionary_keylist[j])-1])

                # Weights are calculated by taking the actual count of the key and dividing it by all that is found
                choices_weights.append(matched_dictionary[matched_dictionary_keylist[j]] / len(matched_dictionary) + k)
            
            chosen = random.choices(choices, weights=choices_weights, k=1)
            song_sequence.append(chosen[0])
    else:
        # Special case for unigrams only
        choices = []
        choices_weights = []
        n_gram_frequencies_keylist = list(n_gram_frequencies.keys())
        for j in range(0, len(n_gram_frequencies_keylist)):
            # insert the last word of the tuple key as a choice
            choices.append(n_gram_frequencies_keylist[j][0])
            choices_weights.append(n_gram_frequencies[n_gram_frequencies_keylist[j]] / len(n_gram_frequencies) + k)
        
        # Adding the first item so tweet isn't empty
        song_sequence.append(random.choices(choices, weights=choices_weights, k=1)[0])
        while song_sequence[len(song_sequence)-1] != "<end>":
            chosen = random.choices(choices, weights=choices_weights, k=1)
            song_sequence.append(chosen[0])
    
    # Removing the paddings and converting the grams of the tweet into a string
    # return ' '.join([word for word in tweet if word != "<start>" and word != "<end>"])
    for x in range(n-1):
        song_sequence.pop(0)
    song_sequence.pop(-1)
    return song_sequence

In [51]:
# Our bigram of notes!
song_token_sequences = create_n_grams(4, tokenize_pieces("./midis"))
song = generate_song(song_token_sequences, 0)

TypeError: tokenize_pieces() missing 1 required positional argument: 'track_name'

In [None]:
def create_midi(song):
    mid = MidiFile()
    track = MidiTrack()
    mid.tracks.append(track)
    for note in song: 
        track.append(mido.Message('note_on', note = note, time = 0))
        track.append(mido.Message('note_off', note = note, time = 256))

        
    mid.save('new_song2.mid')
    

In [None]:
create_midi(song)

In [53]:
def create_list_of_messages(midi_folder):
    list_of_messages = []
    for midi_file_name in os.listdir(midi_folder):
        midsource = MidiFile(midi_folder + "/" + midi_file_name)
        note_token_sequence = []
        for i, track in enumerate(midsource.tracks):
            if track.name == 'Piano right':
                for msg in track:
                    list_of_messages.append(msg)
    return list_of_messages

In [54]:
def create_times_list(list_of_messages):
    times_list = [0]
    time_sum = 0
    for msg in list_of_messages:
        time = msg.time
        time_sum += time
        times_list.append(time_sum)
    return times_list

In [56]:
list_of_messages = create_list_of_messages("./midis")
#len(list_of_messages)
#create_list_of_messages("./midis")
create_times_list(list_of_messages)

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 256,
 256,
 256,
 2048,
 2048,
 3072,
 3072,
 3584,
 3584,
 3840,
 3840,
 4096,
 4096,
 4608,
 6144,
 6656,
 6656,
 6912,
 6912,
 7168,
 7168,
 8192,
 8192,
 8704,
 9216,
 10240,
 10240,
 11776,
 11776,
 12288,
 12288,
 12800,
 12800,
 13312,
 13312,
 13824,
 13824,
 14336,
 14336,
 14848,
 14848,
 15360,
 15360,
 15872,
 15872,
 16384,
 16384,
 16896,
 16896,
 17408,
 18432,
 18944,
 18944,
 19456,
 19456,
 19968,
 19968,
 20480,
 21504,
 22016,
 22016,
 22528,
 22528,
 23040,
 23040,
 23552,
 24576,
 25600,
 25600,
 26112,
 26112,
 26624,
 26624,
 27136,
 27136,
 27648,
 27648,
 28160,
 28160,
 28672,
 28672,
 29696,
 30379,
 30379,
 30464,
 30635,
 30635,
 30720,
 30720,
 33792,
 33792,
 34048,
 34048,
 34304,
 34304,
 34560,
 34560,
 34816,
 34816,
 35072,
 35072,
 35328,
 35328,
 35584,
 35584,
 35840,
 35840,
 36096,
 36096,
 36352,
 36352,
 36608,
 36608,
 36864,
 36864,
 37120,
 37120,
 37376,
 37376,
 37632,
 37632,
 37888,
 37888,
 39936,
 399

In [41]:
# Convert time lists into right and left dictionaries for the notes that match
# Ex: right: {0 : A, 4 : B, 8 : D, etc…..}, left: {0 : C, 4 : E, 8 : F, etc……}

def create_dict(times, notes):
    duration_dict = {time:note for time, note in zip(times,notes)}
    return duration_dict
    


In [42]:
right_times = [0,4,8,12,20]
left_times = [0,4,8,16,24]

right_notes = [67,68,59,79,88]
left_times = [56,76,77,66,55]

create_dict(right_times, right_notes)

{0: 67, 4: 68, 8: 59, 12: 79, 20: 88}

In [None]:
def convert_rhythm_dictionary_to_note_sequences(right_hand_time_dict, left_hand_time_dict):
    
    right_hand = []
    left_hand = []
    
    for time in right_hand_time_dict.keys():
        right_hand.append(right_hand_time_dict[time])
        left_hand.append(left_hand_time_dict[time])
    
    return right_hand
        
        