In [4]:
from mido import MidiFile, MidiTrack
import os
from mido import MetaMessage

# directory name should be midis, at the same level as this file. 

In [1]:
for i, track in enumerate(mid.tracks):
    print('Track {}: {}'.format(i, track.name))
    for msg in track:
        if msg.type == 'key_signature':
            print(msg)
            

In [5]:
# This function will take in a folder and convert each MIDI file into a list of tokens where each token is a note value of the piece
# It will return a 2D list where each row is a piece and each column represents a token/note of the piece
def tokenize_pieces(midi_folder):
    midi_pieces = []
    for midi_file_name in os.listdir(midi_folder):
        midsource = MidiFile(midi_folder + "/" + midi_file_name)
        note_token_sequence = []
        for i, track in enumerate(midsource.tracks):
            if track.name == 'Piano right':
                for msg in track:
                    if msg.type == 'note_on':
                         note_token_sequence.append(msg.note)
        midi_pieces.append(note_token_sequence)
    return midi_pieces

In [6]:
# This function will generate n grams given a list of token sequences
def create_n_grams(n, token_sequences):
    
    paddings = n - 1
    start_padding = "<start>"
    end_padding = "<end>"
    n_grams_count = {}

    # Add the start padding (<start>) n-1 number of times
    if paddings > 0:
        for i in range(0, len(token_sequences)):
            for j in range(0, paddings):
                token_sequences[i].insert(0, start_padding)

    # Add the end padding (<end>) once to the end of each sequence
    for i in range(0, len(token_sequences)):
        token_sequences[i].append(end_padding)

    # Iterate through each word in each sequence and using slicing to get the n gram, then add to dictionary/update count
    for sequence in token_sequences:
        for i in range(len(sequence)-n+1): # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python
            gram = sequence[i:i+n] # Source: https://stackoverflow.com/questions/13423919/computing-n-grams-using-python
            gram = tuple(gram)
            if gram in n_grams_count:
                n_grams_count[gram] += 1
            else:
                n_grams_count[gram] = 1

    return n_grams_count


In [7]:
# Our bigram of notes!
create_n_grams(2, tokenize_pieces("./midis"))

{('<start>', 64): 18,
 (64, 67): 701,
 (67, 72): 755,
 (72, 67): 550,
 (67, 76): 414,
 (76, 77): 1020,
 (77, 74): 585,
 (74, 72): 1831,
 (72, 76): 1087,
 (72, 88): 41,
 (88, 78): 34,
 (78, 79): 590,
 (79, 80): 247,
 (80, 83): 199,
 (83, 81): 724,
 (81, 79): 1157,
 (79, 77): 1293,
 (77, 76): 1460,
 (76, 74): 1747,
 (72, 71): 1274,
 (71, 79): 276,
 (72, 81): 335,
 (79, 76): 691,
 (74, 82): 61,
 (82, 81): 205,
 (81, 69): 324,
 (69, 79): 47,
 (71, 72): 1029,
 (72, 74): 1087,
 (74, 79): 428,
 (79, 83): 278,
 (83, 86): 323,
 (86, 86): 136,
 (86, 84): 530,
 (84, 83): 684,
 (81, 81): 292,
 (79, 78): 562,
 (78, 76): 247,
 (71, 74): 837,
 (74, 76): 1133,
 (76, 78): 212,
 (79, 81): 739,
 (81, 83): 583,
 (83, 84): 556,
 (84, 86): 373,
 (86, 88): 300,
 (88, 90): 75,
 (90, 91): 78,
 (91, 90): 64,
 (90, 88): 82,
 (88, 86): 373,
 (76, 88): 456,
 (88, 79): 147,
 (83, 78): 92,
 (78, 81): 204,
 (79, 64): 43,
 (79, 67): 367,
 (67, 79): 421,
 (79, 69): 82,
 (69, 81): 337,
 (81, 70): 47,
 (70, 82): 126,
 (8

In [60]:
# make dictionary 
# def createNoteDict(msgs):
#     note_dict = {}
#     for msg in msgs:
#         if msg.type == 'note_on':
#             #print(msg.note)
#             note = msg.note
#             if note in note_dict.keys():
#                 note_dict[note] += 1
#             else:
#                 note_dict[note] = 1    
#     sorted_note_dict = sorted(note_dict.items(), key=lambda x: x[1], reverse=True)
#     return sorted_note_dict