# In [10]:  (Jupyter cell marker left over from the notebook export)
### \\\ --- This script wrangles MIDI data into a format usable by R Studio for the purpose of plotting dynamic networks. --- /// ###
# Python version: Targets 3.6.6
# Libraries: csv, itertools, mido
# Written by: Kyle Quarles

import csv
import itertools
from mido import MidiFile #! Targets no later than Python 3.6!
# Load the MIDI file to analyze. NOTE: the same file is loaded into `mid` again further down, before any function uses it.
mid = MidiFile('ockIntroitus.mid')

###Getting oriented with Mido:

# how to print just midi data;;; 
#for i, track in enumerate(mid.tracks):
    #print('Track {}: {}'.format(i, track.name))  # be advised that in MIDI a note_on message with a velocity of 0 is equivalent to a note_off message
    #for msg in track:
        #print(str(msg))

#how to see how many tracks in a MidiFile
#mid.print_tracks(meta_only=True)

### \\\ --- Dynamic NODE creation from a midi file --- /// ###

mid = MidiFile('ockIntroitus.mid')  # replace this with whatever .mid file you want to analyze (make sure it is in the same directory as this script)

def midi2list(midi_file):
    """Extract the note_on / note_off messages of every track as token lists.

    Each kept message is converted with ``str(msg)`` and split on single
    spaces, so a message becomes a list of strings such as
    ``['note_on', 'channel=0', 'note=60', 'velocity=64', 'time=0']``.

    MIDI delta times are relative to the previous message in the track,
    whatever its type. Messages that are filtered out here therefore still
    carry time that must not be lost: their ``time`` values are summed and
    added to the ``time=`` token of the next kept note message, so that the
    kept messages' delta times still add up to the correct absolute times.

    Args:
        midi_file: object with a ``tracks`` sequence; each track is an
            iterable of messages exposing ``type`` and ``time`` and has a
            ``name`` attribute (e.g. a ``mido.MidiFile``).

    Yields:
        ``['Track <index> <name>', messages]`` for every track, where
        ``messages`` is the (possibly empty) list of token lists.
    """
    note_types = ('note_on', 'note_off')
    for i, track in enumerate(midi_file.tracks):
        extraction = []
        skipped_ticks = 0  # delta time of the dropped messages since the last kept one
        for msg in track:
            if msg.type not in note_types:
                skipped_ticks += getattr(msg, 'time', 0)
                continue
            tokens = str(msg).split(" ")
            if skipped_ticks:
                # Fold the dropped messages' delta time into this message's own delta.
                carried = 'time={}'.format(msg.time + skipped_ticks)
                tokens = [carried if token.startswith('time=') else token for token in tokens]
                skipped_ticks = 0
            extraction.append(tokens)
        yield ['Track {} {}'.format(i, track.name), extraction]

def get_full(some_list):
    """Return the entries of ``some_list`` whose message list is not empty.

    Each entry is expected to be indexable, with its message list at index 1.
    Entries whose message list has length 0 are left out; the remaining
    entries are returned in their original order in a new list.
    """
    kept = []
    for entry in some_list:
        if len(entry[1]) == 0:
            continue  # nothing was extracted for this entry
        kept.append(entry)
    return kept

def numberer(message_list):
    """Prefix every message with its 1-based position within its entry.

    The number is inserted in place at index 0 of each message list, and the
    count restarts at 1 for every entry. The same ``message_list`` object is
    returned after being modified.
    """
    for entry in message_list:
        for position, message in enumerate(entry[1], start=1):
            message.insert(0, position)
    return message_list

def standardizer(numbed_list):
    """Relabel every message whose velocity token is 'velocity=0' as a note_off.

    Messages are expected in the numbered layout, i.e. the message type at
    index 1 and the velocity token at index 4. Matching messages are changed
    in place and the same ``numbed_list`` object is returned.
    """
    zero_velocity = (
        message
        for entry in numbed_list
        for message in entry[1]
        if message[4] == 'velocity=0'
    )
    for message in zero_velocity:
        message[1] = 'note_off'
    return numbed_list

def abs_onsets(pl):
    """Yield, per entry, a running total of the messages' delta times.

    The delta time is read from the text after '=' in the token at index 5
    of each message (numbered layout, e.g. 'time=120') and converted to int.
    For every entry one ``itertools.accumulate`` iterator is yielded; it
    produces the cumulative sums, i.e. the absolute time of each message.
    """
    for entry in pl:
        ticks = []
        for message in entry[1]:
            _name, _equals, value = message[5].partition('=')
            ticks.append(int(value))
        yield itertools.accumulate(ticks)

def lister(blah):
    """Materialize every item of ``blah`` as a list and return them in a list."""
    return list(map(list, blah))

def final_formatter(combined_func, onsets):
    """Yield one dynamic-node row per consecutive pair of messages.

    Messages are walked two at a time: the first of each pair supplies the
    onset time and the note number, the second supplies the terminus time.
    This assumes each note_on is immediately followed by its note_off.

    A track with an odd number of messages has one unpaired trailing message;
    it is skipped instead of raising IndexError, which matches a partition
    size of ``len(messages) // 2`` rows per track.

    Args:
        combined_func: iterable of ``[label, messages]`` entries in the
            numbered layout (note token such as 'note=60' at index 3).
        onsets: per-track sequences of absolute times, parallel to the
            messages of the corresponding entry.

    Yields:
        ``[onset, terminus, vertex_id, 'FALSE', 'FALSE', duration]`` where
        ``vertex_id`` is the note number as a string and ``duration`` is
        ``terminus - onset``. Rows of all tracks come out as one flat stream.
    """
    for i, entry in enumerate(combined_func):
        messages = entry[1]
        # Stop at len - 1 so that j + 1 is always a valid index.
        for j in range(0, len(messages) - 1, 2):
            onset = onsets[i][j]
            terminus = onsets[i][j + 1]
            vertex_id = messages[j][3].partition('=')[2]
            onset_censored = 'FALSE'
            terminus_censored = 'FALSE'
            duration = terminus - onset
            yield [onset, terminus, vertex_id, onset_censored, terminus_censored, duration]

def csv_dynamic_node_maker(partitioned_list):
    """Write one 'track_<n>_dynamic_nodes.csv' file per track.

    Files are written to the current working directory. ``<n>`` is the
    track's position in ``partitioned_list`` plus one. Every file starts with
    the column header row, followed by the track's rows unchanged.
    """
    header = ['onset', 'terminus', 'vertex.id', 'onset.censored', 'terminus.censored', 'duration']
    for track_number, rows in enumerate(partitioned_list, start=1):
        filename = 'track_{}_dynamic_nodes.csv'.format(track_number)
        with open(filename, 'w', newline='') as handle:
            out = csv.writer(handle)
            out.writerow(header)
            out.writerows(rows)

# Step-by-step run of the extraction pipeline; every intermediate result gets its own
# name so it can be inspected by un-commenting the print that follows it.
raw_midi_list = list(midi2list(mid))
# print('raw_midi_list',raw_midi_list)
full_messages = get_full(raw_midi_list)
# print('full_messages',full_messages)
numbed_full_mess = numberer(full_messages)
# print('numbed_full_mess',numbed_full_mess)
standardized = standardizer(numbed_full_mess)
# print('standardized',standardized)

# The same pipeline as a single expression, run again from `mid`; this is the result used from here on.
combined_func = standardizer(numberer(get_full(list(midi2list(mid)))))
# print('combined_func',combined_func)

# Absolute times per track: abs_onsets yields iterators, so each one is turned into a list.
onsetz = list(abs_onsets(combined_func))
list_onsetz = [list(track_onsets) for track_onsets in onsetz]
# print('list_onsetz',list_onsetz)

# final_formatter yields the rows of all tracks as one flat stream, so we record how many
# rows (one per pair of messages) belong to each track.
node_partition_sizes = [len(entry[1]) // 2 for entry in combined_func]
# print('node_partition_sizes',node_partition_sizes)
final_format = final_formatter(combined_func, list_onsetz)
# print('final_format',list(final_format))

# islice pulls the next `size` rows off the shared generator, which regroups the flat stream by track.
partitioned_list = [list(itertools.islice(final_format, size)) for size in node_partition_sizes]
#print('partitioned_list',partitioned_list)

#csv_dynamic_node_maker(partitioned_list) # DEPRECATED; these are the dynamic node lists used in R; saved to same directory this script is in
# The deprecated writer uses the MIDI key numbers themselves as vertex ids;
# that causes problems in various R packages, which assume that seeing a vertex.id of n means they must create at least n nodes.
# Kept for posterity because we may need this form for some purposes, but with the R script as currently configured it is non-functional.


### \\\ --- Dynamic EDGE creation from Dynamic nodes --- /// ###

def edge_converter(partitioned_list):
    """Yield one dynamic-edge row for every pair of consecutive nodes in a track.

    For neighbouring node rows ``a`` and ``b`` the edge starts at ``a``'s
    terminus (index 1) and ends at ``b``'s terminus; ``tail`` and ``head``
    are the two rows' vertex ids (index 2).

    The temporary edge id joins tail and head with a '-' separator. Plain
    concatenation is ambiguous (tail '11' + head '120' and tail '111' +
    head '20' both give '11120'), which would make different edges share
    one id.

    Args:
        partitioned_list: list of tracks, each a list of node rows
            ``[onset, terminus, vertex_id, ...]``.

    Yields:
        ``[onset, terminus, tail, head, 'FALSE', 'FALSE', duration, edge_id]``
        with ``duration = terminus - onset``. Rows of all tracks come out as
        one flat stream; a track with n nodes contributes max(n - 1, 0) rows.
    """
    for track in partitioned_list:
        # Pair every node with its successor; the last node has none.
        for current, following in zip(track, track[1:]):
            onset = current[1]
            terminus = following[1]
            tail = current[2]
            head = following[2]
            onset_censored = 'FALSE'
            terminus_censored = 'FALSE'
            duration = terminus - onset
            edge_id = '{}-{}'.format(tail, head)
            yield [onset, terminus, tail, head, onset_censored, terminus_censored, duration, edge_id]

def edge_id_maker(partitioned_edge_conversion):
    """Yield, for each track, the set of distinct edge-id values found at index 7 of its rows."""
    for track in partitioned_edge_conversion:
        yield {msg[7] for msg in track}

def edge_id_dict_maker(edge_list):
    """Map every edge-id key to a small integer, separately for each track.

    Keys are numbered from 1 in sorted order. Numbering in set-iteration
    order would make the ids differ from run to run, because string hashing
    is randomized per interpreter process; sorting makes the output
    reproducible.

    Args:
        edge_list: list of collections (one per track) of mutually orderable
            keys, e.g. sets of edge-id strings.

    Returns:
        A list of dicts, one per track, of the form ``{key: number}``.
    """
    return [{key: count for count, key in enumerate(sorted(edge), 1)} for edge in edge_list]

def edge_id_converter(partitioned_edge_conversion, edge_id_dict_list):
    """Return the edge rows with the temporary edge id replaced by its number.

    For every row the value at index 7 is looked up in the dict of the row's
    track and the resulting number takes its place at the end of the row.

    New rows are built instead of editing the given ones: a shallow copy of
    the outer list would still share (and therefore modify) the caller's
    rows, and a second call on the same data would then fail with KeyError.

    Args:
        partitioned_edge_conversion: list of tracks, each a list of edge
            rows (lists) carrying the temporary edge id at index 7.
        edge_id_dict_list: list of ``{edge_id: number}`` dicts, parallel to
            the tracks.

    Returns:
        A new list of tracks containing new row lists.

    Raises:
        KeyError: if a row's edge id is missing from its track's dict.
    """
    return [
        [msg[:7] + msg[8:] + [edge_id_dict_list[i][msg[7]]] for msg in track]
        for i, track in enumerate(partitioned_edge_conversion)
    ]

def csv_dynamic_edge_maker(edge_id_conversion):
    """Write one 'track_<n>_dynamic_edges.csv' file per track.

    Files are written to the current working directory. ``<n>`` is the
    track's position in ``edge_id_conversion`` plus one. Every file starts
    with the column header row, followed by the track's rows unchanged.
    """
    header = ['onset', 'terminus', 'tail', 'head', 'onset.censored', 'terminus.censored', 'duration', 'edge.id']
    for track_number, rows in enumerate(edge_id_conversion, start=1):
        filename = 'track_{}_dynamic_edges.csv'.format(track_number)
        with open(filename, 'w', newline='') as handle:
            out = csv.writer(handle)
            out.writerow(header)
            out.writerows(rows)

def static_edge_maker_conversion(edge_id_conversion):
    """Yield, per track, a list of ``[tail, head, '1']`` rows.

    Tail and head are taken from index 2 and 3 of each dynamic edge row.
    The constant '1' is a per-edge weight, so that repeated edges can be
    summed into aggregate weights later on.
    """
    for track in edge_id_conversion:
        weighted = []
        for msg in track:
            weighted.append([msg[2], msg[3], '1'])
        yield weighted

def csv_static_edge_maker(static_edge_list):
    """Write one 'track_<n>_static_edges.csv' file per track.

    Files are written to the current working directory. ``<n>`` is the
    track's position in ``static_edge_list`` plus one. Every file starts
    with the column header row, followed by the track's rows unchanged.
    """
    header = ['tail', 'head', 'weight']
    for track_number, rows in enumerate(static_edge_list, start=1):
        filename = 'track_{}_static_edges.csv'.format(track_number)
        with open(filename, 'w', newline='') as handle:
            out = csv.writer(handle)
            out.writerow(header)
            out.writerows(rows)

edge_conversion = edge_converter(partitioned_list)
# print('edge_conversion',list(edge_conversion))
# num(edges) is num(nodes) - 1 in each track, but never below 0: a track without any node has
# no edges either, and a negative count would make islice below raise ValueError.
edge_partition_sizes = [max(i - 1, 0) for i in node_partition_sizes]
# print(edge_partition_sizes)
partitioned_edge_conversion = [list(itertools.islice(edge_conversion, elem)) for elem in edge_partition_sizes] # like before the yield generator needs to be told how to group results into tracks
# print(partitioned_edge_conversion)
edge_list = list(edge_id_maker(partitioned_edge_conversion)) # one set per track holding the distinct temporary edge ids (index 7 of each edge row); we assign a unique int to each unique id in the next step
#print(edge_list)
edge_id_dict_list = edge_id_dict_maker(edge_list)
#print(edge_id_dict_list)
edge_id_conversion = edge_id_converter(partitioned_edge_conversion, edge_id_dict_list)
# print('edge_id_conversion',edge_id_conversion)


#csv_dynamic_edge_maker(edge_id_conversion) # DEPRECATED, see below these are the dynamic edge lists, output and saved
static_edge_list = list(static_edge_maker_conversion(edge_id_conversion))
#print('static_edge_list',static_edge_list)
# csv_static_edge_maker(static_edge_list) #DEPRECATED;# Uses the midi keys names for note id numbering;
# that can cause problems in various R packages who think that if they see a vertex.id of n, then they need to automatically create >=n nodes.
#kept for posterity because we may need this form for some purposes, but for the R script as currently configured it's non-functional. 

### \\\ --- Vertex attributes sub-routine --- /// ###
# two ways to do this

# Names of the twelve chromatic pitch classes, starting from C and spelled with sharps.
chromatic_pitches = ['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']

def MIDI_pitch_mapping(chromatic_pitches):
    """Yield ``[midi_key, note_name]`` for the MIDI keys 24 through 127.

    The pitch name cycles through ``chromatic_pitches`` starting at key 24.
    The octave number starts at 0 and is raised by one every time the name
    'C' comes up, so key 24 is named 'C1' and key 60 'C4'.
    """
    octave = 0
    for midi_key in range(24, 128):
        name = chromatic_pitches[(midi_key - 24) % 12]
        if name == 'C':
            octave += 1  # a new octave begins on every C
        yield [midi_key, name + str(octave)]

# [midi_key, note_name] pairs for MIDI keys 21-23, which MIDI_pitch_mapping (starting at key 24) does not produce.
shame = [[21, 'A0'], [22, 'A#0'], [23, 'B0']] #lol ugly hack, hence 'shame'

def csv_total_vertex_attributes_maker(MIDI_map):
    """Write the complete key-to-name table to 'MIDI_map.csv'.

    This is the first way of producing vertex attributes: every entry of
    ``MIDI_map`` is written, whether or not a track uses it. The file is
    created in the current working directory and starts with the header row
    'vertex.id', 'note_name'. ``MIDI_map`` itself is not modified.
    """
    rows = [['vertex.id', 'note_name']]
    rows.extend(MIDI_map)
    with open('MIDI_map.csv', 'w', newline='') as handle:
        csv.writer(handle).writerows(rows)

### Second Way ###
def get_node_sets(partitioned_list):
    """Yield, for each track, the set of vertex ids (index 2 of each row) converted to int.

    These sets are what is needed to build a vertex-attributes file that
    contains only the notes a track actually uses, instead of the full map.
    """
    for track in partitioned_list:
        yield {int(msg[2]) for msg in track}

def node_filter(MIDI_map, node_lists=None):
    """Return, per track, the ``MIDI_map`` entries whose key is used in that track.

    Args:
        MIDI_map: sequence of ``[midi_key, note_name]`` entries.
        node_lists: one collection of MIDI keys per track. When omitted, the
            module-level ``node_lists`` is used, which is what this function
            always read before the parameter existed.

    Returns:
        A list with one list per track, holding the matching ``MIDI_map``
        entries in ``MIDI_map`` order.

    Raises:
        NameError: if ``node_lists`` is omitted and no module-level
            ``node_lists`` has been defined yet.
    """
    if node_lists is None:
        try:
            node_lists = globals()['node_lists']
        except KeyError:
            raise NameError("name 'node_lists' is not defined") from None
    filtered = []
    for node_list in node_lists:
        wanted = set(node_list)  # set membership instead of scanning the list for every map entry
        filtered.append([entry for entry in MIDI_map if entry[0] in wanted])
    return filtered

def number_from_1(filt_list):
    """Yield ``[position, name]`` for every entry of every track.

    ``position`` counts from 1 and restarts for each track; ``name`` is the
    value at index 1 of the entry. The rows of all tracks come out as one
    flat stream.
    """
    for track in filt_list:
        position = 0
        for msg in track:
            position += 1
            yield [position, msg[1]]

def csv_vertex_attr_by_track(grouping_proper_numbered):
    """Write one 'track_<n>_static_nodes.csv' file per track.

    Files are written to the current working directory. ``<n>`` is the
    track's position in ``grouping_proper_numbered`` plus one. Every file
    starts with the column header row, followed by the track's rows unchanged.
    """
    header = ['vertex.id', 'name']
    for track_number, rows in enumerate(grouping_proper_numbered, start=1):
        filename = 'track_{}_static_nodes.csv'.format(track_number)
        with open(filename, 'w', newline='') as handle:
            out = csv.writer(handle)
            out.writerow(header)
            out.writerows(rows)

# Full key -> note-name table: the three hard-coded low keys followed by the generated ones.
MIDI_map = shame + list(MIDI_pitch_mapping(chromatic_pitches))
#print('MIDI_map',MIDI_map)
# Output and saved; this should be sent to the R SNA package as vertex attributes, aka vertex.attr.
csv_total_vertex_attributes_maker(MIDI_map)

# Notes used by each track, first as sets and then as ascending lists.
node_sets = list(get_node_sets(partitioned_list))
#print('node_sets',node_sets)
node_lists = [sorted(node_set) for node_set in node_sets]
#print('node_lists',node_lists)

# number_from_1 yields one flat stream, so record how many notes each track has
# in order to regroup that stream by track with islice.
node_list_partition_sizes = list(map(len, node_lists))
#print('node_list_partition_sizes',node_list_partition_sizes)
formatted_node_lists = node_filter(MIDI_map)
#print('formatted_node_lists',list(formatted_node_lists))
proper_numbered = number_from_1(formatted_node_lists)
grouping_proper_numbered = [
    list(itertools.islice(proper_numbered, size))
    for size in node_list_partition_sizes
]
#print('grouping_proper_numbered',grouping_proper_numbered)

#csv_vertex_attr_by_track(grouping_proper_numbered)#DEPRECATED;# Uses the midi keys names for note id numbering;
# that can cause problems in various R packages who think that if they see a vertex.id of n, then they need to automatically create >=n nodes.
#kept for posterity because we may need this form for some purposes, but for the R script as currently configured it's non-functional. 

### \\\ --- Various hacks to fix the issue of R assuming that all nodes up to the highest vertex.id should be drawn --- /// ###
# If MIDI numbers are used directly as vertex ids and the highest MIDI number used is, say, 72, then R draws 72 nodes, even if vertex ids 1-60 are never used.
# So below we renumber, after the fact, each MIDI note with a unique smaller number, per track (because the notes used change from track to track).

# Per track: the small 1-based ids handed out by number_from_1 ...
low_number_lists = [[msg[0] for msg in track] for track in grouping_proper_numbered]
#print('low_number_lists',low_number_lists)

# ... and, in the same order, the MIDI key numbers they stand for.
midi_key_number_lists = [[msg[0] for msg in track] for track in formatted_node_lists]
#print('midi_key_number_lists',midi_key_number_lists)

# One {midi_key: small_id} lookup table per track.
node_id_dict = [
    dict(zip(midi_keys, small_ids))
    for midi_keys, small_ids in zip(midi_key_number_lists, low_number_lists)
]
#print('node_id_dict',node_id_dict)

# Static edges with tail and head translated from MIDI key numbers to the per-track small ids.
corrected_static_edges = [
    [
        [node_id_dict[i][int(msg[0])], node_id_dict[i][int(msg[1])], msg[2]]
        for msg in track
    ]
    for i, track in enumerate(static_edge_list)
]
#print('corrected_static_edges',corrected_static_edges)
csv_static_edge_maker(corrected_static_edges)

# Dynamic edges with tail (index 2) and head (index 3) translated to the per-track small ids;
# all other columns are carried over unchanged.
corrected_dynamic_edges = [
    [
        [msg[0], msg[1], node_id_dict[i][int(msg[2])], node_id_dict[i][int(msg[3])], msg[4], msg[5], msg[6], msg[7]]
        for msg in track
    ]
    for i, track in enumerate(edge_id_conversion)
]
#print('corrected_dynamic_edges',corrected_dynamic_edges)
csv_dynamic_edge_maker(corrected_dynamic_edges)

# Dynamic nodes with the vertex id (index 2) translated to the per-track small id;
# all other columns are carried over unchanged.
corrected_dynamic_nodes = [
    [
        [msg[0], msg[1], node_id_dict[i][int(msg[2])], msg[3], msg[4], msg[5]]
        for msg in track
    ]
    for i, track in enumerate(partitioned_list)
]
#print('corrected_dynamic_nodes',corrected_dynamic_nodes)
csv_dynamic_node_maker(corrected_dynamic_nodes)
