In [1]:
import os
import mido
import numpy as np
import math
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import pickle

In [2]:
GMD_path = "magenta midi"

## MIDI map to note numbers

In [3]:
GM_dict = {
    # key is midi note number
    # values are:
    # [0] name (as string)
    # [1] name category low mid or high (as string)
    # [2] substiture midi number for simplified MIDI (all instruments)
    # [3] name of instrument for 8 note conversion (as string)
    # [4] number of instrument for 8 note conversion
    # [5] substiture midi number for conversion to 8 note
    # [6] substiture midi number for conversion to 16 note
    # [7] substiture midi number for conversion to 3 note
    # if we are going to remap just use GM_dict[msg.note][X]
    22: ["Closed Hi-Hat edge", "high", 42, "CH", 3, 42, 42, 42],
    26: ["Open Hi-Hat edge", "high", 46, "OH", 4, 46, 46, 42],
    35: ["Acoustic Bass Drum", "low", 36, "K", 1, 36, 36, 36],
    36: ["Bass Drum 1", "low", 36, "K", 1, 36, 36, 36],
    37: ["Side Stick", "mid", 37, "RS", 6, 37, 37, 38],
    38: ["Acoustic Snare", "mid", 38, "SN", 2, 38, 38, 38],
    39: ["Hand Clap", "mid", 39, "CP", 5, 39, 39, 38],
    40: ["Electric Snare", "mid", 38, "SN", 2, 38, 38, 38],
    41: ["Low Floor Tom", "low", 45, "LT", 7, 45, 45, 36],
    42: ["Closed Hi Hat", "high", 42, "CH", 3, 42, 42, 42],
    43: ["High Floor Tom", "mid", 45, "HT", 8, 45, 45, 38],
    44: ["Pedal Hi-Hat", "high", 46, "OH", 4, 46, 46, 42],
    45: ["Low Tom", "low", 45, "LT", 7, 45, 45, 36],
    46: ["Open Hi-Hat", "high", 46, "OH", 4, 46, 46, 42],
    47: ["Low-Mid Tom", "low", 47, "MT", 7, 45, 47, 36],
    48: ["Hi-Mid Tom", "mid", 47, "MT", 7, 50, 50, 38],
    49: ["Crash Cymbal 1", "high", 49, "CC", 4, 46, 42, 42],
    50: ["High Tom", "mid", 50, "HT", 8, 50, 50, 38],
    51: ["Ride Cymbal 1", "high", 51, "RC", -1, 42, 51, 42],
    52: ["Chinese Cymbal", "high", 52, "", -1, 46, 51, 42],
    53: ["Ride Bell", "high", 53, "", -1, 42, 51, 42],
    54: ["Tambourine", "high", 54, "", -1, 42, 69, 42],
    55: ["Splash Cymbal", "high", 55, "OH", 4, 46, 42, 42],
    56: ["Cowbell", "high", 56, "CB", -1, 37, 56, 42],
    57: ["Crash Cymbal 2", "high", 57, "CC", 4, 46, 42, 42],
    58: ["Vibraslap", "mid", 58, "VS", 6, 37, 37, 42],
    59: ["Ride Cymbal 2", "high", 59, "RC", 3, 42, 51, 42],
    60: ["Hi Bongo", "high", 60, "LB", 8, 45, 63, 42],
    61: ["Low Bongo", "mid", 61, "HB", 7, 45, 64, 38],
    62: ["Mute Hi Conga", "mid", 62, "MC", 8, 50, 62, 38],
    63: ["Open Hi Conga", "high", 63, "HC", 8, 50, 63, 42],
    64: ["Low Conga", "low", 64, "LC", 7, 45, 64, 36],
    65: ["High Timbale", "mid", 65, "", 8, 45, 63, 38],
    66: ["Low Timbale", "low", 66, "", 7, 45, 64, 36],
    67: ["High Agogo", "high", 67, "", -1, 37, 56, 42],
    68: ["Low Agogo", "mid", 68, "", -1, 37, 56, 38],
    69: ["Cabasa", "high", 69, "MA", -1, 42, 69, 42],
    70: ["Maracas", "high", 69, "MA", -1, 42, 69, 42],
    71: ["Short Whistle", "high", 71, "", -1, 37, 56, 42],
    72: ["Long Whistle", "high", 72, "", -1, 37, 56, 42],
    73: ["Short Guiro", "high", 73, "", -1, 42, 42, 42],
    74: ["Long Guiro", "high", 74, "", -1, 46, 46, 42],
    75: ["Claves", "high", 75, "", -1, 37, 75, 42],
    76: ["Hi Wood Block", "high", 76, "", 8, 50, 63, 42],
    77: ["Low Wood Block", "mid", 77, "", 7, 45, 64, 38],
    78: ["Mute Cuica", "high", 78, "", -1, 50, 62, 42],
    79: ["Open Cuica", "high", 79, "", -1, 45, 63, 42],
    80: ["Mute Triangle", "high", 80, "", -1, 37, 75, 42],
    81: ["Open Triangle", "high", 81, "", -1, 37, 75, 42],
}

## MIDI parser to ```hv_list ``` representation



In [4]:
def midifile2hv_list(file_name, mapping):
    '''
    pattern name must include .mid
    get a MIDI file and convert it to an hv_list (a list of note numbers and velocity)
    use the "mapping" variable to define the type of instrument mapping
    that will be used in the hv_list "all", "16", "8", "3"
    '''
    pattern=[]
    mid=mido.MidiFile(file_name) #create a mido file instance
    sixteenth= mid.ticks_per_beat/4 #find the length of a sixteenth note
    #print ("sixteenth", sixteenth)

    # time: inside a track, it is delta time in ticks (integrer).
    # A delta time is how long to wait before the next message.
    acc=0 #use this to keep track of time

    # depending on the instruments variable select a notemapping
    if mapping=="allinstruments":
        column=2
    elif mapping=="16instruments":
        column=6
    elif mapping=="8instruments":
        column=5
    elif mapping=="3instruments":
        column=7
    else: column = 2 # if no mapping is selected use "allinstrument" mapping

    for i, track in enumerate(mid.tracks):
        for msg in track: #process all messages
            acc += msg.time # accumulate time of any message type
            if msg.type == "note_on" and msg.velocity != 0: # skip velocity 0 format of note off
                if msg.note in list(GM_dict.keys()):
                  midinote = GM_dict[msg.note][column] #remap msg.note by demand
                  rounded_step = int((acc/sixteenth)+0.45)
                  midivelocity = msg.velocity/127 # normalize upfront
                  pattern.append((int(acc/sixteenth), midinote, midivelocity)) # step, note, velocity

        if len(pattern)>0: #just proceed if analyzed pattern has at least one onset

            #round the pattern to the next multiple of 16
            if (rounded_step/16) - (rounded_step//16) != 0:
                pattern_len_in_steps = (rounded_step//16)*16 + 16
            else:
                pattern_len_in_steps = (rounded_step//16)*16

            #create an empty list of lists the size of the pattern
            output_pattern=[[]]*pattern_len_in_steps
            # group the instruments and their velocity that played at a specific step
            i = 0
            for step in range(len(output_pattern)):
                output_pattern.append([(x[1],x[2]) for x in pattern if x[0]==step])
                #make sure no notes are repeated and events are sorted
                output_pattern[step] = list(set(output_pattern[step]))
                output_pattern[step].sort()

    ##################################
    # split the pattern every 16 steps
    ##################################
    hv_lists_split=[]
    for x in range(len(output_pattern)//16):
        patt_fragment = output_pattern[x*16:(x*16)+16]
        patt_density = sum([1 for x in patt_fragment if x!=[]])

        #############################################################
        # filter out patterns that have less than 4 events with notes
        #############################################################
        # NOTE: more conditions could be added (i.e. kick on step 0, etc)
        #############################################################
        if patt_density > 4:
            hv_lists_split.append(patt_fragment)

  # output is a 16-step pattern
    return hv_lists_split

def find_unique_hv_lists(hv_lists_split):
  # input a list of hv_lists and return the set of unique
  unique_hv = list(set([tuple([tuple(step) for step in hv_list]) for hv_list in hv_lists]))
  return unique_hv

## Flatten an `hv_list` to `FWOD` representation

In [5]:
# hv list flattening
def flatten_hv_list(hv_list):
  # input an hv list and output a flattened representation as a v_list

  # list of instruments and categories
  lows =  [35, 36, 41, 45, 47, 64, 66]
  mids =  [37, 38, 39, 40, 43, 48, 50, 61, 62, 65, 68, 77]
  his = [22, 26, 42, 44, 46, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 63, 67, 69, 70, 71, 72, 73, 74, 75, 76, 78, 79, 80, 81]

  flat = np.zeros([len(hv_list),1])

  # multiply velocities and categories
  for i,step in enumerate(hv_list):
    step_weight = 0
    for onset in step:
      if onset[0] in lows:
        step_weight += onset[1]*3
      elif onset[0] in mids:
        step_weight += onset[1]*2
      else:
        step_weight += onset[1]*1
    flat[i] = step_weight

  flat = flat/max(flat)
  return flat

# Data creation

In [14]:
def list_folders(directory):
    return [name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]

def filter_midi(list): 
    return [x for x in list if x.endswith(".mid")]

def list_all_elements(directory):
    elements = []
    for root, dirs, files in os.walk(directory):
        for name in dirs:
            elements.append(os.path.join(root, name))
        for name in files:
            elements.append(os.path.join(root, name))
    return filter_midi(elements)

# Example usage
directory_path_magenta = 'magenta midi'
folders = list_folders(directory_path_magenta)

all_midi_files = {}


print(folders)
for genre in folders:
    all_midi_files[genre] = list_all_elements(f"{directory_path_magenta}/{genre}")
all_midi_files

['pop', 'neworleans', 'boska', 'blues', 'afrocuban', 'latin', 'sano', 'reggae', 'afrobeat', 'rock', 'dance', 'hiphop', 'punk', 'gospel', 'highlife', 'soul', 'middleeastern', 'country', 'funk', 'jazz']


{'pop': ['magenta midi/pop/105_pop_132_beat_4-4.mid',
  'magenta midi/pop/100_pop_142_beat_4-4.mid',
  'magenta midi/pop/104_pop_132_beat_4-4.mid',
  'magenta midi/pop/7_pop-groove7_138_beat_4-4.mid',
  'magenta midi/pop/101_pop_142_beat_4-4.mid',
  'magenta midi/pop/97_pop_142_beat_4-4.mid',
  'magenta midi/pop/102_pop_132_beat_4-4.mid',
  'magenta midi/pop/103_pop_132_beat_4-4.mid',
  'magenta midi/pop/96_pop_142_beat_4-4.mid',
  'magenta midi/pop/soft/22_pop-soft_83_beat_4-4.mid',
  'magenta midi/pop/soft/11_pop-soft_83_beat_4-4.mid',
  'magenta midi/pop/soft/23_pop-soft_83_beat_4-4.mid'],
 'neworleans': ['magenta midi/neworleans/shuffle/9_neworleans-shuffle_104_beat_4-4.mid',
  'magenta midi/neworleans/chacha/8_neworleans-chacha_124_beat_4-4.mid',
  'magenta midi/neworleans/secondline/6_neworleans-secondline_99_beat_4-4.mid',
  'magenta midi/neworleans/secondline/100_neworleans-secondline_94_beat_4-4.mid',
  'magenta midi/neworleans/secondline/7_neworleans-secondline_124_beat_4-4.m

In [None]:

for key, value in all_midi_files.items():
	print(key)
	for midi in value:
		print(len(midi))
		
	print('='*50)

pop
41
41
41
47
41
40
41
41
40
49
49
49
neworleans
69
67
74
76
75
63
63
64
63
63
63
63
63
boska
25
25
26
25
25
25
25
25
25
25
blues
59
59
59
59
afrocuban
53
52
53
65
67
68
68
latin
43
44
42
42
44
43
44
44
81
61
76
76
76
76
76
87
87
87
75
75
76
76
76
76
76
76
81
80
80
56
56
79
74
74
75
74
76
63
63
55
76
63
63
63
62
63
58
58
sano
23
23
24
23
23
23
23
23
23
23
reggae
46
45
45
56
afrobeat
49
49
49
50
49
49
49
49
50
50
49
49
49
rock
42
42
42
41
41
42
42
42
41
42
42
41
41
42
41
41
41
42
42
41
42
40
41
42
41
42
41
41
41
41
42
42
41
42
41
42
41
40
42
41
40
42
41
42
41
41
40
42
42
41
42
42
42
42
41
41
42
42
42
42
42
41
41
42
42
42
41
41
40
42
42
41
42
41
41
41
42
40
41
42
40
41
42
42
42
41
40
41
42
41
42
41
42
42
42
41
42
42
41
41
42
42
41
42
41
40
40
42
42
42
42
42
41
40
42
41
42
42
42
43
41
42
41
41
42
41
41
40
42
41
41
42
41
41
42
42
42
41
42
42
41
42
41
40
41
41
41
41
42
42
41
42
41
41
41
42
42
42
42
42
41
41
42
41
42
42
41
42
56
58
58
58
56
54
54
53
53
61
51
53
51
63
dance
55
56
56
57
55
6

In [None]:
fwod_representations = []
for key, value in all_midi_files.items():
	for midi in value:
		hv_lists = midifile2hv_list(midi, "allinstruments")
		# print(len(hv_lists)) ---> son arreglos que contiene 16 pasos
		for pattern in range(len(hv_lists)):
			fwod = flatten_hv_list(hv_lists[pattern])
			element = {'file': midi, 'sequence': pattern, 'class': key}
			for i in range(len(fwod)):
				element[f'step_{i}'] = fwod[i][0]
			fwod_representations.append(element)
fwod_representations

[{'file': 'magenta midi/pop/105_pop_132_beat_4-4.mid',
  'sequence': 0,
  'class': 'pop',
  'step_0': np.float64(0.0),
  'step_1': np.float64(0.0),
  'step_2': np.float64(0.0),
  'step_3': np.float64(0.05357142857142857),
  'step_4': np.float64(0.0),
  'step_5': np.float64(0.0),
  'step_6': np.float64(0.0),
  'step_7': np.float64(1.0),
  'step_8': np.float64(0.17857142857142858),
  'step_9': np.float64(0.07142857142857142),
  'step_10': np.float64(0.0),
  'step_11': np.float64(0.42857142857142855),
  'step_12': np.float64(0.0),
  'step_13': np.float64(0.05803571428571429),
  'step_14': np.float64(0.0),
  'step_15': np.float64(0.0)},
 {'file': 'magenta midi/pop/105_pop_132_beat_4-4.mid',
  'sequence': 1,
  'class': 'pop',
  'step_0': np.float64(0.8976377952755905),
  'step_1': np.float64(0.1889763779527559),
  'step_2': np.float64(0.0),
  'step_3': np.float64(0.0),
  'step_4': np.float64(1.0),
  'step_5': np.float64(0.13385826771653545),
  'step_6': np.float64(0.0),
  'step_7': np.float

In [34]:
fwod_representations_df = pd.DataFrame(fwod_representations)

# Display the DataFrame
fwod_representations_df

Unnamed: 0,file,sequence,class,step_0,step_1,step_2,step_3,step_4,step_5,step_6,step_7,step_8,step_9,step_10,step_11,step_12,step_13,step_14,step_15
0,magenta midi/pop/105_pop_132_beat_4-4.mid,0,pop,0.000000,0.000000,0.000000,0.053571,0.000000,0.000000,0.000000,1.000000,0.178571,0.071429,0.000000,0.428571,0.000000,0.058036,0.000000,0.000000
1,magenta midi/pop/105_pop_132_beat_4-4.mid,1,pop,0.897638,0.188976,0.000000,0.000000,1.000000,0.133858,0.000000,1.000000,0.000000,0.212598,0.000000,1.000000,0.000000,0.259843,0.000000,1.000000
2,magenta midi/pop/105_pop_132_beat_4-4.mid,2,pop,0.000000,0.377953,0.000000,1.000000,0.000000,0.000000,0.000000,0.157480,0.892388,0.049869,0.000000,0.889764,0.000000,0.068241,0.000000,0.774278
3,magenta midi/pop/105_pop_132_beat_4-4.mid,3,pop,0.000000,0.480315,0.000000,1.000000,0.000000,0.393701,0.000000,0.333333,0.000000,0.482940,0.000000,1.000000,0.000000,0.041995,0.000000,0.703412
4,magenta midi/pop/105_pop_132_beat_4-4.mid,4,pop,0.094488,0.433071,0.000000,1.000000,0.000000,0.000000,0.377953,0.299213,0.000000,0.419948,0.000000,0.666667,0.314961,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19789,magenta midi/jazz/funk/4_jazz-funk_116_beat_4-...,32,jazz,0.584830,0.668663,0.287425,0.059880,0.702595,1.000000,0.147705,0.622754,0.119760,0.604790,0.000000,0.481038,0.327345,0.510978,0.215569,0.433134
19790,magenta midi/jazz/funk/4_jazz-funk_116_beat_4-...,33,jazz,0.083650,0.692015,0.692015,0.904943,0.266160,0.714829,0.737643,0.486692,0.463878,0.798479,0.536122,0.882129,0.585551,0.832700,1.000000,0.524715
19791,magenta midi/jazz/funk/4_jazz-funk_116_beat_4-...,34,jazz,0.258427,0.561798,0.387640,0.730337,0.000000,1.000000,0.140449,0.500000,0.148876,0.629213,0.311798,0.626404,0.154494,0.603933,0.337079,0.587079
19792,magenta midi/jazz/funk/4_jazz-funk_116_beat_4-...,35,jazz,0.323129,0.778912,0.448980,0.894558,0.187075,0.772109,0.217687,0.948980,0.000000,1.000000,0.418367,0.833333,0.183673,0.721088,0.163265,0.323129


In [35]:
# fwod_representations_df into excel
fwod_representations_df.to_excel('fwod_representations.xlsx', index=False)