# Proyecto de Grado - Magenta to FWOD
In this colab we will explore the classification of the magenta Groove MIdi Dataset (GMD) using the FWOD representation. The idea is that we will:
* Extract the MIDI files that are in each folder and subfolder
* Parse each file and convert it to hit and velocity list representation (hv_list).
* Split the MIDI file into 16 steps (one bar) and flatten it to FWOD representation
* create a data frame where we have these rows MIDI file name / bar / flat representation / FWOD representation / class
* Using the last two columns of the dataframe (FWOD and class) we can train a model to see if we can guess the class from the FWOD representation.

**Notes**
Behzad Haki (MTG PhD student) suggests that we also extact the microdeviation descriptor from each bar of the MIDI file. He suggests this information can be useful also to distinguish patterns that have similar patterns as hiphop and rock. We will do this on oure second iteration.

Our benchmark is to classify the files better and simpler than these guys: https://arxiv.org/pdf/2407.17536. It will be easy since they just use four classes (funk, jazz, latin and rock)


# Start everything

## Install mido library

## Import all necessary modules and mount drive

In [1]:
import os
import mido
import numpy as np
import math
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import pickle

## Define location of Groove Midi Dataset files
Please note that this location should be edited in each personal Colab, as addresses in drive/colab can't be relative.

In [2]:
GMD_path = "magenta midi"

## MIDI map to note numbers

In [3]:
GM_dict = {
    # key is midi note number
    # values are:
    # [0] name (as string)
    # [1] name category low mid or high (as string)
    # [2] substiture midi number for simplified MIDI (all instruments)
    # [3] name of instrument for 8 note conversion (as string)
    # [4] number of instrument for 8 note conversion
    # [5] substiture midi number for conversion to 8 note
    # [6] substiture midi number for conversion to 16 note
    # [7] substiture midi number for conversion to 3 note
    # if we are going to remap just use GM_dict[msg.note][X]
    22: ["Closed Hi-Hat edge", "high", 42, "CH", 3, 42, 42, 42],
    26: ["Open Hi-Hat edge", "high", 46, "OH", 4, 46, 46, 42],
    35: ["Acoustic Bass Drum", "low", 36, "K", 1, 36, 36, 36],
    36: ["Bass Drum 1", "low", 36, "K", 1, 36, 36, 36],
    37: ["Side Stick", "mid", 37, "RS", 6, 37, 37, 38],
    38: ["Acoustic Snare", "mid", 38, "SN", 2, 38, 38, 38],
    39: ["Hand Clap", "mid", 39, "CP", 5, 39, 39, 38],
    40: ["Electric Snare", "mid", 38, "SN", 2, 38, 38, 38],
    41: ["Low Floor Tom", "low", 45, "LT", 7, 45, 45, 36],
    42: ["Closed Hi Hat", "high", 42, "CH", 3, 42, 42, 42],
    43: ["High Floor Tom", "mid", 45, "HT", 8, 45, 45, 38],
    44: ["Pedal Hi-Hat", "high", 46, "OH", 4, 46, 46, 42],
    45: ["Low Tom", "low", 45, "LT", 7, 45, 45, 36],
    46: ["Open Hi-Hat", "high", 46, "OH", 4, 46, 46, 42],
    47: ["Low-Mid Tom", "low", 47, "MT", 7, 45, 47, 36],
    48: ["Hi-Mid Tom", "mid", 47, "MT", 7, 50, 50, 38],
    49: ["Crash Cymbal 1", "high", 49, "CC", 4, 46, 42, 42],
    50: ["High Tom", "mid", 50, "HT", 8, 50, 50, 38],
    51: ["Ride Cymbal 1", "high", 51, "RC", -1, 42, 51, 42],
    52: ["Chinese Cymbal", "high", 52, "", -1, 46, 51, 42],
    53: ["Ride Bell", "high", 53, "", -1, 42, 51, 42],
    54: ["Tambourine", "high", 54, "", -1, 42, 69, 42],
    55: ["Splash Cymbal", "high", 55, "OH", 4, 46, 42, 42],
    56: ["Cowbell", "high", 56, "CB", -1, 37, 56, 42],
    57: ["Crash Cymbal 2", "high", 57, "CC", 4, 46, 42, 42],
    58: ["Vibraslap", "mid", 58, "VS", 6, 37, 37, 42],
    59: ["Ride Cymbal 2", "high", 59, "RC", 3, 42, 51, 42],
    60: ["Hi Bongo", "high", 60, "LB", 8, 45, 63, 42],
    61: ["Low Bongo", "mid", 61, "HB", 7, 45, 64, 38],
    62: ["Mute Hi Conga", "mid", 62, "MC", 8, 50, 62, 38],
    63: ["Open Hi Conga", "high", 63, "HC", 8, 50, 63, 42],
    64: ["Low Conga", "low", 64, "LC", 7, 45, 64, 36],
    65: ["High Timbale", "mid", 65, "", 8, 45, 63, 38],
    66: ["Low Timbale", "low", 66, "", 7, 45, 64, 36],
    67: ["High Agogo", "high", 67, "", -1, 37, 56, 42],
    68: ["Low Agogo", "mid", 68, "", -1, 37, 56, 38],
    69: ["Cabasa", "high", 69, "MA", -1, 42, 69, 42],
    70: ["Maracas", "high", 69, "MA", -1, 42, 69, 42],
    71: ["Short Whistle", "high", 71, "", -1, 37, 56, 42],
    72: ["Long Whistle", "high", 72, "", -1, 37, 56, 42],
    73: ["Short Guiro", "high", 73, "", -1, 42, 42, 42],
    74: ["Long Guiro", "high", 74, "", -1, 46, 46, 42],
    75: ["Claves", "high", 75, "", -1, 37, 75, 42],
    76: ["Hi Wood Block", "high", 76, "", 8, 50, 63, 42],
    77: ["Low Wood Block", "mid", 77, "", 7, 45, 64, 38],
    78: ["Mute Cuica", "high", 78, "", -1, 50, 62, 42],
    79: ["Open Cuica", "high", 79, "", -1, 45, 63, 42],
    80: ["Mute Triangle", "high", 80, "", -1, 37, 75, 42],
    81: ["Open Triangle", "high", 81, "", -1, 37, 75, 42],
}

## MIDI parser to ```hv_list ``` representation



In [4]:
def midifile2hv_list(file_name, mapping):
    '''
    pattern name must include .mid
    get a MIDI file and convert it to an hv_list (a list of note numbers and velocity)
    use the "mapping" variable to define the type of instrument mapping
    that will be used in the hv_list "all", "16", "8", "3"
    '''
    pattern=[]
    mid=mido.MidiFile(file_name) #create a mido file instance
    sixteenth= mid.ticks_per_beat/4 #find the length of a sixteenth note
    #print ("sixteenth", sixteenth)

    # time: inside a track, it is delta time in ticks (integrer).
    # A delta time is how long to wait before the next message.
    acc=0 #use this to keep track of time

    # depending on the instruments variable select a notemapping
    if mapping=="allinstruments":
        column=2
    elif mapping=="16instruments":
        column=6
    elif mapping=="8instruments":
        column=5
    elif mapping=="3instruments":
        column=7
    else: column = 2 # if no mapping is selected use "allinstrument" mapping

    for i, track in enumerate(mid.tracks):
        for msg in track: #process all messages
            acc += msg.time # accumulate time of any message type
            if msg.type == "note_on" and msg.velocity != 0: # skip velocity 0 format of note off
                if msg.note in list(GM_dict.keys()):
                  midinote = GM_dict[msg.note][column] #remap msg.note by demand
                  rounded_step = int((acc/sixteenth)+0.45)
                  midivelocity = msg.velocity/127 # normalize upfront
                  pattern.append((int(acc/sixteenth), midinote, midivelocity)) # step, note, velocity

        if len(pattern)>0: #just proceed if analyzed pattern has at least one onset

            #round the pattern to the next multiple of 16
            if (rounded_step/16) - (rounded_step//16) != 0:
                pattern_len_in_steps = (rounded_step//16)*16 + 16
            else:
                pattern_len_in_steps = (rounded_step//16)*16

            #create an empty list of lists the size of the pattern
            output_pattern=[[]]*pattern_len_in_steps
            # group the instruments and their velocity that played at a specific step
            i = 0
            for step in range(len(output_pattern)):
                output_pattern.append([(x[1],x[2]) for x in pattern if x[0]==step])
                #make sure no notes are repeated and events are sorted
                output_pattern[step] = list(set(output_pattern[step]))
                output_pattern[step].sort()

    ##################################
    # split the pattern every 16 steps
    ##################################
    hv_lists_split=[]
    for x in range(len(output_pattern)//16):
        patt_fragment = output_pattern[x*16:(x*16)+16]
        patt_density = sum([1 for x in patt_fragment if x!=[]])

        #############################################################
        # filter out patterns that have less than 4 events with notes
        #############################################################
        # NOTE: more conditions could be added (i.e. kick on step 0, etc)
        #############################################################
        if patt_density > 4:
            hv_lists_split.append(patt_fragment)

  # output is a 16-step pattern
    return hv_lists_split

def find_unique_hv_lists(hv_lists_split):
  # input a list of hv_lists and return the set of unique
  unique_hv = list(set([tuple([tuple(step) for step in hv_list]) for hv_list in hv_lists]))
  return unique_hv

## Flatten an `hv_list` to `FWOD` representation

In [5]:
# hv list flattening
def flatten_hv_list(hv_list):
  # input an hv list and output a flattened representation as a v_list

  # list of instruments and categories
  lows =  [35, 36, 41, 45, 47, 64, 66]
  mids =  [37, 38, 39, 40, 43, 48, 50, 61, 62, 65, 68, 77]
  his = [22, 26, 42, 44, 46, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 63, 67, 69, 70, 71, 72, 73, 74, 75, 76, 78, 79, 80, 81]

  flat = np.zeros([len(hv_list),1])

  # multiply velocities and categories
  for i,step in enumerate(hv_list):
    step_weight = 0
    for onset in step:
      if onset[0] in lows:
        step_weight += onset[1]*3
      elif onset[0] in mids:
        step_weight += onset[1]*2
      else:
        step_weight += onset[1]*1
    flat[i] = step_weight

  flat = flat/max(flat)
  return flat

# Processing example
In this code section we will

1.   import a MIDI file
2.   parse to hv_lists
3.   convert to FWOD representation



In [9]:
GMD_path # the gm dataset is here
items_in_directory = os.listdir(GMD_path)
folders_in_directory = [item for item in items_in_directory if os.path.isdir(os.path.join(GMD_path, item))] # list the folders whoch are classes
# print("All folders in the GMD_path folder. These are the classes we will want to classify in:")
# print(folders_in_directory)

folder = 'pop'

# Example, get all MIDI files in the 'afrobeat' folder
MIDI_files_in_afrobeat = os.listdir(GMD_path+f"/{folder}")
MIDI_files_in_afrobeat = [item for item in MIDI_files_in_afrobeat if os.path.isfile(os.path.join(GMD_path+f"/{folder}", item))]
MIDI_files_in_afrobeat
# print("these are the midi files in the 'afrobeat' folder:")
# print(MIDI_files_in_afrobeat)

# select one file (i.e. the first one) and parse it to hv_lists
midi_file = MIDI_files_in_afrobeat[2] #select teh first file
hv_lists = midifile2hv_list(GMD_path+f"/{folder}/"+midi_file, "all_instruments")
hv_lists[3]

# fwod_representations = [flatten_hv_list(hv_list) for hv_list in hv_lists]
# fwod_representations

[[],
 [],
 [],
 [(46, 0.6456692913385826)],
 [],
 [(53, 0.4645669291338583)],
 [(53, 0.48031496062992124)],
 [(38, 1.0)],
 [],
 [(53, 1.0)],
 [(42, 0.13385826771653545)],
 [(45, 0.3543307086614173)],
 [(42, 0.5826771653543307)],
 [(38, 1.0)],
 [(42, 1.0)],
 [(36, 0.9212598425196851), (55, 0.7165354330708661)]]

In [7]:
# Plot multiple FWOD representations (first 5 patterns as example)
plt.figure(figsize=(12, 8))
for i, fwod in enumerate(fwod_representations[:5]):
    plt.plot(range(1, 17), fwod.flatten(), label=f'Pattern {i+1}')
plt.title('FWOD Representations Across Patterns', fontsize=16)
plt.xlabel('Step (1-16)', fontsize=14)
plt.ylabel('FWOD Intensity (Normalized)', fontsize=14)
plt.xticks(range(1, 17))
plt.legend()
plt.grid(axis='both', linestyle='--', alpha=0.6)
plt.show()


NameError: name 'fwod_representations' is not defined

<Figure size 1200x800 with 0 Axes>