In [None]:
import os
import mido
import numpy as np
import math
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import pickle

In [None]:
# Show complete cell contents and every column when a dataframe is displayed
for _display_option in ('display.max_colwidth', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [None]:
# function to list folders and subfolders
def list_files_and_subfolders(folder_path):
    """Recursively collect every subfolder and file found below ``folder_path``.

    Returns a tuple ``(subfolder_names, file_names)``; both are lists of paths
    built by joining the directory being visited with the entry name, in the
    order in which ``os.walk`` yields them.
    """
    collected_dirs = []
    collected_files = []
    for current_dir, child_dirs, child_files in os.walk(folder_path):
        # subfolders of the directory currently being visited
        collected_dirs.extend(os.path.join(current_dir, entry) for entry in child_dirs)
        # files of the directory currently being visited
        collected_files.extend(os.path.join(current_dir, entry) for entry in child_files)
    return collected_dirs, collected_files

In [None]:
# Lookup table used to remap MIDI drum note numbers to reduced instrument sets.
# midifile2hv_list reads columns [2], [5], [6] and [7]; notes that are not a
# key of this dict are ignored there.
GM_dict = {
    # key is midi note number
    # values are:
    # [0] name (as string)
    # [1] name category low mid or high (as string)
    # [2] substitute midi number for simplified MIDI (all instruments)
    # [3] name of instrument for 8 note conversion (as string)
    # [4] number of instrument for 8 note conversion
    # [5] substitute midi number for conversion to 8 note
    # [6] substitute midi number for conversion to 16 note
    # [7] substitute midi number for conversion to 3 note
    # if we are going to remap just use GM_dict[msg.note][X]
    # NOTE(review): the abbreviations in column [3] for notes 60 ("LB" for
    # Hi Bongo) and 61 ("HB" for Low Bongo) look swapped — confirm.
    22: ["Closed Hi-Hat edge", "high", 42, "CH", 3, 42, 42, 42],
    26: ["Open Hi-Hat edge", "high", 46, "OH", 4, 46, 46, 42],
    35: ["Acoustic Bass Drum", "low", 36, "K", 1, 36, 36, 36],
    36: ["Bass Drum 1", "low", 36, "K", 1, 36, 36, 36],
    37: ["Side Stick", "mid", 37, "RS", 6, 37, 37, 38],
    38: ["Acoustic Snare", "mid", 38, "SN", 2, 38, 38, 38],
    39: ["Hand Clap", "mid", 39, "CP", 5, 39, 39, 38],
    40: ["Electric Snare", "mid", 38, "SN", 2, 38, 38, 38],
    41: ["Low Floor Tom", "low", 45, "LT", 7, 45, 45, 36],
    42: ["Closed Hi Hat", "high", 42, "CH", 3, 42, 42, 42],
    43: ["High Floor Tom", "mid", 45, "HT", 8, 45, 45, 38],
    44: ["Pedal Hi-Hat", "high", 46, "OH", 4, 46, 46, 42],
    45: ["Low Tom", "low", 45, "LT", 7, 45, 45, 36],
    46: ["Open Hi-Hat", "high", 46, "OH", 4, 46, 46, 42],
    47: ["Low-Mid Tom", "low", 47, "MT", 7, 45, 47, 36],
    48: ["Hi-Mid Tom", "mid", 47, "MT", 7, 50, 50, 38],
    49: ["Crash Cymbal 1", "high", 49, "CC", 4, 46, 42, 42],
    50: ["High Tom", "mid", 50, "HT", 8, 50, 50, 38],
    51: ["Ride Cymbal 1", "high", 51, "RC", -1, 42, 51, 42],
    52: ["Chinese Cymbal", "high", 52, "", -1, 46, 51, 42],
    53: ["Ride Bell", "high", 53, "", -1, 42, 51, 42],
    54: ["Tambourine", "high", 54, "", -1, 42, 69, 42],
    55: ["Splash Cymbal", "high", 55, "OH", 4, 46, 42, 42],
    56: ["Cowbell", "high", 56, "CB", -1, 37, 56, 42],
    57: ["Crash Cymbal 2", "high", 57, "CC", 4, 46, 42, 42],
    58: ["Vibraslap", "mid", 58, "VS", 6, 37, 37, 42],
    59: ["Ride Cymbal 2", "high", 59, "RC", 3, 42, 51, 42],
    60: ["Hi Bongo", "high", 60, "LB", 8, 45, 63, 42],
    61: ["Low Bongo", "mid", 61, "HB", 7, 45, 64, 38],
    62: ["Mute Hi Conga", "mid", 62, "MC", 8, 50, 62, 38],
    63: ["Open Hi Conga", "high", 63, "HC", 8, 50, 63, 42],
    64: ["Low Conga", "low", 64, "LC", 7, 45, 64, 36],
    65: ["High Timbale", "mid", 65, "", 8, 45, 63, 38],
    66: ["Low Timbale", "low", 66, "", 7, 45, 64, 36],
    67: ["High Agogo", "high", 67, "", -1, 37, 56, 42],
    68: ["Low Agogo", "mid", 68, "", -1, 37, 56, 38],
    69: ["Cabasa", "high", 69, "MA", -1, 42, 69, 42],
    70: ["Maracas", "high", 69, "MA", -1, 42, 69, 42],
    71: ["Short Whistle", "high", 71, "", -1, 37, 56, 42],
    72: ["Long Whistle", "high", 72, "", -1, 37, 56, 42],
    73: ["Short Guiro", "high", 73, "", -1, 42, 42, 42],
    74: ["Long Guiro", "high", 74, "", -1, 46, 46, 42],
    75: ["Claves", "high", 75, "", -1, 37, 75, 42],
    76: ["Hi Wood Block", "high", 76, "", 8, 50, 63, 42],
    77: ["Low Wood Block", "mid", 77, "", 7, 45, 64, 38],
    78: ["Mute Cuica", "high", 78, "", -1, 50, 62, 42],
    79: ["Open Cuica", "high", 79, "", -1, 45, 63, 42],
    80: ["Mute Triangle", "high", 80, "", -1, 37, 75, 42],
    81: ["Open Triangle", "high", 81, "", -1, 37, 75, 42],
}

In [None]:
def midifile2hv_list(file_name, mapping):
    '''
    Read a MIDI drum file and convert it to a list of one-bar hv_lists.

    An hv_list is a list of 16 steps (sixteenth notes); every step is a sorted
    list of unique (note, velocity) tuples, with velocity normalized to 0..1.

    Args:
        file_name: path of the MIDI file (the name must include ".mid").
        mapping: instrument mapping applied to the note numbers, one of
            "allinstruments", "16instruments", "8instruments" or
            "3instruments". Any other value falls back to "allinstruments".

    Returns:
        A list with one hv_list per 16-step fragment of the file, keeping only
        the fragments that have onsets on more than 4 steps. The list is empty
        when the file holds no note that is a key of GM_dict.
    '''
    # GM_dict column that holds the substitute note number for each mapping
    mapping_columns = {
        "allinstruments": 2,
        "16instruments": 6,
        "8instruments": 5,
        "3instruments": 7,
    }
    column = mapping_columns.get(mapping, 2)  # unknown mapping -> "allinstruments"

    mid = mido.MidiFile(file_name)  # create a mido file instance
    sixteenth = mid.ticks_per_beat / 4  # length of a sixteenth note in ticks

    pattern = []  # (step, note, velocity) of every mapped onset
    last_rounded_step = 0  # latest onset position, used to size the pattern
    for track in mid.tracks:
        # Inside a track, msg.time is a delta time in ticks relative to the
        # previous message of that same track, so the clock restarts per track.
        acc = 0
        for msg in track:
            acc += msg.time  # accumulate the time of any message type
            # a note_on with velocity 0 is the alternative encoding of note_off
            if msg.type != "note_on" or msg.velocity == 0:
                continue
            if msg.note not in GM_dict:
                continue
            midinote = GM_dict[msg.note][column]  # remap msg.note on demand
            midivelocity = msg.velocity / 127  # normalize upfront
            # NOTE(review): the onset step is truncated while the pattern
            # length below uses +0.45 rounding — confirm this is intended.
            pattern.append((int(acc / sixteenth), midinote, midivelocity))
            last_rounded_step = max(last_rounded_step, int((acc / sixteenth) + 0.45))

    if not pattern:
        # nothing to analyze: no mapped onset in the whole file
        return []

    # round the pattern length up to the next multiple of 16 steps
    pattern_len_in_steps = ((last_rounded_step + 15) // 16) * 16

    # one independent list per step (a shared [[]]*n list must not be used here)
    output_pattern = [[] for _ in range(pattern_len_in_steps)]
    for step, midinote, midivelocity in pattern:
        if step < pattern_len_in_steps:
            output_pattern[step].append((midinote, midivelocity))
    # make sure no notes are repeated and the events of each step are sorted
    output_pattern = [sorted(set(events)) for events in output_pattern]

    ##################################
    # split the pattern every 16 steps
    ##################################
    hv_lists_split = []
    for start in range(0, pattern_len_in_steps, 16):
        patt_fragment = output_pattern[start:start + 16]
        patt_density = sum(1 for events in patt_fragment if events)

        # keep only fragments with onsets on more than 4 of their 16 steps
        # NOTE: more conditions could be added (i.e. kick on step 0, etc)
        if patt_density > 4:
            hv_lists_split.append(patt_fragment)

    # every element of the output is a 16-step pattern
    return hv_lists_split

def find_unique_hv_lists(hv_lists_split):
  """Return the distinct hv_lists found in ``hv_lists_split``.

  Every hv_list is converted to a tuple of per-step tuples so that it becomes
  hashable. The result is a list of those tuple-encoded patterns, without
  duplicates, in order of first appearance.
  """
  hashable_patterns = (
      tuple(tuple(step) for step in hv_list) for hv_list in hv_lists_split
  )
  # dict keys are unique and keep insertion order, so the output is deterministic
  unique_hv = list(dict.fromkeys(hashable_patterns))
  return unique_hv

In [None]:
# hv list flattening
def flatten_hv_list(hv_list):
  """Flatten an hv_list into one weighted, normalized value per step.

  Every onset (note, velocity) contributes its velocity multiplied by a
  category weight: 3 for low instruments, 2 for mid instruments and 1 for
  anything else. The step values are then divided by the largest one.

  Args:
    hv_list: list of steps, each step a list of (note, velocity) onsets.

  Returns:
    A numpy array of shape (len(hv_list), 1). When no step has any weight
    (or the list is empty) the array of zeros is returned as is, instead of
    dividing by zero.
  """
  # note numbers of the low and mid categories; every other note counts as high
  lows = {35, 36, 41, 45, 47, 64, 66}
  mids = {37, 38, 39, 40, 43, 48, 50, 61, 62, 65, 68, 77}

  flat = np.zeros([len(hv_list), 1])

  # multiply velocities and categories
  for i, step in enumerate(hv_list):
    step_weight = 0
    for onset in step:
      if onset[0] in lows:
        step_weight += onset[1] * 3
      elif onset[0] in mids:
        step_weight += onset[1] * 2
      else:
        step_weight += onset[1] * 1
    flat[i] = step_weight

  # normalize by the strongest step, unless there is nothing to normalize
  peak = flat.max() if flat.size else 0
  if peak != 0:
    flat = flat / peak
  return flat

In [None]:
def MV_flattening(dualized):
  """Flatten a pattern by keeping the maximum velocity of every step.

  Args:
    dualized: list of steps, each step a list of (note, velocity) onsets.

  Returns:
    A list with one value per step: the largest velocity among the onsets of
    the step (whatever their number), or 0 when the step has no onsets.
  """
  return [max((onset[1] for onset in step), default=0) for step in dualized]

In [None]:
# Load all patterns tested with four participants and build "data_4p": a dict
# of parallel lists holding one entry per repetition file found on disk.
# NOTE: folder_path is an absolute path on the author's machine; change it to
# the location of the dataset before running this cell.
folder_path = '/Users/danielmartinezvillegas/Developer/master-ds/✨TDG/tdg_notebooks/dataset/midi_files/Repetitions/tested_with_four_participants'

# List all files and subfolders in the folder (the walk is recursive)
folder_names_4p, file_names_4p = list_files_and_subfolders(folder_path)


folders_4p = [] # container for the subfolder each repetition came from
originals_hv_4p = [] # save the original version in HV format
originals_4p = [] # container for all original flattened patterns
subject_4p = [] # make a container for all subject IDs
repetition_4p = [] # container for repetition IDs
flat_repetition_4p = [] # container for flattened repetitions
ids_4p = [] # container for pattern IDs

for subfolder in folder_names_4p:
  #print (subfolder.replace(folder_path,""))

  # process original, there is always an "original.mid" in each folder
  originals = midifile2hv_list(subfolder+"/original.mid", "allinstruments")
  originals_flat = [flatten_hv_list(x).T for x in originals]
  # join the first two 16-step fragments into one flat vector; this requires
  # the original to yield at least two fragments (IndexError otherwise)
  originals_flat = np.concatenate((originals_flat[0], originals_flat[1]))
  originals_flat = np.ravel(originals_flat)
  #print ("original", originals_flat)

  # now process the repetitions done by subjects
  s,files_in_subfolder = list_files_and_subfolders(subfolder)
  for file_ in files_in_subfolder:
    if file_ != subfolder+"/original.mid":
      # extract participant name and repetition: they are the 2nd and 4th
      # "_"-separated fields of the file name, taken before its first "."
      participant = file_.replace(subfolder, "").split(".")[0].split("_")[1]
      repetition = file_.replace(subfolder, "").split(".")[0].split("_")[3]
      # flatten the drum pattern
      hv_list = midifile2hv_list(file_, "allinstruments")
      # the pattern id is the integer that follows the leading "[" of the
      # subfolder name, up to the first space
      patt_id = int(subfolder.replace(folder_path,"").split(" ")[0].replace("/[",""))
      # repetitions are flattened with MV_flattening (maximum velocity per
      # step), while the originals above use flatten_hv_list
      dual_flat = []
      for l in hv_list:
        dual_flat.append(MV_flattening(l))
      # concatenate the per-fragment lists into a single 1-D array
      dual_flat = np.array([item for sublist in dual_flat for item in sublist])
      #print (participant, repetition, dual_flat)

      # create data structures for analysis (one entry per repetition file)
      folders_4p.append(subfolder)
      originals_4p.append(originals_flat)
      subject_4p.append(participant)
      repetition_4p.append(repetition)
      flat_repetition_4p.append(dual_flat)
      originals_hv_4p.append(originals)
      ids_4p.append(patt_id)
# column name -> list of values, later turned into the df_4p dataframe
data_4p = {"folder":folders_4p,
           "pattern id": ids_4p,
           "original hv": originals_hv_4p,
           "original flat":originals_4p,
           "subject": subject_4p,
           "repetition": repetition_4p,
           "dualized flat": flat_repetition_4p}

In [None]:
# Show the 12th "/"-separated component of every subfolder path.
# NOTE: index 11 is tied to the depth of the absolute folder_path used above
# and has to change if the dataset lives at a different depth.
[x.split('/')[11] for x in folder_names_4p]

In [None]:
# Turn the collected lists into a dataframe and print a short overview of it
df_4p = pd.DataFrame(data_4p)
n_interpretations = df_4p.shape[0]
n_unique_patterns = len(df_4p['pattern id'].unique())

print ("----- SUMMARY -----")
print ("The 'df_4p' dataframe has a", df_4p.shape, "shape")
print ("There are", n_interpretations, "interpretations made by subjects to", n_unique_patterns, "unique drum patterns")
print ("As an example, the dataframe has the following columns and content in each column:")
# one line per column, showing the value stored in the first row
for col_position, col_name in enumerate(df_4p.columns.tolist()):
  print ("*", col_name, ":", df_4p.iloc[0, col_position])
print ()
print ("Please, note that the patterns are 32 steps long, thus len(original hv) =", len(df_4p.iloc[0,2]), "bars")
print ("This means that we have TWO one-bar patterns times each one of the", n_unique_patterns, "unique patterns (", n_unique_patterns * 2, "patterns in total)." )
print ("And also", n_interpretations*2, "one-bar interpretations by subjects.")

In [None]:
# Pattern ids present in the dataframe, in ascending order
df_4p['pattern id'].sort_values().unique()

In [None]:
# Display the dataframe with one row per interpretation
df_4p

In [None]:
# Original hv_lists stored in the first row that belongs to pattern id 1
df_4p.loc[df_4p['pattern id'] == 1, 'original hv'].iloc[0]

In [None]:
# Flattened original pattern stored in the first row
df_4p['original flat'].iloc[0]

In [None]:
# Flattened repetition (maximum velocity per step) stored in the first row
df_4p['dualized flat'].iloc[0]

In [None]:
# Build df_e1: one row per one-bar pattern (bars A and B of every two-bar
# original) together with all the repetitions made by subjects for that bar.

# NOTE(review): the rows built here are later paired by position with
# e1_positions (through df2_e1), so the iteration order of unique_ids matters.
# The set-based order is kept unchanged on purpose — confirm before replacing
# it with sorted().
unique_ids = list(set((df_4p["pattern id"].tolist())))
ttd_patterns = []
ttd_repetitions = []
count = 0  # not used in this cell; kept in case another cell reads it

for pattern_id in unique_ids:
  rows_of_pattern = df_4p["pattern id"] == pattern_id

  # every row of a pattern stores the same original, so take the first one
  twobar_hv = df_4p.loc[rows_of_pattern, "original hv"].values[0]

  # the original is already split in 16-step (one-bar) hv_lists: A and B
  ttd_patterns.append(twobar_hv[0])
  ttd_patterns.append(twobar_hv[1])

  # group the repetitions of this pattern, split in the same two bars:
  # the first 16 steps belong to bar A and the remaining ones to bar B
  id_repetitions = df_4p.loc[rows_of_pattern, "dualized flat"]
  ttd_repetitions.append([flat_rep[:16] for flat_rep in id_repetitions])
  ttd_repetitions.append([flat_rep[16:] for flat_rep in id_repetitions])

data_ttd = {"pattern": ttd_patterns,
            "repetitions": ttd_repetitions
            }
df_e1 = pd.DataFrame(data_ttd)

In [None]:
# One-bar pattern stored in row 0 of df_e1
df_e1['pattern'][0]

In [None]:
# Repetitions grouped with the pattern of row 0
df_e1['repetitions'][0]

In [None]:
# One-bar pattern stored in row 1 of df_e1
df_e1['pattern'][1]

In [None]:
# Display the dataframe of one-bar patterns and their repetitions
df_e1

In [None]:
# Load the 16 patterns used in the tap to drum experiment, as well as the taps
# made by each one of the 37 valid subjects that participated in it, and
# append them to a copy of df_e1 (df2_e1).

# Unfortunately drive and colab do not allow to fetch the CWD of this .ipynb,
# therefore the folder is hard-coded with the author's OWN address.
# In order for this cell to work for you, replace it with the address of the
# folder where this "PDG - TTD T2D to FWOD" is located.

ipynb_folder = "/Users/danielmartinezvillegas/Developer/master-ds/✨TDG/tdg_notebooks/"
t2d_midi_folder = ipynb_folder+'tap to drum taps/midi patterns'

# load the MIDI files that subjects listened to and convert them to hv_list format
# Only MIDI files are kept, so that other directory entries are not handed to mido.
# NOTE(review): os.listdir returns the entries in arbitrary order, while the
# patterns are paired by position with the taps below — confirm that this
# order matches the order of the patterns inside the pickle file.
midi_files = [entry for entry in os.listdir(t2d_midi_folder)
              if entry.lower().endswith(('.mid', '.midi'))]

t2d_hv_lists = []
for file_name in midi_files:
  hv_list = midifile2hv_list(t2d_midi_folder+'/'+file_name, "allinstruments")
  t2d_hv_lists.append(hv_list)

# NOTE: unpickling can execute arbitrary code; only load a file you trust.
with open(ipynb_folder+'tap to drum taps/valid_subjects_normalized.pkl', 'rb') as file:
    t2d_taps = pickle.load(file)

# the "t2d_taps" file contains 37 lists of lists. Each list belongs to a subject
# the sublists are the taps made by this subject to each of the 16 patterns


print( "here we have", len(t2d_taps), "lists each belonging to a subject. Each list has", len(t2d_taps[0]), "taps carried out to the", len(t2d_taps[0]), "patterns in the stimuli. We need to transpose this data structure so we get 16 lists each with 37 repetitions.")

# "transpose" the list of lists so that the lists are now patterns (16) and each sublist has 37 lists (subjects)

transposed = list(map(list, zip(*t2d_taps)))

print ("After transposing, we have", len(transposed), "lists, each with ", len (transposed[0]), "repetitions.")

# every tapped pattern needs its MIDI counterpart
if len(t2d_hv_lists) < len(transposed):
  raise ValueError(
      "found %d MIDI patterns but taps for %d patterns" % (len(t2d_hv_lists), len(transposed))
  )

# one new row per tapped pattern: the first 16-step fragment of the MIDI file
# together with the taps of every subject for that pattern
t2d_rows = pd.DataFrame({
    'pattern': [t2d_hv_lists[i][0] for i in range(len(transposed))],
    'repetitions': transposed,
})

# add the new rows after those of df_e1, all at once and without modifying df_e1
df2_e1 = pd.concat([df_e1, t2d_rows], ignore_index=True)

df2_e1



In [None]:
# Count the rows of df2_e1 whose pattern does not appear in df_e1
# NOTE(review): the 'pattern' cells hold lists, which are unhashable — confirm
# that Series.isin accepts them on the pandas version in use.
len(df2_e1[~(df2_e1['pattern'].isin(df_e1['pattern']))])

In [None]:
# Number of MIDI patterns that were converted to hv_list format
len(t2d_hv_lists)

In [None]:
# Display the raw taps loaded from the pickle file
t2d_taps

In [None]:
# The positions of the patterns are found offline (this colab is already way
# too long), so every pattern of df_e1 is exported as a pickle file.
all_patts = list(df_e1['pattern'])

with open('e1_all_hvs.pkl', 'wb') as pickle_out:
    pickle.dump(all_patts, pickle_out)

In [None]:
# Two-value position of every pattern, one pair per row of df2_e1 (the list is
# assigned to its 'position' column, so the order of the pairs must match the
# order of the rows).
# NOTE(review): presumably these are the positions found offline from the
# exported patterns — the computation is not part of this notebook.
e1_positions= [[0.25481138, 0.49981508],
[0.2179403, 0.51954602],
[0.27777847, 0.06701446],
[0.71630662, 0.25267993],
[0.43041888, 0.42072654],
[0.40519822, 0.48152895],
[0.83222983, 0.54226289],
[0.92902444, 0.29793656],
[0.31190826, 0.25127162],
[0.37440468, 0.38623256],
[0.35099213, 0.30081959],
[0.42376579, 0.31108265],
[0.34432537, 0.0798511, ],
[0.3905479, 0.16702623],
[0.54045633, 0.03551223],
[0.37957815, 0.05036911],
[0.33028617, 0.44996507],
[0.385125, 0.35979486],
[0.24655232, 0.40189878],
[0.56584918, 0.22009101],
[0.2012851, 0.32869522],
[0.22260865, 0.38787051],
[0.13664515, 0.37005278],
[0.29416913, 0.38489171],
[0.18195024, 0.1791192, ],
[0.19270558, 0.20844821],
[0.46405735, 0.27302673],
[0.40850124, 0.28987157],
[0.76677301, 0.33896449],
[0.70891163, 0.59471925],
[0.66651777, 0.63579998],
[0.62783619, 0.43221262],
[0.49769044, 0.24926781],
[0.50178841, 0.40933657],
[0.35819347, 0.48532311],
[0.44685095, 0.40419707],
[0.41132253, 0.43267381],
[0.42649261, 0.5220558, ],
[0.32750971, 0.17332979],
[0.46761106, 0.48329648],
[0.75550354, 0.47642278],
[0.93384121, 0.32070003],
[0.29318741, 0.1739194, ],
[0.42579347, 0.17202725],
[0.35563183, 0.55346241],
[0.52196573, 0.39835123],
[0.29508658, 0.2322916, ],
[0.27331406, 0.25483046],
[0. , 0.46440547],
[0.2742823, 0.70285346],
[0.11828873, 0.52326595],
[1. , 0.52866413],
[0.18289882, 0.69457992],
[0.29797083, 0.69244345],
[0.33766636, 0.70401435],
[0.01289696, 0.56200238],
[0.12932893, 0.73133141],
[0.44021464, 0.9328381, ],
[0.58169354, 0.88064595],
[0.32888519, 0.70114832],
[0.33066353, 0.62362863],
[0.59185347, 0.88432219],
[0.74505267, 0.16550339],
[0.10049918, 0.29120842]]

In [None]:
# Add the positions to the dataframe as a new 'position' column.
# The pairs are matched to the rows by order, so e1_positions needs exactly
# one entry per row of df2_e1. This modifies df2_e1 in place.

df2_e1['position'] = e1_positions

In [None]:
# Display the final dataframe: pattern, repetitions and position
df2_e1