# CSV files for metrics_features.ipynb

This notebook contains the code that builds every CSV file needed by the notebook 'metrics_features'.

In [1]:
import pandas as pd
import numpy as np

from glob import glob
from fractions import Fraction
from math import *

from chord_eval.metric import get_distance
from chord_eval.data_types import ChordType, PitchType
from chord_eval.constants import TRIAD_REDUCTION

___
## Metric comparison

Creation of csv file dist_to_Cmaj.csv

In [2]:
# Exhaustive grid of candidate chords, ordered as: inversion (outermost, 0-3),
# then root (12 values), then chord type (innermost).
# The repeat counts of 12 only line up if ChordType has exactly 12 members;
# otherwise the columns have different lengths and the DataFrame cannot be built.
tpc_by_midi_root = [0, -5, 2, -3, 4, -1, -6, 1, -4, 3, -2, 5]  # tpc_root paired with midi_root 0..11, in order

chord_types = list(ChordType) * (12 * 4)
midi_roots = [root for root in list(np.arange(0, 12)) for _ in range(12)] * 4
tpc_roots = [root for root in tpc_by_midi_root for _ in range(12)] * 4
inversions = [inv for inv in list(np.arange(4)) for _ in range(12 * 12)]

dist_to_Cmaj = pd.DataFrame({
    'midi_root': midi_roots,
    'tpc_root': tpc_roots,
    'chord_type': chord_types,
    'inversion': inversions,
})

In [3]:
# 'SPS' distance between the reference chord (root 0, major, inversion 0) and
# every chord of the grid, stored as a new column.
distance = 'SPS'
SPS_dist = []
for _, row in dist_to_Cmaj.iterrows():
    SPS_dist.append(
        get_distance(
            distance=distance,
            root1=0, chord_type1=ChordType.MAJOR, inversion1=0,
            root2=row.midi_root, chord_type2=row.chord_type, inversion2=row.inversion,
        )
    )
dist_to_Cmaj['SPS_dist'] = SPS_dist

In [4]:
# 'voice leading' distance (bass_weight=3) between the reference chord
# (root 0, major, inversion 0) and every chord of the grid, using MIDI roots.
distance = 'voice leading'
voice_leading_dist = []
for _, row in dist_to_Cmaj.iterrows():
    voice_leading_dist.append(
        get_distance(
            distance=distance,
            root1=0, chord_type1=ChordType.MAJOR, inversion1=0,
            root2=row.midi_root, chord_type2=row.chord_type, inversion2=row.inversion,
            bass_weight=3,
        )
    )
dist_to_Cmaj['voice_leading_dist'] = voice_leading_dist

In [5]:
# Same voice-leading comparison as the 'voice_leading_dist' column, but with
# duplicate_bass=False; the result goes to the 'vl_dist_nodupl' column.
distance = 'voice leading'
vl_options = {'bass_weight': 3, 'duplicate_bass': False}
vl_dist_nodupl = [
    get_distance(
        distance=distance,
        root1=0, chord_type1=ChordType.MAJOR, inversion1=0,
        root2=row.midi_root, chord_type2=row.chord_type, inversion2=row.inversion,
        **vl_options,
    )
    for _, row in dist_to_Cmaj.iterrows()
]
dist_to_Cmaj['vl_dist_nodupl'] = vl_dist_nodupl

In [6]:
# Voice-leading distance computed with PitchType.TPC: the compared chord's root
# is taken from the 'tpc_root' column and only_bass_tpc is switched off.
distance = 'voice leading'
vl_options = {'bass_weight': 3, 'pitch_type': PitchType.TPC, 'only_bass_tpc': False}
vl_dist_tpc = [
    get_distance(
        distance=distance,
        root1=0, chord_type1=ChordType.MAJOR, inversion1=0,
        root2=row.tpc_root, chord_type2=row.chord_type, inversion2=row.inversion,
        **vl_options,
    )
    for _, row in dist_to_Cmaj.iterrows()
]
dist_to_Cmaj['vl_dist_tpc'] = vl_dist_tpc

In [7]:
# TPC voice-leading distance with duplicate_bass=False and only_bass_tpc=False;
# the result goes to the 'vl_dist_tpc_nodupl' column.
distance = 'voice leading'
vl_options = {
    'bass_weight': 3,
    'duplicate_bass': False,
    'pitch_type': PitchType.TPC,
    'only_bass_tpc': False,
}
vl_dist_tpc_nodupl = []
for _, row in dist_to_Cmaj.iterrows():
    vl_dist_tpc_nodupl.append(
        get_distance(
            distance=distance,
            root1=0, chord_type1=ChordType.MAJOR, inversion1=0,
            root2=row.tpc_root, chord_type2=row.chord_type, inversion2=row.inversion,
            **vl_options,
        )
    )
dist_to_Cmaj['vl_dist_tpc_nodupl'] = vl_dist_tpc_nodupl

In [8]:
# TPC voice-leading distance with only_bass_tpc=True; the result goes to the
# 'vl_dist_tpc_bass' column.
distance = 'voice leading'
vl_options = {'bass_weight': 3, 'pitch_type': PitchType.TPC, 'only_bass_tpc': True}
vl_dist_tpc_bass = []
for _, row in dist_to_Cmaj.iterrows():
    vl_dist_tpc_bass.append(
        get_distance(
            distance=distance,
            root1=0, chord_type1=ChordType.MAJOR, inversion1=0,
            root2=row.tpc_root, chord_type2=row.chord_type, inversion2=row.inversion,
            **vl_options,
        )
    )
dist_to_Cmaj['vl_dist_tpc_bass'] = vl_dist_tpc_bass

In [9]:
# 'tone by tone' distance (root_weight=3, bass_weight=3) between the reference
# chord (root 0, major, inversion 0) and every chord of the grid.
distance = 'tone by tone'
tbt_options = {'root_weight': 3, 'bass_weight': 3}
tone_by_tone_dist = []
for _, row in dist_to_Cmaj.iterrows():
    tone_by_tone_dist.append(
        get_distance(
            distance=distance,
            root1=0, chord_type1=ChordType.MAJOR, inversion1=0,
            root2=row.midi_root, chord_type2=row.chord_type, inversion2=row.inversion,
            **tbt_options,
        )
    )
dist_to_Cmaj['tone_by_tone_dist'] = tone_by_tone_dist

In [10]:
# Persist the full distance table (row index not written).
dist_to_Cmaj.to_csv(path_or_buf='../Data/dist_to_Cmaj.csv', index=False)

___

In [2]:
# Reload the distance table from disk. Chord types come back from the CSV as
# 'ChordType.NAME' strings and are mapped back to ChordType members.
dist_to_Cmaj = pd.read_csv('../Data/dist_to_Cmaj.csv')
# Every chord type that appears as a value of TRIAD_REDUCTION.
TRIAD = {TRIAD_REDUCTION[chord_type] for chord_type in ChordType}
dist_to_Cmaj['chord_type'] = dist_to_Cmaj['chord_type'].map(
    lambda label: ChordType[label.split(".")[1]]
)

In [3]:
# Remove the rows whose chord type is in TRIAD and whose inversion is 3
# (presumably because a three-note chord has no third inversion), then label
# each remaining row as '<CHORD_TYPE>_inv<inversion>'.
triad_third_inversion = dist_to_Cmaj.query('chord_type in @TRIAD and inversion == 3').index
dist_to_Cmaj.drop(index=triad_third_inversion, inplace=True)
dist_to_Cmaj['chord_type_inv'] = dist_to_Cmaj.apply(
    lambda row: f'{str(row.chord_type).split(".")[1]}_inv{row.inversion!s}',
    axis=1,
)

___
## Program comparison for SPS

Creation of csv file sps_programs_toC.csv

In [11]:
# Grid of (root, program) pairs: the 12 roots are cycled once per program, and
# each of the four program numbers is held for 12 consecutive rows.
roots = list(np.arange(0, 12)) * 4
programs = [program for program in (0, 24, 40, 56) for _ in range(12)]

sps_programs_toC = pd.DataFrame({
    'root': roots,
    'program': programs,
})

In [12]:
# 'SPS' distance between a major chord on root 0 played with program 0 (stored
# in the 'Piano' column) and a minor chord on each grid root played with each
# grid program.
# The two integer columns are iterated directly instead of using iterrows():
# iterrows() builds one Series per row and upcasts it to a common dtype, so once
# the frame holds a float result column (e.g. when this cell is re-run), `root`
# and `program` would reach get_distance as floats.
dist_piano = [
    get_distance(distance='SPS', root1=0, root2=root,
                 chord_type1=ChordType.MAJOR, chord_type2=ChordType.MINOR,
                 program1=0, program2=program)
    for root, program in zip(sps_programs_toC['root'], sps_programs_toC['program'])
]
sps_programs_toC['Piano'] = dist_piano

In [13]:
# 'SPS' distance between a major chord on root 0 played with program 24 (stored
# in the 'Guitare' column) and a minor chord on each grid root played with each
# grid program.
# The two integer columns are iterated directly instead of using iterrows():
# iterrows() builds one Series per row and upcasts it to a common dtype, so any
# float result column already present in the frame would turn `root` and
# `program` into floats before they reach get_distance.
dist_guitare = [
    get_distance(distance='SPS', root1=0, root2=root,
                 chord_type1=ChordType.MAJOR, chord_type2=ChordType.MINOR,
                 program1=24, program2=program)
    for root, program in zip(sps_programs_toC['root'], sps_programs_toC['program'])
]
sps_programs_toC['Guitare'] = dist_guitare

In [14]:
# 'SPS' distance between a major chord on root 0 played with program 40 (stored
# in the 'Violin' column) and a minor chord on each grid root played with each
# grid program.
# The two integer columns are iterated directly instead of using iterrows():
# iterrows() builds one Series per row and upcasts it to a common dtype, so any
# float result column already present in the frame would turn `root` and
# `program` into floats before they reach get_distance.
dist_violin = [
    get_distance(distance='SPS', root1=0, root2=root,
                 chord_type1=ChordType.MAJOR, chord_type2=ChordType.MINOR,
                 program1=40, program2=program)
    for root, program in zip(sps_programs_toC['root'], sps_programs_toC['program'])
]
sps_programs_toC['Violin'] = dist_violin

In [15]:
# 'SPS' distance between a major chord on root 0 played with program 56 (stored
# in the 'Trumpet' column) and a minor chord on each grid root played with each
# grid program.
# The two integer columns are iterated directly instead of using iterrows():
# iterrows() builds one Series per row and upcasts it to a common dtype, so any
# float result column already present in the frame would turn `root` and
# `program` into floats before they reach get_distance.
dist_trumpet = [
    get_distance(distance='SPS', root1=0, root2=root,
                 chord_type1=ChordType.MAJOR, chord_type2=ChordType.MINOR,
                 program1=56, program2=program)
    for root, program in zip(sps_programs_toC['root'], sps_programs_toC['program'])
]
sps_programs_toC['Trumpet'] = dist_trumpet

In [16]:
# Persist the program comparison table (row index not written).
sps_programs_toC.to_csv(path_or_buf='../Data/sps_programs_toC.csv', index=False)

___
## Transform comparison for SPS

Creation of csv file sps_transform_toC.csv

In [17]:
# 'SPS' distance between a major chord on root 0 and a minor chord on each of
# the 12 roots, computed once per transform setting (one column per setting).
roots = list(np.arange(0, 12))
sps_transform_toC = pd.DataFrame({'root': roots})

# Column label -> extra keyword arguments forwarded to get_distance.
transform_settings = {
    'stft': {'transform': 'stft'},
    'stft filtered': {'transform': 'stft', 'noise_filtering': True},
    'stft peak': {'transform': 'stft', 'peak_picking': True},
    'cqt': {'transform': 'cqt'},
    'vqt': {'transform': 'vqt'},
    'mel': {'transform': 'mel'},
}
transform_dists = {label: [] for label in transform_settings}

# Row-major evaluation: all settings are computed for a root before moving on.
for _, row in sps_transform_toC.iterrows():
    for label, settings in transform_settings.items():
        transform_dists[label].append(
            get_distance(distance='SPS', root1=0, root2=row.root,
                         chord_type1=ChordType.MAJOR, chord_type2=ChordType.MINOR,
                         **settings)
        )

# Columns are only attached once every distance has been computed.
for label, values in transform_dists.items():
    sps_transform_toC[label] = values



In [18]:
# Persist the transform comparison table (row index not written).
sps_transform_toC.to_csv(path_or_buf='../Data/sps_transform_toC.csv', index=False)

___
## Weight comparison for VL

Creation of csv file vl_weighted_toC.csv

In [19]:
# 'voice leading' distance between a root-position major chord on root 0 and a
# minor chord on every root in inversions 0-2, for bass_weight = 1, 2 and 3.
roots = list(np.arange(0, 12)) * 3
inversions = [inv for inv in list(np.arange(3)) for _ in range(12)]

vl_weighted_toC = pd.DataFrame({
    'root': roots,
    'inversion': inversions,
})

bass_weights = (1, 2, 3)
vl_dists = {weight: [] for weight in bass_weights}

# Row-major evaluation: the three weights are computed for a row before moving on.
for _, row in vl_weighted_toC.iterrows():
    for weight in bass_weights:
        vl_dists[weight].append(
            get_distance(distance='voice leading', root1=0, root2=row.root,
                         chord_type1=ChordType.MAJOR, chord_type2=ChordType.MINOR,
                         inversion1=0, inversion2=row.inversion,
                         bass_weight=weight)
        )

# One column per bass weight: 'w1', 'w2', 'w3'.
for weight in bass_weights:
    vl_weighted_toC[f'w{weight}'] = vl_dists[weight]

In [20]:
# Persist the voice-leading weight comparison table (row index not written).
vl_weighted_toC.to_csv(path_or_buf='../Data/vl_weighted_toC.csv', index=False)

___
## Weight comparison for TbT

Creation of csv file tbt_weighted_toC.csv

In [21]:
# 'tone by tone' distance between a root-position major chord on root 0 and a
# minor chord on every root in inversions 0-2, for each combination of
# bass weight (1-3) and root weight (1-3).
roots = list(np.arange(0, 12)) * 3
inversions = [inv for inv in list(np.arange(3)) for _ in range(12)]

tbt_weighted_toC = pd.DataFrame({
    'root': roots,
    'inversion': inversions,
})

# (column label, keyword arguments) for every weighting, root weight varying
# slowest. For the 'root w1' columns root_weight is deliberately not passed, so
# get_distance falls back to its own default (presumably 1 -- to be confirmed).
weightings = []
for root_w in (1, 2, 3):
    for bass_w in (1, 2, 3):
        weight_kwargs = {'bass_weight': bass_w}
        if root_w != 1:
            weight_kwargs['root_weight'] = root_w
        weightings.append((f'bass w{bass_w}, root w{root_w}', weight_kwargs))

tbt_dists = {label: [] for label, _ in weightings}

# Row-major evaluation: all nine weightings are computed for a row before moving on.
for _, row in tbt_weighted_toC.iterrows():
    for label, weight_kwargs in weightings:
        tbt_dists[label].append(
            get_distance(distance='tone by tone', root1=0, root2=row.root,
                         chord_type1=ChordType.MAJOR, chord_type2=ChordType.MINOR,
                         inversion1=0, inversion2=row.inversion,
                         **weight_kwargs)
        )

# Columns are only attached once every distance has been computed.
for label, _ in weightings:
    tbt_weighted_toC[label] = tbt_dists[label]

In [22]:
# Persist the tone-by-tone weight comparison table (row index not written).
tbt_weighted_toC.to_csv(path_or_buf='../Data/tbt_weighted_toC.csv', index=False)

___
## Corpus analysis

The corpus used is in the folder ../Data/output_sps_kse-100

Creation of csv file corpus.csv

In [30]:
# Per-piece summary of the results stored under ../Data/output_sps_kse-100:
# duration-weighted accuracies plus the duration-weighted mean of three chord
# distances ('SPS', 'tone by tone', 'voice leading'), written to corpus.csv.
corpus_root = '../Data/output_sps_kse-100/'
results_suffix = '_results.tsv'


def _gt_est_distance(results, distance, **weights):
    """Row-wise get_distance between ground-truth and estimated chords.

    Args:
        results: results frame with gt_chord_* / est_chord_* columns.
        distance: metric name passed to get_distance.
        **weights: extra keyword arguments forwarded to get_distance.

    Returns:
        One distance per row of `results`.
    """
    return results.apply(
        lambda r: get_distance(distance=distance,
                               root1=r.gt_chord_root,
                               root2=r.est_chord_root,
                               chord_type1=r.gt_chord_type,
                               chord_type2=r.est_chord_type,
                               inversion1=r.gt_chord_inv,
                               inversion2=r.est_chord_inv,
                               **weights),
        axis=1)


def _corpus_weighted_mean(results, column):
    """Average of `column` weighted by the 'duration' column, as a float."""
    return float(np.average(results[column], weights=results['duration']))


corpus_rows = []
for df_path in glob(corpus_root + '**/*' + results_suffix, recursive=True):
    results_df = pd.read_csv(df_path, sep='\t', index_col=0, converters={'duration': Fraction})

    # Chord types are stored as 'ChordType.NAME' strings in the TSV files.
    for column in ('gt_chord_type', 'est_chord_type'):
        results_df[column] = results_df[column].apply(lambda label: ChordType[label.split(".")[1]])

    results_df['sps_distance'] = _gt_est_distance(results_df, 'SPS')
    results_df['tbt_distance'] = _gt_est_distance(results_df, 'tone by tone',
                                                  bass_weight=3, root_weight=3)
    results_df['vl_distance'] = _gt_est_distance(results_df, 'voice leading', bass_weight=3)

    # Scale the voice-leading distances by the largest one found in this piece.
    # The maximum is computed once (not once per row), and a piece whose
    # distances are all zero is left as zeros instead of dividing by zero.
    vl_max = results_df['vl_distance'].max()
    if vl_max > 0:
        results_df['vl_distance'] = results_df['vl_distance'] / vl_max

    corpus_rows.append({
        'path': df_path,
        'binary_accuracy': _corpus_weighted_mean(results_df, 'full_correct'),
        'root_accuracy': _corpus_weighted_mean(results_df, 'root_correct'),
        'triad_accuracy': _corpus_weighted_mean(results_df, 'triad_correct'),
        '7th_accuracy': _corpus_weighted_mean(results_df, '7th_correct'),
        'inversion_accuracy': _corpus_weighted_mean(results_df, 'inv_correct'),
        'average_sps': _corpus_weighted_mean(results_df, 'sps_distance'),
        'average_tbt': _corpus_weighted_mean(results_df, 'tbt_distance'),
        'average_vl': _corpus_weighted_mean(results_df, 'vl_distance'),
        # Path relative to the corpus folder, without the '_results.tsv' suffix.
        'name': df_path[len(corpus_root):len(df_path) - len(results_suffix)],
    })

# Explicit column list keeps the CSV header stable even when no file is found.
Chord_symbol_recall_df = pd.DataFrame(corpus_rows, columns=[
    'path',
    'binary_accuracy',
    'root_accuracy',
    'triad_accuracy',
    '7th_accuracy',
    'inversion_accuracy',
    'average_sps',
    'average_tbt',
    'average_vl',
    'name',
])

Chord_symbol_recall_df.to_csv('../Data/corpus.csv', index=False)

___
## Program analysis

The corpus used is in the folder ../Data/output_sps_kse-100

Creation of csv file programs_corpus_df.csv

In [None]:
# Per-piece, per-program summary of the results stored under
# ../Data/output_sps_kse-100: duration-weighted accuracies (independent of the
# program) and the duration-weighted mean 'SPS' distance when both chords are
# rendered with the same program. Written to programs_corpus_df.csv.
program_num_list = (0, 11, 19, 24, 40, 43, 53, 56, 66, 71, 73)
program_name_list = ('piano',
                    'vibraphone',
                    'organe',
                    'guitare',
                    'violin',
                    'contrabass',
                    'voice',
                    'trumpet',
                    'saxophone',
                    'clarinette',
                    'flute')

corpus_root = '../Data/output_sps_kse-100/'
results_suffix = '_results.tsv'

# Output column -> boolean column of the results file it is averaged from.
accuracy_sources = {
    'binary_accuracy': 'full_correct',
    'root_accuracy': 'root_correct',
    'triad_accuracy': 'triad_correct',
    '7th_accuracy': '7th_correct',
    'inversion_accuracy': 'inv_correct',
}

# Load and parse every results file once; nothing here depends on the program,
# so there is no need to re-read the corpus for each of the programs.
pieces = []
for df_path in glob(corpus_root + '**/*' + results_suffix, recursive=True):
    results_df = pd.read_csv(df_path, sep='\t', index_col=0, converters={'duration': Fraction})

    # Chord types are stored as 'ChordType.NAME' strings in the TSV files.
    for column in ('gt_chord_type', 'est_chord_type'):
        results_df[column] = results_df[column].apply(lambda label: ChordType[label.split(".")[1]])

    piece_scores = {'path': df_path}
    for out_column, source_column in accuracy_sources.items():
        piece_scores[out_column] = float(np.average(results_df[source_column],
                                                    weights=results_df['duration']))
    # Path relative to the corpus folder, without the '_results.tsv' suffix.
    # The prefix length must match the glob prefix above; slicing with a shorter
    # prefix leaves part of the folder name at the start of the piece name.
    piece_name = df_path[len(corpus_root):len(df_path) - len(results_suffix)]
    pieces.append((piece_scores, piece_name, results_df))

# Rows are collected program by program, pieces in glob order within a program.
program_rows = []
for program, num in zip(program_name_list, program_num_list):
    for piece_scores, piece_name, results_df in pieces:
        sps_distance = results_df.apply(lambda r: get_distance(distance='SPS',
                                                               root1=r.gt_chord_root,
                                                               root2=r.est_chord_root,
                                                               chord_type1=r.gt_chord_type,
                                                               chord_type2=r.est_chord_type,
                                                               inversion1=r.gt_chord_inv,
                                                               inversion2=r.est_chord_inv,
                                                               program1=num,
                                                               program2=num
                                                               ), axis=1)
        program_rows.append({
            **piece_scores,
            'average_sps': float(np.average(sps_distance, weights=results_df['duration'])),
            'program': program,
            'name': piece_name,
        })

# Build the frame in one go (instead of concatenating inside the loop); the
# explicit column list keeps the CSV header stable even when no file is found.
programs_corpus_df = pd.DataFrame(program_rows, columns=[
    'path',
    'binary_accuracy',
    'root_accuracy',
    'triad_accuracy',
    '7th_accuracy',
    'inversion_accuracy',
    'average_sps',
    'program',
    'name',
])

programs_corpus_df.to_csv('../Data/programs_corpus_df.csv', index=False)