# CSV files for corpus_anaysis.ipynb

This notebook contains the code that builds every CSV file needed for the notebook 'corpus_anaysis'.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from glob import glob
from fractions import Fraction
from math import *
import ast

import sys
sys.path.append('../python_scripts')

from metric import get_distance
from progression import get_progression
from data_types import ChordType,PitchType
from constants import TRIAD_REDUCTION
from utils import get_chord_pitches

The corpus used is in the folder ../Data/output_sps_kse-100

Creation of the CSV file corpus.csv

In [None]:
# Build ../Data/corpus.csv: one row per *_results.tsv file found under the
# corpus folder, holding the duration-weighted accuracies and the
# duration-weighted average chord distances computed for that file.

# Shared by the glob pattern and by the slicing that derives each piece name,
# so the two can never drift apart.
_CORPUS_DIR = '../Data/output_sps_kse-100/'
_RESULTS_SUFFIX = '_results.tsv'


def _chord_distance(row, distance, **weights):
    """Return get_distance between the ground-truth and estimated chord of a row.

    Args:
        row: one row of a results table; its gt_*/est_* chord root, type and
            inversion fields are forwarded to get_distance.
        distance: name of the metric, passed as get_distance's `distance`.
        **weights: metric-specific keyword arguments (bass_weight,
            root_weight) forwarded unchanged.
    """
    return get_distance(distance=distance,
                        root1=row.gt_chord_root,
                        root2=row.est_chord_root,
                        chord_type1=row.gt_chord_type,
                        chord_type2=row.est_chord_type,
                        inversion1=row.gt_chord_inv,
                        inversion2=row.est_chord_inv,
                        **weights)


def _weighted_mean(df, column):
    """Return the mean of df[column] weighted by df['duration'], as a float."""
    return float(np.average(df[column], weights=df['duration']))


path_list = []
avg_bin_acc_list = []
avg_root_acc_list = []
avg_triad_acc_list = []
avg_7th_acc_list = []
avg_inv_acc_list = []
avg_sps_list = []
avg_tbt_list = []
avg_vl_list = []

for df_path in glob(f"{_CORPUS_DIR}**/*{_RESULTS_SUFFIX}", recursive=True):
    results_df = pd.read_csv(df_path, sep='\t', index_col=0, converters={'duration': Fraction})

    # The chord types are stored as dotted strings; the part after the first
    # dot is used as the ChordType member name.
    for type_column in ('gt_chord_type', 'est_chord_type'):
        results_df[type_column] = results_df[type_column].apply(
            lambda r: ChordType[r.split(".")[1]])

    results_df['sps_distance'] = results_df.apply(
        lambda r: _chord_distance(r, 'SPS'), axis=1)

    results_df['tbt_distance'] = results_df.apply(
        lambda r: _chord_distance(r, 'tone by tone', bass_weight=3, root_weight=3), axis=1)

    results_df['vl_distance'] = results_df.apply(
        lambda r: _chord_distance(r, 'voice leading', bass_weight=3), axis=1)

    # Normalise the voice-leading distances by the largest one in this file.
    # The maximum is computed once (not once per row), and a file whose
    # distances are all 0 is left as zeros instead of dividing by zero.
    vl_max = results_df['vl_distance'].max()
    if vl_max != 0:
        results_df['vl_distance'] = results_df['vl_distance'] / vl_max

    path_list.append(df_path)
    avg_bin_acc_list.append(_weighted_mean(results_df, 'full_correct'))
    avg_root_acc_list.append(_weighted_mean(results_df, 'root_correct'))
    avg_triad_acc_list.append(_weighted_mean(results_df, 'triad_correct'))
    avg_7th_acc_list.append(_weighted_mean(results_df, '7th_correct'))
    avg_inv_acc_list.append(_weighted_mean(results_df, 'inv_correct'))
    avg_sps_list.append(_weighted_mean(results_df, 'sps_distance'))
    avg_tbt_list.append(_weighted_mean(results_df, 'tbt_distance'))
    avg_vl_list.append(_weighted_mean(results_df, 'vl_distance'))

Chord_symbol_recall_df = pd.DataFrame({'path': path_list,
                                       'binary_accuracy': avg_bin_acc_list,
                                       'root_accuracy': avg_root_acc_list,
                                       'triad_accuracy': avg_triad_acc_list,
                                       '7th_accuracy': avg_7th_acc_list,
                                       'inversion_accuracy': avg_inv_acc_list,
                                       'average_sps': avg_sps_list,
                                       'average_tbt': avg_tbt_list,
                                       'average_vl': avg_vl_list,
                                       })

# The piece name is the path with the corpus folder prefix and the
# '_results.tsv' suffix stripped off.
Chord_symbol_recall_df['name'] = Chord_symbol_recall_df.path.apply(
    lambda r: r[len(_CORPUS_DIR):len(r) - len(_RESULTS_SUFFIX)])

Chord_symbol_recall_df.to_csv('../Data/corpus.csv', index=False)