# CSV files for sonatas_annotations.ipynb

This notebook contains the code that builds every CSV file needed by the notebook 'sonatas_annotations'.

In [1]:
import pandas as pd
import numpy as np
import re
import sys
sys.path.append('../python_scripts')

from glob import glob
from fractions import Fraction
from ms3 import Parse

from metric import get_distance
from progression import get_progression
from data_types import ChordType,PitchType
from constants import TRIAD_REDUCTION
from utils import get_chord_pitches

___

Creation of the folder Beethoven_sonata_progression with the CSV files of both annotations of the first movement of the Beethoven sonatas.

In [6]:
def _load_annotation(tsv_path):
    """Read one annotation TSV into the frame layout passed to get_progression.

    Durations are parsed as Fraction, the textual chord_type is turned into
    the ChordType member named after the dot, and NaN cells become None.
    """
    labels = pd.read_csv(tsv_path, sep='\t', converters={'duration': Fraction})
    labels['chord_type'] = labels['chord_type'].apply(lambda r: ChordType[r.split(".")[1]])
    return labels.replace({np.nan: None})


# For every DCML annotation file, load the FH annotation with the same
# number/movement, compare the two and store the resulting progression as CSV.
for dcml_df_path in glob("../Data/sonatas_annotations/Beethoven-labels/dcml*.tsv", recursive=True):

    dcml_df = _load_annotation(dcml_df_path)

    # The whole path is split on '-', so the hyphen of "Beethoven-labels"
    # counts as well: field 2 is the number and field 3 is the rest of the
    # file name (presumably "<movement>.tsv" -- it is reused verbatim below).
    path_fields = dcml_df_path.split('-')
    num, movement = path_fields[2], path_fields[3]

    fh_df_path = '../Data/sonatas_annotations/Beethoven-labels/fh-'+num+'-'+movement
    fh_df = _load_annotation(fh_df_path)

    progression = get_progression(
        dcml_df,
        fh_df,
        vl_kws={'bass_weight':3},
        tbt_kws={'bass_weight':3, 'root_weight':3},
    )

    # NOTE(review): the output name always ends in "-1.csv", whatever
    # `movement` is -- fine as long as only first movements are present.
    progression.to_csv('../Data/sonatas_annotations/progressions/'+num+'-1.csv', index=False)

___

Creation of the CSV file sonatas_comparison.csv with the average distance between both annotations.

In [7]:
# Output column -> progression column whose duration-weighted mean it holds.
averaged_columns = {'average_sps': 'sps',
                    'average_vl' : 'vl',
                    'average_tbt': 'tbt',
                    'average_bin': 'binary'}

comparison_rows = []

for progression_df_path in glob('../Data/sonatas_annotations/progressions/*-1.csv', recursive=True):

    progression = pd.read_csv(progression_df_path)
    weights = progression.matched_duration

    # Normalisation constant: onset of the last row plus its matched duration.
    duration_length = progression.time.iloc[-1] + weights.iloc[-1]

    # The number is the part of the file name before the first '-'
    # (fifth '/'-separated component of the path).
    features = {'number': progression_df_path.split('/')[4].split('-')[0]}
    for out_name, column in averaged_columns.items():
        features[out_name] = np.dot(progression[column], weights) / duration_length

    comparison_rows.append(features)

# One row per sonata, ordered by number, written as a single CSV.
df_comparison = pd.DataFrame(comparison_rows).sort_values('number')
df_comparison.to_csv('../Data/sonatas_annotations/sonatas_comparison.csv', index=False)

___
Creation of the TSV files used to add the labels to the mscx file of each sonata.

In [2]:
def change_typo(label:str):
    """
    Rewrite a chord label as a compact, score-friendly string.

    The label is read as ``<root>_<CHORD_TYPE><suffix>`` where:

    * ``<root>`` is everything before the first underscore (any length, so
      roots with several accidentals are accepted),
    * ``<CHORD_TYPE>`` is one of the keys of the table below,
    * ``<suffix>`` is exactly five characters long; only its last character
      is used (presumably the inversion number -- confirm against the
      producer of these labels).

    The root is upper/lower-cased and decorated according to the chord type,
    and ``_<last character>`` is appended unless that character is ``"0"``.

    Parameters
    ----------
    label : str
        Chord label such as ``"C_MAJOR_inv0"`` or ``"F#_HALF_DIM7_inv2"``.

    Returns
    -------
    str
        The reformatted label, e.g. ``"C"`` or ``"f#%7_2"``.

    Raises
    ------
    KeyError
        If the chord type found in ``label`` is not in the table.
    """
    chord_type_typo = {"MAJOR":lambda x:x,
                       "MINOR":lambda x:x.lower(),
                       "DIMINISHED":lambda x:x.lower()+"o",
                       "AUGMENTED":lambda x:x+"+",
                       "MAJ_MAJ7":lambda x:x+"M7",
                       "MAJ_MIN7":lambda x:x+"7",
                       "MIN_MAJ7":lambda x:x.lower()+"M7",
                       "MIN_MIN7":lambda x:x.lower()+"m7",
                       "DIM7":lambda x:x.lower()+"o7",
                       "HALF_DIM7":lambda x:x.lower()+"%7",
                       "AUG_MIN7":lambda x:x+"+7",
                       "AUG_MAJ7":lambda x:x+"+M7"
                      }

    # Split at the first underscore instead of assuming a 1- or 2-character
    # root, so that e.g. "Fbb" or "C##" is parsed correctly.
    root, _, remainder = label.partition("_")
    # Drop the fixed-width five-character suffix to isolate the chord type.
    chord_type = remainder[:-5]

    new_label = chord_type_typo[chord_type](root)
    if label[-1] != "0":
        new_label += "_"+label[-1]

    return new_label

In [3]:
# For every sonata, build the TSV of labels that is later attached to the
# score: both annotations' chords at every point where something changes,
# the first annotation in black and the second in red.
for dcml_df_path in glob("../Data/sonatas_annotations/Beethoven-labels/dcml*.tsv", recursive=True):

    dcml_df = pd.read_csv(dcml_df_path, sep='\t', converters={'duration': Fraction, 'mn_onset': Fraction})
    # Onset of each row = sum of the durations of all previous rows (0 for the first).
    dcml_df['time'] = [0] + list(dcml_df.duration.cumsum().astype(float)[:-1])

    num = dcml_df_path.split('-')[2]
    progression = pd.read_csv('../Data/sonatas_annotations/progressions/' + num + '-1.csv')

    # Keep the rows where `binary` or either chord differs from the previous row
    # (the first row always qualifies because it is compared with NaN).
    tracked = progression[['binary', 'annotation1_chord', 'annotation2_chord']]
    label_changes = (tracked.shift() != tracked).any(axis=1)

    # Attach the score position (mc, mn_onset) of the DCML row starting at the same time.
    # NOTE(review): this is an exact match on a float column; rows whose times
    # differ by rounding are silently dropped by the inner join.
    annotations = pd.merge(progression.loc[label_changes, ['time', 'annotation1_chord', 'annotation2_chord']],
                           dcml_df[['time', 'mc', 'mn_onset']],
                           on='time', how='inner')

    annotations["label1"] = annotations.annotation1_chord.apply(change_typo)
    annotations["label2"] = annotations.annotation2_chord.apply(change_typo)

    # Stack both annotations: every position appears once in black (label1)
    # and once in red (label2); sorting interleaves them black-then-red.
    # NOTE(review): the 'mc_onset' column is filled from 'mn_onset' -- confirm
    # that this is intended.
    n_positions = len(annotations)
    df = pd.DataFrame({'mc': list(annotations.mc) * 2,
                       'mc_onset': list(annotations.mn_onset) * 2,
                       'color_name': ['black'] * n_positions + ['red'] * n_positions,
                       'label': list(annotations.label1) + list(annotations.label2)
                       }).sort_values(by=['mc', 'mc_onset', 'color_name'])

    # Tag the very first black/red labels with the name of their annotation.
    # A single positional assignment on the frame itself replaces the chained
    # `df.label.iloc[...] = ...`, which raised SettingWithCopyWarning and does
    # not modify `df` when pandas Copy-on-Write is active.
    label_col = df.columns.get_loc('label')
    df.iloc[0, label_col] = df.iloc[0, label_col] + "_dcml"
    df.iloc[1, label_col] = df.iloc[1, label_col] + "_fh"

    # Rows alternate black/red, so the row two positions earlier is the
    # previous label of the same colour; drop a label that merely repeats it.
    no_repetition = df.label.shift(2) != df.label

    target_path = '../Data/Beethoven_sonatas_scores/labels/'+num+'-1.tsv'
    df[no_repetition].to_csv(target_path, index=False, sep = '\t')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

In [7]:
# Scores and label files are both looked up below the same root directory.
annotations_dir="../Data/Beethoven_sonatas_scores"
labels_dir="../Data/Beethoven_sonatas_scores"

# Write the comparison labels of every label TSV into the matching score.
for labels_df_path in glob("../Data/Beethoven_sonatas_scores/labels/*.tsv", recursive=True):

    # Fifth '/'-separated path component is the file name.
    tsv_name = labels_df_path.split('/')[4]

    num = tsv_name[:2]
    print(num)
    # These three numbers are deliberately left out of the batch run.
    if num in ('30', '08', '17'):
        continue

    basename = tsv_name.split('.')[0]

    # Match both the MuseScore file and the TSV carrying this base name
    filename_regex = re.compile(basename + "\\.(mscx|tsv)")

    # Collect and parse scores and tsvs
    # NOTE(review): the recorded output reports the TSV as "already registered
    # for key 'labels'" -- possibly because both directories are identical
    # and share the same regex; confirm against the ms3 documentation.
    parse = Parse(annotations_dir, file_re=filename_regex)
    parse.add_dir(labels_dir, key="labels", file_re=filename_regex)
    parse.parse()

    # Write annotations to score
    parse.add_detached_annotations("MS3", "labels")
    parse.attach_labels(staff=2, voice=1, check_for_clashes=False)

    # Write score out to file, next to the original, with a "_comparison" suffix
    parse.store_mscx(root_dir=labels_dir, suffix="_comparison", overwrite=True)

30
17
09
ERROR    Parse -- parse.py (line 2004) _handle_path():
	The file name 09-1.tsv is already registered for key 'labels' and both files have the relative path labels.
	Load one of the identically named files with a different key using add_dir(key='KEY').
INFO     Parse -- parse.py (line 1709) parse_mscx():
	All 2 files have been parsed successfully.


AttributeError: 'DataFrame' object has no attribute 'expanded'

In [13]:
# Manual run of the label-attachment steps for a single file, "30-1"
# (number 30 is one of those skipped by the batch loop over all label files).
# NOTE(review): the recorded output of this cell ends with an
# UnboundLocalError, so the run shown here did not complete.
basename="30-1"
# Scores and label TSVs are both searched below the same root directory.
annotations_dir="../Data/Beethoven_sonatas_scores"
labels_dir="../Data/Beethoven_sonatas_scores"
 
# Add musescore and tsv suffixes to filename match
filename_regex = re.compile(basename + "\\.(mscx|tsv)")             

# Parse scores and tsvs
parse = Parse(annotations_dir, file_re=filename_regex)                   

# Register the same directory a second time under the key "labels".
# NOTE(review): the recorded output reports "30-1.tsv is already registered
# for key 'labels'" at this point -- confirm the intended directory/key setup.
parse.add_dir(labels_dir, key="labels", file_re=filename_regex)               
parse.parse()

# Write annotations to score
parse.add_detached_annotations("MS3", "labels")
parse.attach_labels(staff=2, voice=1, check_for_clashes=False)

# Write score out to file
parse.store_mscx(root_dir=labels_dir, suffix="_comparison", overwrite=True)

ERROR    Parse -- parse.py (line 1579) _handle_path():
	The file name 30-1.tsv is already registered for key 'labels' and both files have the relative path labels.
	Load one of the identically named files with a different key using add_dir(key='KEY').
	The incomplete MC 17 (timesig 1/2, act_dur 1/4) is completed by 1 incorrect duration (expected: 1/4):
	{18: Fraction(1, 2)}
	The incomplete MC 17 (timesig 1/2, act_dur 1/4) is completed by 1 incorrect duration (expected: 1/4):
	{18: Fraction(1, 2)}


  new_label_col = new_label_col.str.replace('^/$', 'empty_harmony').replace('', np.nan)
  new_label_col = new_label_col.str.replace('^/$', 'empty_harmony').replace('', np.nan)


INFO     Parse -- parse.py (line 1339) parse_mscx():
	All 2 files have been parsed successfully.


  new_label_col = new_label_col.str.replace('^/$', 'empty_harmony').replace('', np.nan)


UnboundLocalError: local variable 'ix' referenced before assignment