In [162]:
import pandas as pd
import numpy as np
import os
import re
from collections import defaultdict

In [163]:
# translate key
rtoi = {"I":0, "II":1, "III":2, "IV":3, "V":4, "VI": 5, "VII":6 }

scale = defaultdict(list)
scale = {
    'C': ['C', 'D', 'E', 'F', 'G', 'A', 'B'],
    'G': ['G', 'A', 'B', 'C', 'D', 'E', 'F#'],
    'D': ['D', 'E', 'F#', 'G', 'A', 'B', 'C#'],
    'A': ['A', 'B', 'C#', 'D', 'E','F#','G#'],
    'E': ['E', 'F#','G#', 'A', 'B', 'C#', 'D#'],
    'B': ['B', 'C#', 'D#', 'E', 'F#', 'G#', 'A#'],
    'F#': ['F#', 'G#', 'A#', 'B', 'C#', 'D#', 'E#'],
    'Gb': ['G-', 'A-', 'B-','C-', 'D-', 'E-', 'F'],
    'Db': ['D-', 'E-', 'F', 'G-', 'A-','B-','C','D-'],
    'C#': ['C#', 'D#', 'E#','F#', 'G#', 'A#', 'B#','C#'],
    'Ab': ['A-', 'B-', 'C', 'D-', 'E-', 'F', 'G', 'Ab'],
    'Eb': ['E-','F','G','A-','B-','C', 'D','E-'],
    'Bb': ['B-','C','D','E-','F','G','A','B-'],
    'F': ['F', 'G', 'A','B-','C','D','E'],
    
    'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'A'],
    'e': ['E', 'F#', 'G', 'A', 'B', 'C', 'D', 'E'],
    'b': ['B', 'C#', 'D', 'E', 'F#', 'G', 'A', 'B'],
    'd': ['D', 'E', 'F', 'G', 'A', 'B-', 'C', 'D'],
    'g': ['G', 'A', 'B-', 'C', 'D', 'E-', 'F', 'G'],
    'c': ['C', 'D', 'E-', 'F', 'G', 'A-', 'B-', 'C'],
    'f': ['F', 'G', 'A-', 'B-', 'C', 'D-', 'E-', 'F'],
    "c#": ['C#','D#', 'E', 'F#', 'G#', 'A', 'B', 'C#'],
    'g#': ['G#', 'A#','B', 'C#','D#', 'E', 'F#', 'G#'],

}



In [166]:
def concate_df(df1, df2):
    final_df = df1[['chord', "measure", "beat"]]
    # append  columns
    final_df[['label',"mn",'globalkey', 'localkey','globalkey_is_minor', 'localkey_is_minor', 'numeral', 'chord_type', 'relativeroot']] \
    = df2[["label","mn",'globalkey', 'localkey','globalkey_is_minor', 'localkey_is_minor', 'numeral', 'chord_type', 'relativeroot']]
    final_df["new_key"] = np.nan
    for index, row in final_df.iterrows():
        global_key = ''.join(row[['globalkey']].values)
        local_key = ''.join(row[['localkey']].values)
        gl_is_minor = row[['globalkey_is_minor']].values
        lc_is_minor = row[['localkey_is_minor']].values

        # process global key, all are in single "#" and "b"
        scale_cur = scale[global_key] # global key is case sensitive

        # process local key
        roman_char = re.findall('[ivIV]+', local_key)#.upper()
        roman = ''.join(roman_char).upper()
        num = rtoi[roman]
        new_key = scale_cur[num]
        if lc_is_minor == 1: # to minor key 
            new_key = new_key.lower()

        #num_exp = 0
        if "#" in local_key: # All local key records are single "#"
            if "-" in new_key:
                new_key = new_key.replace("-", "")
            else:
                new_key += '#'
        elif "b" in local_key: # there are 10 records in all local key that contains "bb"
            num_flats = len(re.findall("b", local_key))
            if "#" in new_key:
                num_sharp = len(re.findall("#", new_key)) # should be 0 or 1
                if num_flats >= num_sharp: 
                    # remove all sharps and add flats
                    num_flats -= num_sharp
                    new_key = new_key.replace("#", "")
            new_key += num_flats * '-'

        # add to datframe
        final_df.loc[index,"new_key"] = new_key
        
    return final_df

    
    

In [167]:
### apply code above to all dataframes
script_dir = os.getcwd()

v1_folder = "../datasets/ABC/ABC_dataframe/harmonies_v1"
v1_path = os.path.join(script_dir, v1_folder)
v2_folder = "../datasets/ABC/ABC_dataframe/harmonies_v2"
v2_path = os.path.join(script_dir, v2_folder)
bad_file = []

# for file in data v2
for subdir, dirs, files in os.walk(v2_path):
     for idx, data_v2 in enumerate(files):
            if not data_v2.startswith('.'):
                
                # find matching file in v1 folder
                op = data_v2[data_v2.find("p")+1 : data_v2.find("_")]
                no = int(data_v2[data_v2.find("n")+1 : data_v2.find("o")])
                mvt = int(data_v2[data_v2.find("_")+1 : data_v2.find(".")])
                data_v1_filename = "op" +  op + "_no" + str(no) + "_mov" + str(mvt) +".tsv"
                
                # load dataframes 
                data_v2_path = os.path.join(subdir, data_v2)
                df2 = pd.read_csv(data_v2_path,sep='\t')
                data_v1_path = os.path.join(v1_path, data_v1_filename)
                df1 = pd.read_csv(data_v1_path,sep='\t')
                try:
                    final_df = concate_df(df1, df2)
                    save_name = "../datasets/ABC/ABC_dataframe/final_df/" + data_v2.split(".")[0] + ".csv"
                    final_df.to_csv(save_name, sep="\t", index=False)
                except:
                    bad_file.append(data_v1_filename)

                
                
            

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [168]:
print(bad_file)
len(bad_file)

[]


0

In [171]:
# manual check
dff = pd.read_csv('../datasets/ABC/ABC_dataframe/final_df/n02op18_04.csv',sep='\t')
pd.set_option('display.max_rows', dff.shape[0]+1)
dff


Unnamed: 0,chord,measure,beat,label,mn,globalkey,localkey,globalkey_is_minor,localkey_is_minor,numeral,chord_type,relativeroot,new_key
0,.G.I,1,1.0,.G.I,1,G,I,0,0,I,M,,G
1,V7,3,1.0,V7,3,G,I,0,0,V,Mm7,,G
2,I,5,1.0,I,5,G,I,0,0,I,M,,G
3,V.ii,6,1.0,V.ii,6,G,V,0,0,ii,m,,D
4,V(64),7,1.0,V(64),7,G,V,0,0,V,M,,D
5,V7,7,2.0,V7,7,G,V,0,0,V,Mm7,,D
6,I\\\\,8,1.0,I\\,8,G,V,0,0,I,M,,D
7,I.V7,9,1.0,I.V7,9,G,I,0,0,V,Mm7,,G
8,I,11,1.0,I,11,G,I,0,0,I,M,,G
9,I6,12,1.0,I6,12,G,I,0,0,I,M,,G
