In [1]:
import pandas as pd
import numpy as np
import os
import re
import math
from packaging import version

In [2]:

# INDEXING
# Maps the transposition tag found in a score's file name to an index into the
# rows of `trans_by_org_key`. Negative values index from the end of a row.
transpose = {'downm2':-2, 'downmm2':-1, 'A4':5, 'upm2':1, 'downm3':-4, 'upm3':3, 
             'upP4':4, 'downP4': -5, 'upmm3':2, "upmm2":0, 'downmm3':-3 }

## hardcoded transposing regards to original key to keep consistent with humdrum transpose scheme
# Each row lists, for one original key, the keys 1..11 semitones above it
# (index i -> i+1 semitones up), spelled as m2, M2, m3, M3, P4, A4, P5, m6, M6, m7, M7.
# Upper-case names are major keys, lower-case names are minor keys.
# The 'C-' / 'c-' rows previously had their last four entries in the order
# B--, B-, A--, A-; they are now A--, A-, B--, B- like every other row.
trans_by_org_key = {
    'F' : ['G-', 'G', 'A-', 'A', 'B-', 'B', 'C', 'D-', 'D', 'E-', 'E'],
    'G' : ['A-', 'A', 'B-', 'B', 'C', 'C#', 'D', 'E-', 'E', 'F', 'F#'],
    'C' : ['D-', 'D', 'E-', 'E', 'F', 'F#', 'G', 'A-', 'A', 'B-', 'B'],
    'B-' : ['C-', 'C', 'D-', 'D', 'E-', 'E', 'F', 'G-', 'G', 'A-', 'A'],
    'D' : ['E-', 'E', 'F', 'F#', 'G', 'G#', 'A', 'B-', 'B', 'C', 'C#'],
    'A-' : ['B--', 'B-', 'C-', 'C', 'D-', 'D', 'E-', 'F-', 'F', 'G-', 'G'],
    'A' : ['B-', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#'],
    'E-' : ['F-', 'F', 'G-', 'G', 'A-', 'A', 'B-', 'C-', 'C', 'D-', 'D'],
    'E' : ['F', 'F#', 'G', 'G#', 'A', 'A#', 'B', 'C', 'C#', 'D', 'D#'],
    'G-' : ['A--', 'A-', 'B--', 'B-', 'C-', 'C', 'D-', 'E--', 'E-', 'F-', 'F'],
    'D-' : ['E--', 'E-', 'F-', 'F', 'G-', 'G', 'A-', 'B--', 'B-', 'C-', 'C'],
    'F#' : ['G', 'G#', 'A', 'A#', 'B', 'B#', 'C#', 'D', 'D#', 'E', 'E#'],
    'C#' : ['D', 'D#', 'E', 'E#', 'F#', 'F##', 'G#', 'A', 'A#', 'B', 'B#'],
    'C-' : ['D--', 'D-', 'E--', 'E-', 'F-', 'F', 'G-', 'A--', 'A-', 'B--', 'B-'],
    'D#' : ['E', 'E#', 'F#', 'F##', 'G#', 'G##', 'A#', 'B', 'B#', 'C#', 'C##'],
    'B' : ['C', 'C#', 'D', 'D#', 'E', 'E#', 'F#', 'G', 'G#', 'A', 'A#'],
    
    'f' : ['g-', 'g', 'a-', 'a', 'b-', 'b', 'c', 'd-', 'd', 'e-', 'e'],
    'g' : ['a-', 'a', 'b-', 'b', 'c', 'c#', 'd', 'e-', 'e', 'f', 'f#'],
    'c' : ['d-', 'd', 'e-', 'e', 'f', 'f#', 'g', 'a-', 'a', 'b-', 'b'],
    'b-' : ['c-', 'c', 'd-', 'd', 'e-', 'e', 'f', 'g-', 'g', 'a-', 'a'],
    'd' : ['e-', 'e', 'f', 'f#', 'g', 'g#', 'a', 'b-', 'b', 'c', 'c#'],
    'a-' : ['b--', 'b-', 'c-', 'c', 'd-', 'd', 'e-', 'f-', 'f', 'g-', 'g'],
    'a' : ['b-', 'b', 'c', 'c#', 'd', 'd#', 'e', 'f', 'f#', 'g', 'g#'],
    'e-' : ['f-', 'f', 'g-', 'g', 'a-', 'a', 'b-', 'c-', 'c', 'd-', 'd'],
    'e' : ['f', 'f#', 'g', 'g#', 'a', 'a#', 'b', 'c', 'c#', 'd', 'd#'],
    'g-' : ['a--', 'a-', 'b--', 'b-', 'c-', 'c', 'd-', 'e--', 'e-', 'f-', 'f'],
    'd-' : ['e--', 'e-', 'f-', 'f', 'g-', 'g', 'a-', 'b--', 'b-', 'c-', 'c'],
    'f#' : ['g', 'g#', 'a', 'a#', 'b', 'b#', 'c#', 'd', 'd#', 'e', 'e#'],
    'c#' : ['d', 'd#', 'e', 'e#', 'f#', 'f##', 'g#', 'a', 'a#', 'b', 'b#'],
    'c-' : ['d--', 'd-', 'e--', 'e-', 'f-', 'f', 'g-', 'a--', 'a-', 'b--', 'b-'],
    'd#' : ['e', 'e#', 'f#', 'f##', 'g#', 'g##', 'a#', 'b', 'b#', 'c#', 'c##'],
    'b' : ['c', 'c#', 'd', 'd#', 'e', 'e#', 'f#', 'g', 'g#', 'a', 'a#'],
    'g#' : ['a', 'a#', 'b', 'b#', 'c#', 'c##', 'd#', 'e', 'e#', 'f#', 'f##']
    }

In [3]:
def construct_RM(chord, numeral, chord_type, relativeroot):
    """Build the Roman-numeral label written into the merged score.

    Parameters
    ----------
    chord : str
        Chord label; only inspected for the augmented-sixth markers
        'Ger', 'It6' and 'Fr6'.
    numeral : str
        Roman numeral of the chord, used for all non-augmented-sixth chords.
    chord_type : str
        One of 'M', 'm', '+', 'o', '+7', 'Mm7', 'MM7', 'mm7', '%7', 'o7'.
        Any other value yields an empty chord part.
    relativeroot : str or null
        Relative root (applied-chord target); null means "none".

    Returns
    -------
    str
        The label, with "/<relative root>" appended when applicable.
    """
    output_chord = ''
    # The original test `('Ger' or 'It6' or 'Fr6') in chord` only ever checked
    # for 'Ger', so Italian and French sixths fell through to the generic branch.
    if any(marker in chord for marker in ('Ger', 'It6', 'Fr6')):
        if 'Ger6' in chord:
            output_chord = 'Ger'
        elif 'It6' in chord:
            output_chord = 'It'
        elif "Fr6" in chord:
            output_chord = 'Fr'
        # A relative root of exactly 'V' adds no suffix; otherwise the first two
        # characters of the relative root are dropped before appending it.
        # A missing relative root is treated like 'V' (no suffix) instead of
        # raising a TypeError on the comparison/slice.
        if not pd.isnull(relativeroot) and relativeroot != 'V':
            output_chord = output_chord + "/" + relativeroot[2:] 
        
    else:
        # Triads
        if chord_type == 'M': 
            output_chord = numeral
        elif chord_type == 'm':
            output_chord = numeral
        elif chord_type == '+':
            output_chord = numeral + "+"
        elif chord_type == 'o':
            output_chord = numeral + "o"
            
        # Seventh chords
        elif chord_type == '+7':
            output_chord = numeral + "+M7"
        elif chord_type == 'Mm7':
            output_chord = numeral + "7"
        elif chord_type == 'MM7':
            output_chord = numeral + "M7"
        elif chord_type == 'mm7':
            output_chord = numeral.lower() + "7"
        elif chord_type == '%7':
            output_chord = numeral.lower() + "om7"
        elif chord_type == 'o7':
            output_chord = numeral.lower() + "oD7"

        if not pd.isnull(relativeroot):
            output_chord = output_chord + "/" + relativeroot
    return output_chord

def get_mea_num(measure):
    """Return the integer measure number encoded in a score measure token.

    Parameters
    ----------
    measure : str
        Measure token from the score; '.' is mapped to measure 1, otherwise
        the first run of digits in the token is used.

    Returns
    -------
    int
        The measure number.

    Raises
    ------
    IndexError
        If the token is not '.' and contains no digits.
    """
    if measure == '.':
        return 1
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    return int(re.findall(r"\d+", measure)[0])
            
        

In [4]:
# Smoke test: a major chord on bII with no relative root prints the bare numeral.
print(construct_RM(chord='.bII6', numeral='bII', chord_type='M', relativeroot=pd.NA))

bII


In [5]:
# --- configuration ---------------------------------------------------------
script_dir = os.getcwd()
final_df_folder = '../../datasets/ABC/ABC_dataframe/final_df/'
final_df_path = os.path.join(script_dir, final_df_folder)
destin_folder = '../../datasets/ABC/ABC_org_score_for_vec' # change for reduced !!!!
#destin_folder = '../../datasets/ABC/ABC_reduced_score_for_vec'
merged_folder = "../../datasets/ABC/ABC_org_score_for_vec_merged/"  # change for reduced !!!!
#merged_folder = "../../datasets/ABC/ABC_reduced_score_for_vec_merged/"
score_columns = ["voice4", "dy4", "voice3", "dy3", "voice2", "dy2", "voice1", "dy1",
                 "harm", "beat", "measure", "key", "meter" ]  # change for reduced !!!!
#score_columns = ["voice4", "dy4", "voice3", "dy3", "voice2", "dy2", "voice1", "dy1",
#                 "harm", "index", "beat", "measure", "key", "meter" ]

ORG_SCORE = 20  # trans_num sentinel: the score is the original, not a transposition
END_OF_ANNOTATIONS = (math.inf, 0.0)  # sorts after every real (measure, beat)

bad_files = []        # scores whose merge raised an exception
bad_file_errors = {}  # score file name -> repr of the exception that was raised


def _position(measure, beat):
    """Return (int measure, float beat), a tuple that sorts chronologically.

    Positions used to be compared as version strings "measure.beat", which
    compares the decimals of the beat as integers (beat 1.25 sorted after
    beat 1.5). A NaN beat is rejected because it cannot be ordered.
    """
    beat = float(beat)
    if math.isnan(beat):
        raise ValueError("beat is NaN")
    return (int(measure), beat)


def _annotation(load_df, row_idx, trans_num):
    """Return (key, roman numeral) of annotation row `row_idx`, transposing the key if needed."""
    row = load_df.iloc[row_idx]
    key = row['new_key']
    if trans_num != ORG_SCORE:  # transposed version
        key = trans_by_org_key[key][trans_num]
    roman = construct_RM(row['chord'], row['numeral'], row['chord_type'], row['relativeroot'])
    return key, roman


def _onset_after(load_df, row_idx):
    """Return the position of the row following `row_idx`, or END_OF_ANNOTATIONS after the last row."""
    following = row_idx + 1
    if following >= len(load_df):
        return END_OF_ANNOTATIONS
    row = load_df.iloc[following]
    return _position(row['measure'], row['beat'])


def _merge_annotations(load_score, load_df, trans_num):
    """Return a copy of `load_score` with 'key' and 'harm' columns appended.

    Both tables are walked in order: each score row receives the key and
    Roman numeral of the latest annotation row whose successor starts
    strictly after that score row's (measure, beat) position.
    """
    df_pointer = 0
    cur_key, cur_roman = _annotation(load_df, df_pointer, trans_num)
    next_onset = _onset_after(load_df, df_pointer)

    keys, harms = [], []
    for mea, beat in zip(load_score['measure'], load_score['beat']):
        score_pos = _position(get_mea_num(mea), beat)
        # Advance to the annotation that covers this score position. The loop
        # terminates because END_OF_ANNOTATIONS is greater than any position.
        while not (score_pos < next_onset):
            df_pointer += 1
            cur_key, cur_roman = _annotation(load_df, df_pointer, trans_num)
            next_onset = _onset_after(load_df, df_pointer)
        keys.append(cur_key)
        harms.append(cur_roman)

    merged = load_score.copy()
    merged['key'] = keys
    merged['harm'] = harms
    return merged


# --- main loop -------------------------------------------------------------
for subdir, dirs, files in os.walk(final_df_path):
    for final_df in files:
        print("processing_df", final_df)
        df_filename = final_df.split(".")[0]
        match_scores = [fn for fn in os.listdir(destin_folder) if df_filename in fn]
        for score in match_scores:
            try:
                print("match_score", score)
                trans_num = ORG_SCORE
                if "trans" in score:
                    # The transposition tag sits between the prefix and '_trans'.
                    prefix = df_filename + "_"  # ! change for reduced dataset
                    #prefix = df_filename + "_score_reduced4_tmp_"
                    tag_start = score.index(prefix) + len(prefix)
                    trans_string = score[tag_start:score.index('_trans')]
                    trans_num = transpose[trans_string]

                # load score
                load_score = pd.read_csv(os.path.join(destin_folder, score), sep='\t')
                # load dataframe (joined with the walked directory so nested folders work,
                # and without overwriting the `final_df_path` root)
                load_df = pd.read_csv(os.path.join(subdir, final_df), sep='\t')

                load_score.columns = score_columns
                # The score's own key/harm columns are replaced by the annotations.
                del load_score['key']
                del load_score['harm']
                # Drop rows whose beat field starts with '=', '.' or '*'.
                load_score = load_score[~load_score['beat'].astype(str).str.startswith(('=','.','*'))]
                load_score = load_score.reset_index(drop=True)

                # append KEY and ROMAN_NUMERAL to score dataframe based on MEASURE and BEAT
                load_score = _merge_annotations(load_score, load_df, trans_num)

                # save to new .krn file
                # NOTE(review): mode='a' appends, so re-running this cell adds a second
                # copy (with header) to existing files - confirm whether overwrite is intended.
                save_file = score.split(".")[0]
                save_name = merged_folder + save_file + "_merged.krn"
                load_score.to_csv(save_name, header=True, index=None, sep='\t', mode='a')

            except Exception as exc:
                # Keep going with the next score, but remember which one failed and why.
                # (A bare `except:` would also swallow KeyboardInterrupt.)
                bad_files.append(score)
                bad_file_errors[score] = repr(exc)
                            
                            
                
                

processing_df n03op18_02.csv
match_score n03op18_02_downm3_trans_vector_pre.krn
match_score n03op18_02_upP4_trans_vector_pre.krn
match_score n03op18_02_downmm3_trans_vector_pre.krn
match_score n03op18_02_downmm2_trans_vector_pre.krn
match_score n03op18_02_downm2_trans_vector_pre.krn
match_score n03op18_02_vector_pre.krn
match_score n03op18_02_upm2_trans_vector_pre.krn
match_score n03op18_02_upmm2_trans_vector_pre.krn
match_score n03op18_02_A4_trans_vector_pre.krn
match_score n03op18_02_downP4_trans_vector_pre.krn
match_score n03op18_02_upmm3_trans_vector_pre.krn
match_score n03op18_02_upm3_trans_vector_pre.krn
processing_df n14op131_01.csv
match_score n14op131_01_downm3_trans_vector_pre.krn
match_score n14op131_01_upP4_trans_vector_pre.krn
match_score n14op131_01_upmm2_trans_vector_pre.krn
match_score n14op131_01_downm2_trans_vector_pre.krn
match_score n14op131_01_A4_trans_vector_pre.krn
match_score n14op131_01_upmm3_trans_vector_pre.krn
match_score n14op131_01_vector_pre.krn
match_sco

match_score n06op18_02_upmm2_trans_vector_pre.krn
match_score n06op18_02_upm2_trans_vector_pre.krn
match_score n06op18_02_downP4_trans_vector_pre.krn
match_score n06op18_02_upmm3_trans_vector_pre.krn
match_score n06op18_02_vector_pre.krn
match_score n06op18_02_downmm3_trans_vector_pre.krn
match_score n06op18_02_A4_trans_vector_pre.krn
match_score n06op18_02_upm3_trans_vector_pre.krn
match_score n06op18_02_downm2_trans_vector_pre.krn
match_score n06op18_02_upP4_trans_vector_pre.krn
match_score n06op18_02_downm3_trans_vector_pre.krn
processing_df n12op127_03.csv
match_score n12op127_03_vector_pre.krn
match_score n12op127_03_upm2_trans_vector_pre.krn
match_score n12op127_03_downm3_trans_vector_pre.krn
match_score n12op127_03_upm3_trans_vector_pre.krn
match_score n12op127_03_downm2_trans_vector_pre.krn
match_score n12op127_03_downmm2_trans_vector_pre.krn
match_score n12op127_03_upP4_trans_vector_pre.krn
match_score n12op127_03_upmm3_trans_vector_pre.krn
match_score n12op127_03_downmm3_tran

match_score n13op130_05_downP4_trans_vector_pre.krn
match_score n13op130_05_upm2_trans_vector_pre.krn
match_score n13op130_05_downmm3_trans_vector_pre.krn
match_score n13op130_05_A4_trans_vector_pre.krn
match_score n13op130_05_downm2_trans_vector_pre.krn
match_score n13op130_05_upmm2_trans_vector_pre.krn
match_score n13op130_05_downm3_trans_vector_pre.krn
match_score n13op130_05_upmm3_trans_vector_pre.krn
match_score n13op130_05_downmm2_trans_vector_pre.krn
match_score n13op130_05_vector_pre.krn
match_score n13op130_05_upP4_trans_vector_pre.krn
processing_df n13op130_01.csv
match_score n13op130_01_upmm2_trans_vector_pre.krn
match_score n13op130_01_downmm3_trans_vector_pre.krn
match_score n13op130_01_upmm3_trans_vector_pre.krn
match_score n13op130_01_downmm2_trans_vector_pre.krn
match_score n13op130_01_downP4_trans_vector_pre.krn
match_score n13op130_01_upP4_trans_vector_pre.krn
match_score n13op130_01_downm3_trans_vector_pre.krn
match_score n13op130_01_upm3_trans_vector_pre.krn
match_s

match_score n08op59_02_downmm2_trans_vector_pre.krn
match_score n08op59_02_vector_pre.krn
match_score n08op59_02_downm3_trans_vector_pre.krn
match_score n08op59_02_upm3_trans_vector_pre.krn
match_score n08op59_02_upmm2_trans_vector_pre.krn
match_score n08op59_02_A4_trans_vector_pre.krn
match_score n08op59_02_upmm3_trans_vector_pre.krn
match_score n08op59_02_downm2_trans_vector_pre.krn
match_score n08op59_02_upm2_trans_vector_pre.krn
processing_df n02op18_02.csv
match_score n02op18_02_vector_pre.krn
match_score n02op18_02_downm2_trans_vector_pre.krn
match_score n02op18_02_upm2_trans_vector_pre.krn
match_score n02op18_02_downm3_trans_vector_pre.krn
match_score n02op18_02_upm3_trans_vector_pre.krn
match_score n02op18_02_upP4_trans_vector_pre.krn
match_score n02op18_02_upmm3_trans_vector_pre.krn
match_score n02op18_02_downP4_trans_vector_pre.krn
match_score n02op18_02_downmm3_trans_vector_pre.krn
match_score n02op18_02_upmm2_trans_vector_pre.krn
match_score n02op18_02_downmm2_trans_vector_

In [10]:
# Scores whose merge raised an exception in the loop above (collected in `bad_files`).
# NOTE(review): the saved output below lists `*_score_reduced4_tmp_*` files, so it presumably
# comes from a run with the reduced-dataset settings - re-run this cell to refresh it.
bad_files

['n14op131_07_score_reduced4_tmp_A4_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_downm2_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_upmm3_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_downm3_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_upmm2_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_downmm2_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_upm2_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_downP4_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_downmm3_trans_vector_pre.krn',
 'n06op18_03_score_reduced4_tmp_upm3_trans_vector_pre.krn',
 'n04op18_04_score_reduced4_tmp_downm3_trans_vector_pre.krn',
 'n04op18_04_score_reduced4_tmp_downmm3_trans_vector_pre.krn',
 'n04op18_04_score_reduced4_tmp_vector_pre.krn',
 'n04op18_04_score_reduced4_tmp_upm3_trans_vector_pre.krn',
 'n04op18_04_score_reduced4_tmp_downmm2_trans_vector_pre.krn',
 'n04op18_04_score_reduced4_tmp_downm2_trans_vector_pre.krn',
 'n04op18_04_score_reduced4_