In [1]:
import os
import pandas as pd
import numpy as np
from utils import get_beat_vector, to_chromatic
import re
import sys
import difflib

In [2]:
# Build the path of the input score file.
# NOTE: os.getcwd() is the kernel's current working directory, so the relative
# path below only resolves when the notebook is started from the expected folder.
score_rel_path = "../datasets/all_krn_for_vec/op20n6-02_vector_pre.krn"
script_dir = os.getcwd()
scorepath = os.path.join(script_dir, score_rel_path)


In [3]:
# Read the tab-separated score table (the file has no header row) and give
# its seven columns readable names.
df = pd.read_csv(scorepath, sep="\t", header=None)
df.columns = ["harm", "voice4", "voice3", "voice2", "voice1", "beat", "meter"]

# Let pandas display every row of this frame instead of truncating the output.
pd.set_option("display.max_rows", len(df) + 1)


In [4]:
# Clean the frame: drop every row whose beat field starts with "=", "." or "*"
# (per the original note: bar lines, score representation records and
# "rest slices"), then renumber the remaining rows from 0.
is_non_event = df["beat"].astype(str).str.startswith(("=", ".", "*"))
df = df.loc[~is_non_event].reset_index(drop=True)
df

Unnamed: 0,harm,voice4,voice3,voice2,voice1,beat,meter
0,I,4E,8r,8r,2.b,1.0,M4/4
1,I,4E,8B,8g#,2.b,1.5,M4/4
2,I,4G#,8r,8r,2.b,2.0,M4/4
3,I,4G#,8B,8b,2.b,2.5,M4/4
4,V7c,4F#,8r,8r,2.b,3.0,M4/4
5,V7c,4F#,8d#,8a,2.b,3.5,M4/4
6,V7,4BB,8r,8r,4dd#,4.0,M4/4
7,V7,4BB,8a,8f#,4dd#,4.5,M4/4
8,I,4E,8r,8r,8ee,1.0,M4/4
9,I,4E,8g#,8g#,8b,1.5,M4/4


In [5]:
# Derive the per-slice features and the chord-change label for every row:
#   onset_strength - number of distinct note names not present in the previous slice
#   beat_strength  - result of get_beat_vector(beat, meter-without-"M")
#   LABEL          - 1 when `harm` differs from the previous slice, otherwise 0
#   similarity     - difflib ratio between this slice's chromatic list and the
#                    NEXT slice's; the last slice has no successor and gets 0
#
# Values are collected in plain lists and written to the frame once, after the
# loop. Compared with filling cells one by one via `df.at` inside `iterrows()`,
# this avoids mutating the frame while iterating over it, does not depend on
# the index labels being exactly 0..n-1, and no longer fails with a KeyError
# on 'similarity' when the frame has fewer than two rows.
voices = ["voice4", "voice3", "voice2", "voice1"]

onset_strengths = []
beat_strengths = []
labels = []
similarities = []

prev_note_list = []
prev_chro_list = []
prev_harm = ""
for position, (_, row) in enumerate(df.iterrows()):
    # for this onset slice

    ######################################### Process notes and onsets ########################################
    cur_note_tmp = []
    for v in voices:
        # A part may hold several voices in one cell, separated by spaces;
        # splitting a token without spaces simply yields that single token.
        cur_note_tmp.extend(row[v].split(" "))

    # Keep only pitch letters and the accidental signs "#" and "-". Tokens that
    # contain none of those characters (e.g. "8r") become empty and are skipped.
    cur_note_list = []
    for token in cur_note_tmp:
        note_name = re.sub('[^a-gA-G#-]+', '', token)
        if note_name:
            cur_note_list.append(note_name)
    cur_chro_list = to_chromatic(cur_note_list)

    # Stored as float, matching what the previous cell-by-cell assignment produced.
    onset_strengths.append(float(len(set(cur_note_list) - set(prev_note_list))))

    # The ratio between the previous and the current slice belongs to the
    # previous row, so nothing is recorded while processing the first slice.
    if position > 0:
        simi = difflib.SequenceMatcher(None, prev_chro_list, cur_chro_list).ratio()
        similarities.append(simi)
    prev_note_list = cur_note_list
    prev_chro_list = cur_chro_list

    ######################################### Process beat position ########################################
    beat_pos = row["beat"]
    meter = row["meter"].replace("M", "")
    beat_strengths.append(get_beat_vector(beat_pos, meter))

    ######################################### Process label ###########################################
    cur_harm = row["harm"]
    # Stored as float for the same reason as onset_strength.
    labels.append(1.0 if cur_harm != prev_harm else 0.0)
    prev_harm = cur_harm
    #########################################################################################################

# set the last data point: it has no following slice to compare against
if len(df) > 0:
    similarities.append(0.0)

# Write the columns in the same order the original loop created them.
df["onset_strength"] = onset_strengths
df["beat_strength"] = beat_strengths
df["LABEL"] = labels
df["similarity"] = similarities

In [6]:
# Display the frame again, now including the onset_strength, beat_strength,
# LABEL and similarity columns.
df

Unnamed: 0,harm,voice4,voice3,voice2,voice1,beat,meter,onset_strength,beat_strength,LABEL,similarity
0,I,4E,8r,8r,2.b,1.0,M4/4,2.0,strong beat,1.0,0.8
1,I,4E,8B,8g#,2.b,1.5,M4/4,2.0,off beat,0.0,0.8
2,I,4G#,8r,8r,2.b,2.0,M4/4,1.0,weak beat,0.0,1.0
3,I,4G#,8B,8b,2.b,2.5,M4/4,1.0,off beat,0.0,0.5
4,V7c,4F#,8r,8r,2.b,3.0,M4/4,1.0,strong beat,1.0,0.666667
5,V7c,4F#,8d#,8a,2.b,3.5,M4/4,2.0,off beat,0.0,0.666667
6,V7,4BB,8r,8r,4dd#,4.0,M4/4,2.0,weak beat,1.0,0.666667
7,V7,4BB,8a,8f#,4dd#,4.5,M4/4,2.0,off beat,0.0,0.0
8,I,4E,8r,8r,8ee,1.0,M4/4,2.0,strong beat,1.0,0.5
9,I,4E,8g#,8g#,8b,1.5,M4/4,2.0,off beat,0.0,0.8
