This notebook rewrites the beat annotation files of the original dataset so that every beat label also carries the index of the measure it belongs to.

e.g. the beat labels 1, 2, 3, 1, 2, 3 become 0-1, 0-2, 0-3, 1-1, 1-2, 1-3 (measure index first, then the original beat label).

In [1]:
import os
import glob
from pathlib import Path
import pandas as pd
import numpy as np

In [2]:
import warnings
# Install an 'ignore' filter with no category/message restriction, i.e. every
# warning issued after this cell runs is silenced.
# NOTE(review): this also hides any warnings pandas/numpy emit in the cells
# below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [3]:
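# Sanity checks performed by `assert_labels` on the annotation files of one piece:
#   - every annotation file has the same number of rows
#   - every annotation file starts on the same beat
#   - the beat labels advance in triples (1, 2, 3, 1, 2, 3, ...)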

In [4]:
# Directory holding the original beat annotations.
ANNOT_PATH = Path('../ttmp/Chopin_Mazurkas/annotations_beat/')
# Keep only the directory entries whose name is exactly 15 characters long
# (the length of names such as 'Chopin_Op063No3').
folders = list(filter(lambda entry: len(entry) == 15, os.listdir(ANNOT_PATH)))

In [5]:
def _beat_number(label):
    """Return the integer beat encoded by a label; a trailing 'x' is ignored ('2x' -> 2)."""
    text = str(label)
    return int(text[0]) if text[-1] == 'x' else int(text)


def assert_labels(folder_path, skip_files=('Chopin_Op068No3_Koczalski-1948_pid9140-05.beat',)):
    """Validate the beat annotations of every file in one folder.

    Each non-hidden file is parsed as whitespace-separated text after
    skipping its first three lines; column 2 holds the beat label.
    The following invariants are asserted:

    * at least one file is available to check,
    * all files contain the same number of rows,
    * all files start on the same beat,
    * within each file the beats cycle 1 -> 2 -> 3 -> 1 -> ...

    Parameters
    ----------
    folder_path : str or path-like
        Folder containing the annotation files.
    skip_files : iterable of str, optional
        File names excluded from validation. Defaults to the single file
        that was previously hard-coded as an exception.

    Raises
    ------
    AssertionError
        If any of the invariants above is violated.
    """
    file_names = sorted(
        name for name in os.listdir(folder_path)
        if name[0] != '.' and name not in skip_files
    )
    assert file_names, f'no annotation files to check in {folder_path}'

    beats_by_file = {}
    for name in file_names:
        labels = pd.read_csv(os.path.join(folder_path, name),
                             header=None, sep=r'\s+', skiprows=3)[2]
        beats_by_file[name] = [_beat_number(label) for label in labels]

    # Compare lengths explicitly instead of relying on numpy to reject a
    # ragged list of columns (older numpy only warns and builds an object array).
    lengths = {name: len(beats) for name, beats in beats_by_file.items()}
    assert len(set(lengths.values())) == 1, f'annotation lengths differ: {lengths}'

    first_beats = {name: beats[0] for name, beats in beats_by_file.items()}
    assert len(set(first_beats.values())) == 1, f'files start on different beats: {first_beats}'

    for name, beats in beats_by_file.items():
        for row, (prev, new) in enumerate(zip(beats, beats[1:]), start=1):
            # Only 1->2, 2->3 and 3->1 are legal; a looser "+1 or -2" rule
            # would also let 3->4, 4->5, ... slip through.
            assert new == prev % 3 + 1, (
                f'{name}: beat {prev} is followed by {new} at row {row}'
            )

In [6]:
def get_preamble(file_path):
    """Return the first three lines of ``file_path`` as a list of strings.

    Lines keep their trailing newline. If the file has fewer than three
    lines, the missing entries are empty strings (what ``readline`` returns
    at end of file).
    """
    with open(file_path, 'r') as handle:
        return [handle.readline() for _ in range(3)]

In [7]:
def modify_annot(file_path, out_path):
    """Rewrite one annotation file with ``<measure>-<beat>`` labels.

    The first three lines of ``file_path`` are copied unchanged. The
    remaining whitespace-separated rows are parsed and the beat label in
    column 2 is prefixed with a measure index:

    * if the first label starts with '2', the first two rows are a pickup
      in measure 0 and the first full measure is measure 1;
    * if it starts with '3', the first row is a pickup in measure 0 and the
      first full measure is measure 1;
    * otherwise the first full measure is measure 0.

    After the pickup every run of three rows forms one measure; a trailing
    incomplete measure is still labelled. If column 1 of the last row is 0,
    that row is dropped. The result is written tab-separated to ``out_path``.

    Parameters
    ----------
    file_path : str or path-like
        Annotation file to read.
    out_path : str or path-like
        Destination file; overwritten if it exists.
    """
    preamble = get_preamble(file_path)

    df = pd.read_csv(file_path, header=None, sep=r'\s+', skiprows=3)

    beat_labels = [str(label) for label in df[2]]

    # Number of leading rows that belong to an incomplete (pickup) measure.
    first_digit = beat_labels[0][0] if beat_labels else ''
    if first_digit == '2':
        pickup = 2
    elif first_digit == '3':
        pickup = 1
    else:
        pickup = 0
    first_full_measure = 1 if pickup else 0

    labelled = []
    for row, label in enumerate(beat_labels):
        if row < pickup:
            measure = 0
        else:
            measure = first_full_measure + (row - pickup) // 3
        labelled.append(f'{measure}-{label}')

    # Assign the whole column at once: element-wise chained assignment
    # (df[2][i] = ...) is not guaranteed to modify df and has no effect under
    # pandas Copy-on-Write.
    df[2] = labelled

    # Drop the final row when its column-1 value is 0.
    # NOTE(review): presumably a terminal marker rather than a beat — confirm.
    if len(df) and df[1].iloc[-1] == 0:
        df = df.iloc[:-1]

    annots = df.to_csv(None, header=False, index=False, sep='\t')

    with open(out_path, 'w') as f:
        f.writelines(preamble)
        f.write(annots)

In [8]:
# For every selected folder: mirror it under the modified-dataset root,
# validate its annotations, then convert each non-hidden file.
for folder in folders:
    path = ANNOT_PATH / folder
    # Keep the last two components of the absolute path. Using pathlib
    # instead of splitting on '/' works with any path separator.
    abs_path = Path(os.path.abspath(path))
    folder_path = Path('../ttmp/Chopin_Mazurkas_Modified') / abs_path.parent.name / abs_path.name
    folder_path.mkdir(parents=True, exist_ok=True)
    assert_labels(path)
    print(f'{folder_path} created')
    # NOTE(review): assert_labels excludes one file from validation, yet every
    # non-hidden file is converted here — confirm that is intended.
    for file in path.glob('*'):
        if file.name[0] != '.':
            modify_annot(file, folder_path / file.name)

../ttmp/Chopin_Mazurkas_Modified/annotations_beat/Chopin_Op063No3 created
../ttmp/Chopin_Mazurkas_Modified/annotations_beat/Chopin_Op068No3 created
../ttmp/Chopin_Mazurkas_Modified/annotations_beat/Chopin_Op017No4 created
../ttmp/Chopin_Mazurkas_Modified/annotations_beat/Chopin_Op024No2 created
../ttmp/Chopin_Mazurkas_Modified/annotations_beat/Chopin_Op030No2 created


In [9]:
# Shell escape: recursively copy the wav_22050_mono directory of the original
# dataset into the modified dataset root (requires a `cp` command on the host).
! cp -r ../ttmp/Chopin_Mazurkas/wav_22050_mono ../ttmp/Chopin_Mazurkas_Modified/