In [3]:
import numpy as np
import os.path
import partitura as pt
from basismixer.performance_codec import get_performance_codec
import glob
import re
from scipy import stats
import json

import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm

In [None]:
xml_fn = glob.glob(os.path.join("asap-dataset-copy", "**", "*.musicxml"), recursive=True)
piece_dict = {}

for xml in tqdm(xml_fn):
    
    json_dict = {"matches": [], "targets":[], "matched_basis": [], "xml": xml}
    
    os.makedirs(os.path.join(os.path.dirname(xml), "modified_matches"), exist_ok=True)
    
    piece_dict.update({os.path.dirname(xml): {"matches": [], "targets": [], "xml": xml}})
    
    match_fn = glob.glob(os.path.join(os.path.dirname(xml), "*.match"))

    score = pt.load_score(xml)
    score = pt.score.merge_parts(score)
    score = pt.score.unfold_part_maximal(score, update_ids=True)
    
    nid_dict = dict((n.id, i) for i, n in enumerate(score.notes_tied))
    
    pt.score.expand_grace_notes(score)
    
    basis, bf_names = pt.musicanalysis.make_note_feats(score, "all")

    for match in match_fn:

        try:
            performance, alignment = pt.load_match(match)

            parameter_names = ["velocity_trend", "beat_period"]

            pc = get_performance_codec(parameter_names)

            targets, snote_ids, unique_onset_idxs = pc.encode(
                part=score,
                ppart=performance[0],
                alignment=alignment,
                return_u_onset_idx=True
            )
            
            matched_subset_idxs = np.array([nid_dict[nid] for nid in snote_ids])
            basis_matched = basis[matched_subset_idxs]
            
            json_dict["matches"].append(match)
            json_dict["targets"].append(targets.tolist())
            json_dict["matched_basis"].append(basis_matched.tolist())
            
            piece_dict[os.path.dirname(xml)]["matches"].append(match)
            piece_dict[os.path.dirname(xml)]["targets"].append(targets)

        except Exception as e:
            print(e)
            print(match)
            continue
            
    piece_name = "-".join(xml.split(os.sep)[1:-1])
    outfile = open("jsons" + os.sep + piece_name + ".json", "w+")
    json.dump(json_dict, outfile, indent=6)
    outfile.close()

In [31]:
json_dicts = glob.glob(os.path.join("jsons", "**", "*.json"), recursive=True)
perf_parameters = []

for jd in tqdm(json_dicts):
    piece_name = os.sep.join(jd.split("-"))[:-len(".json")]
    
    with open(jd) as f:
        cur_dict = json.load(f)
        perf_parameters.append(([np.array(t) for t in cur_dict["targets"]], cur_dict["matches"]))

100%|████████████████████████████████████████████████████████████████████████████████| 234/234 [02:52<00:00,  1.36it/s]


In [43]:
# perf_parameters = [np.array(perf[0]) for perf in perf_parameters]

pd.DataFrame(perf_parameters[0][0][0])

Unnamed: 0,0,1,2,3,4
0,1.189583,0.000000,0.511538,0.283465,0.000000
1,1.220833,0.000000,0.266723,0.401575,0.000000
2,1.318750,0.000000,0.183786,0.425197,0.000000
3,1.362500,0.000000,-0.014782,0.448819,0.000000
4,1.125000,0.000000,1.654503,0.346457,0.000000
...,...,...,...,...,...
733,2.354187,0.000000,0.045232,0.393701,0.000000
734,2.716675,0.000000,0.626399,0.370079,0.000000
735,4.270844,0.000000,0.409040,0.511811,0.000000
736,2.763022,-0.009377,-0.002450,0.370079,0.094488


In [44]:
import pandas as pd

indices = [perf[1] for perf in perf_parameters]
expressiveness_params = ["beat_period", "timing", "articulation_log", "velocity_trend", "velocity_dev"]
descriptions = []
dataframes = []


for perf in perf_parameters:
    param_df = pd.DataFrame(perf[0][0], columns=expressiveness_params)
    dataframes.append(param_df)
    descriptions.append(param_df.describe())

IndexError: list index out of range

In [4]:
descriptions[0]["velocity_trend"].T["min"]

0.28346458077430725

In [5]:
def get_outliers(z_threshold=3):
    
    outlier_dict = {"piece_names": []}
    
    for param_name in expressiveness_params:

        print(f"{param_name} outliers:")

        param_df = pd.DataFrame(columns=["min", "max", "mean"], index=indices)

        param_df["min"] = [desc[param_name].T["min"] for desc in descriptions]
        param_df["max"] = [desc[param_name].T["max"] for desc in descriptions]
        param_df["mean"] = [desc[param_name].T["mean"] for desc in descriptions]
        
        min_mean = np.mean(param_df["min"])
        max_mean = np.mean(param_df["max"])
        mean_mean = np.mean(param_df["mean"])

        z_min = np.abs(stats.zscore(param_df["min"]))
        z_max = np.abs(stats.zscore(param_df["max"]))
        z_mean = np.abs(stats.zscore(param_df["max"]))
        
        outliers = param_df[(z_min > z_threshold) | (z_max > z_threshold) | (z_mean > z_threshold)]
        
        print(f"""Mean values for comparison:
                    min: {min_mean}
                    max: {max_mean}
                    mean: {mean_mean}\n""")
        
        print(f"{outliers.to_markdown()}\n\n")
        outlier_dict[param_name] = outliers
        outlier_dict["piece_names"] += [idx for idx in outliers.index]
        
    return outlier_dict

In [None]:
outlier_dict = get_outliers(5)

In [None]:
from matplotlib import pyplot as plt

for param_name in expressiveness_params:
    
    if len(outlier_dict[param_name]) != 0:
    
        fig, ax = plt.subplots(len(outlier_dict[param_name]), figsize=(10, 50))
        for i, piece_name in enumerate(outlier_dict[param_name].index):

            idx = indices.index(piece_name)
            df = dataframes[idx]

            ax[i].plot(df[param_name])
            ax[i].set(title=f"{param_name.upper()}: {piece_name}")

        fig.tight_layout()

In [8]:
pt.musicanalysis.list_note_feats_functions()

['articulation_direction_feature',
 'articulation_feature',
 'duration_feature',
 'fermata_feature',
 'grace_feature',
 'loudness_direction_feature',
 'metrical_feature',
 'metrical_strength_feature',
 'onset_feature',
 'ornament_feature',
 'polynomial_pitch_feature',
 'relative_score_position_feature',
 'slur_feature',
 'staff_feature',
 'tempo_direction_feature',
 'time_signature_feature',
 'vertical_neighbor_feature']

In [9]:
valid_perf_params = [perf for perf in perf_parameters if perf[1] not in outlier_dict["piece_names"]]

test = os.path.dirname(perf_parameters[4][1])
print(test)
piece_dict[test]["xml"]

asap-dataset-copy\Bach\Fugue\bwv_848


'asap-dataset-copy\\Bach\\Fugue\\bwv_848\\xml_score.musicxml'

In [55]:
feat_names = pt.musicanalysis.list_note_feats_functions()

feature_target_list = []

for piece in tqdm(piece_dict):
    if piece not in outlier_dict["piece_names"]:
        score = pt.load_score(piece_dict[piece]["xml"])
        score = partitura.score.merge_parts(score)
        score = partitura.score.unfold_part_maximal(score)
        
        features, snote_ids = pt.musicanalysis.make_note_features(part=score, feature_functions="all")
        
        pt.score.expand_grace_notes(score)
        nid_dict = dict((n.id, i) for i, n in enumerate(score.notes_tied))
        
        
        matched_subset_idxs = np.array([nid_dict[nid] for nid in snote_ids])
        basis_matched = basis[matched_subset_idxs]
        
        
        feature_target_list.append((features, piece_dict[piece]["exp_parameters"]))

100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [11:40<00:00,  2.98s/it]


In [75]:
for f in feature_target_list[20][1]:
    print(len(f))

1392
1391
1393
1385
1391
1393


In [74]:
len(feature_target_list[20][0])

1401

In [105]:
from basismixer.data import remove_grace_notes

all_data = []
bfs = feat_names
parameter_names = ["velocity_trend", "beat_period"]
pc = get_performance_codec(parameter_names)

for piece in piece_dict:
    score = "asap-dataset-copy\\Bach\\Fugue\\bwv_846\\xml_score.musicxml"
    matches = "C:\Users\Max\Desktop\Uni\basismixer\asap-dataset-copy\Bach\Fugue\bwv_846\Shi05M.match"
    
    all_data.append(process_piece((score, matches), pc, bfs, gracenotes="remove"))

asap-dataset-copy\Bach\Fugue\bwv_846\xml_score.musicxml asap-dataset-copy\Bach\Fugue\bwv_846\Shi05M.match


ValueError: Filename does not exist