# Evaluate

This notebook evaluates the quality of the online alignments in a given experiment directory.

In [None]:
%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import os.path
import pandas as pd
# import seaborn as sns
# from pathlib import Path
# import glob
import pickle

## Calculate Alignment Errors

First we calculate the alignment errors of a given system on all evaluated measures.

In [None]:
def parseAnnotationFile(annotfile):
    '''
    Reads a beat annotation file into a measure -> timestamp lookup table.
    
    Inputs
    annotfile: filepath of the beat annotation file (comma-separated, with 'start' and 'measure' columns)
    
    Returns a dictionary whose keys are the measure numbers and whose values are the corresponding
    timestamps. If a measure number occurs more than once, the last occurrence in the file wins.
    '''
    table = pd.read_csv(annotfile, sep=',')
    starts = np.array(table['start'])
    measures = np.array(table['measure'])
    # dict() consumes the (measure, timestamp) pairs in file order, so later rows overwrite earlier ones
    return dict(zip(measures, starts))

In [None]:
def getGroundTruthTimestamps(annotfile1, annotfile2):
    '''
    Parses two beat annotation files and returns the ground truth timestamps of the measures they share.
    
    Inputs
    annotfile1: the first beat annotation file
    annotfile2: the second beat annotation file
    
    Outputs
    eval_pts: an Nx2 array specifying the ground truth timestamps for N measures; column 0 holds the
        timestamps from annotfile1 and column 1 the timestamps from annotfile2. The shape is (0, 2)
        when the two files have no measure in common.
    overlap_measures: an array containing the list of evaluated measures, sorted in increasing order
    '''
    
    # parse annotation files
    gt1 = parseAnnotationFile(annotfile1)
    gt2 = parseAnnotationFile(annotfile2)

    # only measures that are annotated in both files can be evaluated
    overlap_measures = sorted(set(gt1).intersection(gt2))
    
    # construct (t1, t2) ground truth timestamps; the reshape keeps the result two-dimensional
    # even when there is no overlap, so callers can always index the columns with [:,0] and [:,1]
    eval_pts = np.array([(gt1[m], gt2[m]) for m in overlap_measures]).reshape(-1, 2)
    
    return eval_pts, np.array(overlap_measures)

In [None]:
def calcAlignErrors_single(hypfile, annotfile1, annotfile2, hop_sec):
    '''
    Computes the per-measure alignment errors of one estimated alignment.
    
    Inputs
    hypfile: a .npy file containing the estimated alignment
    annotfile1: the beat annotation file for the piano recording
    annotfile2: the beat annotation file for the orchestra recording
    hop_sec: hop size between frames in the DTW alignment
    
    Outputs
    err: the alignment errors in the estimated alignment, computed as the predicted orchestra
        timestamp minus the ground truth orchestra timestamp for every evaluated measure
    measNums: the measure numbers that are evaluated
    '''
    # ground truth: column 0 = piano timestamps, column 1 = orchestra timestamps
    refTimes, measNums = getGroundTruthTimestamps(annotfile1, annotfile2)
    refPiano = refTimes[:,0]

    # predicted piano-orchestra alignment, converted from frame indices to the units of hop_sec
    path = np.load(hypfile)
    hypPiano = path[0,:] * hop_sec
    hypOrch = path[1,:] * hop_sec

    # read the predicted orchestra time off the alignment at each ground truth piano time
    predOrch = np.interp(refPiano, hypPiano, hypOrch)
    return predOrch - refTimes[:,1], measNums

In [None]:
def getScenarioIds(scenarios_dir):
    '''
    Reads the scenario ids listed in the scenarios.summary file of a scenarios/ directory.
    
    Inputs
    scenarios_dir: directory containing scenarios information
    
    Returns a list holding the first space-separated field of every line in scenarios.summary,
    in the order in which the lines appear in the file.
    '''
    summary = pd.read_csv(f'{scenarios_dir}/scenarios.summary', header=None, sep=' ')
    # the file has no header row, so the first column is labeled 0
    first_column = summary[0]
    return first_column.tolist()

In [None]:
def calcAlignErrors_batch(exp_dir, scenarios_dir, hop_sec, out_dir):
    '''
    Calculates the alignment errors for all scenarios in an experiment directory and saves them
    to {out_dir}/errs.pkl.
    
    Inputs
    exp_dir: the experiment directory to evaluate; must contain {scenario_id}/hyp.npy for every scenario
    scenarios_dir: the directory containing the scenarios information (scenarios.summary and, for
        every scenario, {scenario_id}/p.beats and {scenario_id}/o.beats)
    hop_sec: hop size between frames in the DTW alignment
    out_dir: the directory to save outputs and figures to (created if it does not exist)
    
    The saved pickle is a dictionary whose key is the scenario id and whose value is the tuple
    (errors, measureNums) returned by calcAlignErrors_single.
    '''
    # evaluate all scenarios
    d = {} # key: scenario_id, value: (errors, measureNums)
    for scenario_id in getScenarioIds(scenarios_dir):
        hypFile = f'{exp_dir}/{scenario_id}/hyp.npy'
        pianoAnnot = f'{scenarios_dir}/{scenario_id}/p.beats'
        orchAnnot = f'{scenarios_dir}/{scenario_id}/o.beats'
        err, measNums = calcAlignErrors_single(hypFile, pianoAnnot, orchAnnot, hop_sec)
        d[scenario_id] = (err, measNums)
        
    # save; exist_ok avoids the check-then-create race, and the with-block guarantees that the
    # file is flushed and closed even if pickling fails
    os.makedirs(out_dir, exist_ok=True)
    outfile = f'{out_dir}/errs.pkl'
    with open(outfile, 'wb') as f:
        pickle.dump(d, f)

In [None]:
# experiment to evaluate and where its inputs/outputs live
exp_dir = 'experiments/simpleOfflineDTW' # change
scenarios_dir = 'scenarios'
# results go to eval/<name of the experiment directory>
eval_dir = f'eval/{os.path.basename(exp_dir)}'
# hop size passed to the evaluation; presumably a 512-sample hop at a 22050 Hz sample rate -- confirm
hop_sec = 512 / 22050
calcAlignErrors_batch(exp_dir, scenarios_dir, hop_sec, eval_dir)

## Plot Error vs Tolerance

We can visualize the results by plotting the error rate across a range of error tolerances.

In [None]:
def plotErrorVsTolerance(eval_dir, maxTol, savepng = None):
    '''
    Plots the error rate across a range of error tolerances.
    
    Inputs
    eval_dir: the eval directory to plot; its errs.pkl file is read
    maxTol: maximum error tolerance to consider (in milliseconds); must be an integer
    savepng: if True, will save the figure to {eval_dir}/errorVsTol.png
    
    Outputs
    errRates: an array of length maxTol+1 where errRates[i] is the fraction (between 0 and 1) of
        alignment errors whose magnitude exceeds a tolerance of i milliseconds
    tols: the evaluated tolerances 0, 1, ..., maxTol (in milliseconds)
    '''
    # load
    with open(f'{eval_dir}/errs.pkl', 'rb') as f:
        d = pickle.load(f)
    
    # flatten the per-scenario errors with a single concatenation instead of re-copying the
    # growing array once per scenario; the empty seed keeps the result a float array even when
    # there are no scenarios at all
    perScenario = [np.ravel(entry[0]) for entry in d.values()]
    absErrs = np.abs(np.concatenate([np.empty(0)] + perScenario)) # computed once, reused for every tolerance

    # calculate error rates
    errRates = np.zeros(maxTol+1)
    tols = np.arange(maxTol+1)
    for i in tols:
        # tolerances are in milliseconds, so i/1000 converts to the units of the stored errors
        errRates[i] = np.mean(absErrs > i/1000)
        
    # plot
    plt.plot(tols, errRates * 100.0)
    plt.ylabel('Error Rate (%)')
    plt.xlabel('Error Tolerance (ms)')
    if savepng:
        plt.savefig(f'{eval_dir}/errorVsTol.png')
    
    return errRates, tols

In [None]:
maxTol = 5000 # in milliseconds
# sweep the tolerances 0..maxTol ms and plot the curve; savepng=False leaves the figure unsaved
errRates, tols = plotErrorVsTolerance(eval_dir, maxTol, savepng=False)

In [None]:
# error rate (%) at a tolerance of 1000 ms: errRates[i] is the rate for a tolerance of i ms
errRates[1000]*100.0

## Analyses

Below are some analysis plots to gain intuition about system performance.

In [None]:
# reload the saved results; d maps scenario_id -> (errors, measureNums)
with open(f'{eval_dir}/errs.pkl', 'rb') as f:
    d = pickle.load(f)

In [None]:
scenario_id = 's12' # scenario to inspect
errs = d[scenario_id][0] # alignment errors of the evaluated measures in this scenario
measures = len(errs) # number of evaluated measures
# NOTE: the x-axis is the index of the evaluated measure, not the measure number itself
plt.plot(np.arange(measures), errs)

In [None]:
# plt.plot(np.arange(len(errs)), errs)
# plt.xlabel('Measure Number')
# plt.ylabel('Alignment Error (sec)')