In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os.path
from pathlib import Path
import pickle
import multiprocessing
import time
import gc
from tqdm import tqdm

In [2]:
%run align_tools_cython.ipynb

In [3]:
%run _NWTW.ipynb

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [4]:
%run DTWStar.ipynb

In [5]:
# Name of the query split to process; it selects the query list file
# (cfg_files/queries.train.<TRAIN_SET>) and the experiment output directory.
TRAIN_SET = 'toy'

In [6]:
# Text file listing the pairs to align: run_all_benchmarks reads it line by line
# and expects exactly two space-separated entries per line.
QUERY_LIST = Path(f'cfg_files/queries.train.{TRAIN_SET}')

In [7]:
# Alignment systems to run; each name is a key into the `steps` / `weights`
# tables and becomes a sub-directory name in the output tree.
SYSTEMS = [
    'dtw1', 'dtw2', 'dtw3', 'dtw4',
    'subseqdtw1', 'subseqdtw2', 'subseqdtw3',
    'nwtw',
    'dtwstarv02',
]

# Benchmark conditions; each name is a key into FEAT_DIRS and becomes a
# sub-directory name in the output tree.
BENCHMARKS = [
    'matching',
    'subseq20', 'subseq30', 'subseq40',
    'partialStart', 'partialEnd', 'partialOverlap',
    'pre_5', 'pre_10', 'pre_20',
    'post_5', 'post_10', 'post_20',
    'preA_postB_5', 'preA_postB_10', 'preA_postB_20',
]

In [8]:
# Root directory that holds one feature sub-directory per condition.
features_root = Path('../ttmp/Chopin_Mazurkas_features')

# Maps each benchmark name to a two-element list of feature directories:
# element 0 is used for the first file of a query pair, element 1 for the second.
FEAT_DIRS = {}

for benchmark in BENCHMARKS:
    if 'preA_postB' in benchmark:
        # The trailing token of the benchmark name (e.g. '5' in 'preA_postB_5')
        # selects the matching pre_/post_ feature directories.
        sec = benchmark.split('_')[-1]
        FEAT_DIRS[benchmark] = [features_root / f'pre_{sec}', features_root / f'post_{sec}']
    elif benchmark == 'partialOverlap':
        # Built from the two existing partial conditions.
        FEAT_DIRS[benchmark] = [features_root / 'partialStart', features_root / 'partialEnd']
    else:
        # Default: the modified recording is aligned against the 'original' features.
        FEAT_DIRS[benchmark] = [features_root / benchmark, features_root / 'original']

In [9]:
# Per-system transition table passed as `steps=` to the aligners: a (3, 2)
# integer array with one transition per row (presumably (row, col) offsets in
# the cost matrix -- TODO confirm against alignDTW).
steps = {
    'dtw1': np.array([[1, 1], [1, 2], [2, 1]]),
    'dtw2': np.array([[1, 1], [1, 2], [2, 1]]),
    'dtw3': np.array([[1, 1], [1, 0], [0, 1]]),
    'dtw4': np.array([[1, 1], [1, 0], [0, 1]]),
    'subseqdtw1': np.array([[1, 1], [1, 2], [2, 1]]),
    'subseqdtw2': np.array([[1, 1], [1, 2], [2, 1]]),
    'subseqdtw3': np.array([[1, 1], [1, 2], [2, 1]]),
    'nwtw': 0,  # transitions are specified in NWTW algorithm
    'dtwstarv02': np.array([[1, 1], [1, 2], [2, 1]]),
}

# Per-system weights passed as `weights=` to the aligners; three entries,
# matching the three rows of the corresponding `steps` array.
weights = {
    'dtw1': np.array([2, 3, 3]),
    'dtw2': np.array([1, 1, 1]),
    'dtw3': np.array([2, 1, 1]),
    'dtw4': np.array([1, 1, 1]),
    'subseqdtw1': np.array([1, 1, 2]),
    'subseqdtw2': np.array([2, 3, 3]),
    'subseqdtw3': np.array([1, 1, 1]),
    'nwtw': 0,  # weights are specified in NWTW algorithm
    'dtwstarv02': np.array([2, 3, 3]),
}

# Extra keyword arguments for individual systems.
# NOTE(review): 10/(512/22050) looks like 10 seconds expressed in frames for a
# hop of 512 samples at 22050 Hz -- confirm against the feature extraction.
other_params = {
    'dtwstarv02': {'buffer': 10/(512/22050)},
}

# Benchmarks

In [10]:
def get_outfile(outdir, benchmark, system, queryid):
    """Return the pickle path for one (benchmark, system, query) result.

    The directory ``outdir/benchmark/system`` is created if it does not exist
    yet. The returned path is ``queryid`` inside that directory with its suffix
    set to ``.pkl``; the file itself is not created here.

    Args:
        outdir: Root output directory (a ``pathlib.Path``).
        benchmark: Benchmark name, used as the first sub-directory.
        system: Alignment system name, used as the second sub-directory.
        queryid: Identifier of the query pair, used as the file stem.

    Returns:
        ``pathlib.Path`` of the output pickle file.
    """
    system_dir = outdir.joinpath(benchmark, system)
    system_dir.mkdir(parents=True, exist_ok=True)
    return system_dir.joinpath(queryid).with_suffix('.pkl')

In [13]:
def align_system(system, F1, F2, outfile):
    """Align two feature matrices with the named system and pickle the result.

    Args:
        system: Name of the alignment system. ``'dtwstarv02'`` and ``'nwtw'``
            have dedicated code paths; every other name is run through
            ``alignDTW`` using the module-level ``steps`` / ``weights`` tables.
            Names containing ``'subseq'`` are run in subsequence mode.
        F1: 2-D feature array of the first recording. Its second-axis length
            is compared against ``F2``'s (presumably the frame/time axis --
            TODO confirm).
        F2: 2-D feature array of the second recording, same layout as ``F1``.
        outfile: Path of the pickle file that receives the warping path. It is
            also forwarded to ``alignDTW``.

    Returns:
        None. When the aligner yields a path it is pickled to ``outfile``;
        when it yields ``None`` nothing is written by this function.
    """
    subseq = 'subseq' in system

    if system == 'dtwstarv02':
        # Cost matrix handed to DTW*: 1 minus the product of the L2norm outputs.
        C = 1 - L2norm(F1).T @ L2norm(F2)
        # Only the warping path is needed; the cost and debug outputs are discarded.
        _, wp, _ = dtwstar_v2a(C, steps=steps[system], weights=weights[system],
                               buffer=other_params[system]['buffer'])
    elif system == 'nwtw':
        wp = alignNWTW(F1, F2, downsample=1, gamma=0.346, profile=False)
    elif subseq and (F2.shape[1] < F1.shape[1]):
        # Cython DTW implementation, called with the shorter input (F2) first.
        wp = alignDTW(F2, F1, steps=steps[system], weights=weights[system],
                      downsample=1, outfile=outfile, subseq=subseq)
        if wp is not None:
            # Reverse the first axis to undo the argument swap (presumably wp
            # has one row per input -- TODO confirm). Guarded because the
            # aligner may return None, which cannot be sliced.
            wp = wp[::-1, :]
    else:
        # Cython DTW implementation
        wp = alignDTW(F1, F2, steps=steps[system], weights=weights[system],
                      downsample=1, outfile=outfile, subseq=subseq)

    if wp is None:
        # No path was produced, so nothing is written here.
        # NOTE(review): the previous comment said None is written to the
        # output file, but this function never did that. alignDTW receives
        # `outfile`, so it may cover that case itself -- confirm.
        return

    # Context manager guarantees the handle is flushed and closed, which
    # matters when many alignments run inside long-lived worker processes.
    with open(outfile, 'wb') as f:
        pickle.dump(wp, f)

In [14]:
def run_all_benchmarks(outdir):
    """Run every benchmark in BENCHMARKS on all query pairs in QUERY_LIST.

    QUERY_LIST is read once. Each non-blank line must hold exactly two
    whitespace-separated entries (the two recordings of a pair). The query id
    of a pair is ``<basename of entry 1>__<basename of entry 2>``.

    Args:
        outdir: Root output directory (a ``pathlib.Path``); results are placed
            below it by ``run_benchmark_batch``.

    Raises:
        AssertionError: If a non-blank line does not contain exactly two entries.
    """
    parts_batch = []
    queryids = []
    with open(QUERY_LIST, 'r') as f:
        for lineno, line in enumerate(tqdm(f), start=1):
            parts = line.split()
            if not parts:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            assert len(parts) == 2, (
                f'{QUERY_LIST}, line {lineno}: expected 2 entries, got {len(parts)}'
            )
            parts_batch.append(parts)
            queryids.append(os.path.basename(parts[0]) + '__' + os.path.basename(parts[1]))

    for benchmark in BENCHMARKS:
        featdirs = FEAT_DIRS[benchmark]
        run_benchmark_batch(benchmark, featdirs[0], featdirs[1], parts_batch, outdir, queryids, n_cores=4)

In [15]:
def run_benchmark_batch(benchmark, featdir1, featdir2, parts_batch, outdir, queryids, n_cores):
    """Run all SYSTEMS on every query pair of one benchmark, in parallel.

    For each pair, the two ``.npy`` feature files are loaded and one
    ``align_system`` task per system is queued. All tasks are then executed in
    a ``multiprocessing.Pool``.

    Args:
        benchmark: Benchmark name, used to build the output paths.
        featdir1: Directory (``pathlib.Path``) with features for the first
            entry of each pair.
        featdir2: Directory (``pathlib.Path``) with features for the second
            entry of each pair.
        parts_batch: Sequence of pairs; ``pair[0]`` / ``pair[1]`` are paths
            relative to ``featdir1`` / ``featdir2`` (suffix is set to ``.npy``).
        outdir: Root output directory (``pathlib.Path``).
        queryids: Query id for each pair, same length as ``parts_batch``.
        n_cores: Number of worker processes. Values below 1 are raised to 1;
            ``None`` selects ``cpu_count() - 1`` (at least 1).

    Returns:
        None. Results are written to disk by ``align_system``.
    """
    assert len(parts_batch) == len(queryids)

    inputs = []
    for parts, queryid in zip(parts_batch, queryids):
        featfile1 = (featdir1 / parts[0]).with_suffix('.npy')
        featfile2 = (featdir2 / parts[1]).with_suffix('.npy')

        F1 = np.load(featfile1)
        F2 = np.load(featfile2)

        for system in SYSTEMS:
            inputs.append((system, F1, F2, get_outfile(outdir, benchmark, system, queryid)))

    if not inputs:
        # Nothing to align: avoid spawning a pool for no work.
        return

    # Honour the caller's requested worker count. Previously the argument was
    # ignored in favour of cpu_count()-1, which is 0 (and makes Pool raise
    # ValueError) on a single-core machine.
    if n_cores is None:
        n_workers = max(1, multiprocessing.cpu_count() - 1)
    else:
        n_workers = max(1, int(n_cores))

    # process files in parallel; starmap blocks until every task has finished,
    # and the context manager then shuts the workers down so they do not
    # accumulate across the per-benchmark calls.
    with multiprocessing.Pool(processes=n_workers) as pool:
        pool.starmap(align_system, inputs)

In [16]:
def run_benchmark(benchmark, featdir1, featdir2, parts, outdir, queryid):
    """Run all SYSTEMS sequentially on a single query pair of one benchmark.

    Args:
        benchmark: Benchmark name, used to build the output paths.
        featdir1: Directory (``pathlib.Path``) with features for ``parts[0]``.
        featdir2: Directory (``pathlib.Path``) with features for ``parts[1]``.
        parts: Pair of relative feature paths; the suffix is set to ``.npy``.
        outdir: Root output directory (``pathlib.Path``).
        queryid: Identifier of the pair, used as the output file stem.

    Returns:
        None. Results are written to disk by ``align_system``.
    """
    first_path = (featdir1 / parts[0]).with_suffix('.npy')
    second_path = (featdir2 / parts[1]).with_suffix('.npy')
    first_feats, second_feats = np.load(first_path), np.load(second_path)

    # run all baselines
    for system in SYSTEMS:
        result_path = get_outfile(outdir, benchmark, system, queryid)
        align_system(system, first_feats, second_feats, result_path)

In [17]:
# Root of the result tree; get_outfile places each result at
# <outdir>/<benchmark>/<system>/<queryid>.pkl.
outdir = Path(f'experiments_train/{TRAIN_SET}')
# Run every system on every benchmark for all pairs listed in QUERY_LIST.
run_all_benchmarks(outdir)

5it [00:00, 9493.67it/s]
