# Explanation Based Mixture DTW

This notebook implements an Explanation-Based 2D Mixture DTW.  The only requirement in this notebook is that it implement the `offline_processing()` and `online_processing()` functions, which will be imported and run in `02_RunExperiment.ipynb`.

Our main findings:
- We find that this approach is much slower than a naive pairwise DTW approach, and does not improve performance.

## Offline Processing

In the offline processing stage, three things are computed and stored in the `cache/` folder:
- chroma features for the orchestra recording
- chroma features for the full mix recording
- the orchestra start and end frame indices

In [None]:
import numpy as np
import librosa as lb
import os
import os.path
import import_ipynb
import align_tools
import system_utils
from hmc_mir.align import dtw, isa
from numba import jit, njit, prange
from matplotlib import pyplot as plt
from scipy.spatial.distance import cdist

In [None]:
def offline_processing(scenario_dir, cache_dir, hop_length):
    '''
    Precomputes and caches everything that does not depend on the piano recording.
    
    Inputs
    scenario_dir: The scenario directory to process
    cache_dir: The location of the cache directory
    hop_length: The hop length in samples used when computing chroma features
    
    If cache_dir already exists it is assumed to be fully processed and nothing is recomputed.
    Otherwise the directory is created and three files are written to it:
    - o_chroma.npy: chroma features of o.wav
    - po_chroma.npy: chroma features of po.wav
    - orch_start_end_frm.npy: the orchestra start time and end time converted to frame indices
      (the end index is the rounded end frame plus one)
    '''
    system_utils.verify_scenario_dir(scenario_dir)

    # an existing cache directory is treated as already processed
    if os.path.exists(cache_dir):
        return

    os.makedirs(cache_dir)

    # chroma features of the orchestra-only recording
    audio_o, sr = lb.core.load(f'{scenario_dir}/o.wav')
    chroma_o = lb.feature.chroma_cqt(y=audio_o, sr=sr, hop_length=hop_length, norm=None)

    # chroma features of the full mix recording
    audio_po, sr = lb.core.load(f'{scenario_dir}/po.wav')
    chroma_po = lb.feature.chroma_cqt(y=audio_po, sr=sr, hop_length=hop_length, norm=None)

    # convert the orchestra start/end times from seconds to frame indices
    start_sec, end_sec = system_utils.get_orchestra_start_end_times(scenario_dir)
    start_frm = int(np.round(start_sec * sr / hop_length))
    end_frm = int(np.round(end_sec * sr / hop_length)) + 1

    # write everything to the cache
    np.save(f'{cache_dir}/o_chroma.npy', chroma_o)
    np.save(f'{cache_dir}/po_chroma.npy', chroma_po)
    np.save(f'{cache_dir}/orch_start_end_frm.npy', np.array([start_frm, end_frm]))

In [None]:
def verify_cache_dir(indir):
    '''
    Checks that a cache directory contains every file the online stage loads.
    
    Inputs
    indir: The cache directory to verify

    Raises an AssertionError naming the first required file that is missing.
    Note that o_po_align.npy is not required.
    '''
    required_files = ('o_chroma.npy', 'po_chroma.npy', 'orch_start_end_frm.npy')
    for fname in required_files:
        assert os.path.exists(f'{indir}/{fname}'), f'{fname} missing from {indir}'

## Online Processing

In the online processing stage, we do the following:
- compute the P-PO alignment using standard subsequence DTW with chroma features
- find the matching PO segment and the corresponding P frames.  This establishes a baseline similarity when using only P features.
- calculate a 2d cost matrix between the (matching) PO frames and O frames using a mixture-based cost.  This cost is computed by first re-weighting the features to ensure O and P features have roughly equal volume, adding the P and O features, and then comparing the sum to the corresponding PO frame
- the final 2d cost matrix between PO and O is determined by calculating the difference between the baseline similarity (only using P features) and the mixture similarity (using both P and O features).  In essence, we are measuring how much using O features in addition to P features improves our ability to explain the PO features.
- perform standard subsequence DTW on this explanation-based cost matrix

In [None]:
def online_processing(scenario_dir, out_dir, cache_dir, hop_length, steps, weights):
    '''
    Estimates the piano-orchestra alignment using an explanation-based mixture cost.
    
    Inputs
    scenario_dir: The scenario directory to process
    out_dir: The directory to put results, intermediate files, and logging info
    cache_dir: The cache directory
    hop_length: The hop length in samples used when computing chroma features
    steps: an L x 2 array specifying the allowable DTW transitions
    weights: a length L array specifying the DTW transition weights

    The piano (P) is first aligned to the full mix (PO).  Each matching PO frame is then compared
    against the aligned P frame alone (baseline) and against the P frame plus every orchestra (O)
    frame (mixture); the baseline minus the mixture similarity is used as the cost for a second DTW
    between PO and O.  The two alignments are combined and the result, scaled to seconds, is saved
    in the output directory in a file hyp.npy.

    Raises an AssertionError if out_dir already exists or the cache directory is incomplete.
    '''

    # sanity checks; refuse to overwrite an existing output directory
    system_utils.verify_scenario_dir(scenario_dir)
    verify_cache_dir(cache_dir)
    assert not os.path.exists(out_dir), f'Output directory {out_dir} already exists.'
    os.makedirs(out_dir)

    # piano chroma is computed here; full mix and orchestra chroma come from the cache
    audio_p, sr = lb.core.load(f'{scenario_dir}/p.wav')
    F_p = lb.feature.chroma_cqt(y=audio_p, sr=sr, hop_length=hop_length, norm=None)
    F_po = np.load(f'{cache_dir}/po_chroma.npy')
    F_o = np.load(f'{cache_dir}/o_chroma.npy')
    # loaded for parity with the cache contents; not used further in this function
    orch_start_frm, orch_end_frm = np.load(f'{cache_dir}/orch_start_end_frm.npy')

    # P-PO alignment on cosine distance
    # NOTE(review): the final True passed to dtw.dtw presumably selects subsequence mode -- confirm
    p_unit = lb.util.normalize(F_p, norm=2, axis=0)
    po_unit = lb.util.normalize(F_po, norm=2, axis=0)
    cost_p_po = 1 - p_unit.T @ po_unit
    _, _, path_p_po = dtw.dtw(cost_p_po, steps, weights, True)

    # restrict PO to the matched region and warp P onto the same frames
    po_start_frm = path_p_po[1,0]
    po_end_frm = path_p_po[1,-1] + 1
    po_match = F_po[:,po_start_frm:po_end_frm]
    p_warped = time_stretch_part(F_p, F_po, path_p_po.T)[:, po_start_frm:po_end_frm]
    assert(po_match.shape == p_warped.shape)

    # baseline: per-frame cosine similarity between PO and P alone
    po_match_unit = lb.util.normalize(po_match, axis=0, norm=2)
    p_warped_unit = lb.util.normalize(p_warped, axis=0, norm=2)
    baseline_sim = np.sum(po_match_unit * p_warped_unit, axis=0)

    # mixture: cosine similarity between each PO frame and (scaled P frame + every O frame)
    p_vol_avg = np.sum(np.mean(F_p, axis=1))
    o_vol_avg = np.sum(np.mean(F_o, axis=1))
    n_match = po_match.shape[1]
    mixture_sim = np.zeros((n_match, F_o.shape[1]))
    # one PO frame at a time to keep memory usage down
    for frm in range(n_match):
        # the gain o_vol_avg / p_vol_avg brings P to roughly the same volume as O
        p_plus_o = p_warped[:,frm].reshape((-1,1)) * o_vol_avg / p_vol_avg + F_o
        po_row = po_match_unit[:,frm].reshape((1,-1))
        mixture_sim[frm,:] = po_row @ lb.util.normalize(p_plus_o, axis=0, norm=2)

    # PO-O alignment: cost is low where adding O explains PO better than P alone
    cost_po_o = baseline_sim.reshape(-1,1) - mixture_sim
    _, _, path_po_o = dtw.dtw(cost_po_o, steps, weights, True)
    path_po_o[0,:] = path_po_o[0,:] + po_start_frm  # shift back to absolute PO frame indices

    # combine the two alignments and save the result in seconds
    hop_sec = hop_length / sr
    path_p_o = align_tools.infer_alignment(path_p_po, path_po_o, frames=True)
    np.save(f'{out_dir}/hyp.npy', path_p_o*hop_sec)

In [None]:
def cos_dist_safe(F1, F2):
    '''
    Calculates the pairwise cos distance between two features matrices.  Handles zero magnitudes safely.

    Inputs
    F1: feature matrix whose columns are the feature vectors to compare
    F2: feature matrix whose columns are the feature vectors to compare against

    Returns a matrix C where C[i,j] is one minus the cosine similarity between column i of F1
    and column j of F2.  Columns are L2-normalized with lb.util.normalize, which leaves
    (near-)zero columns unscaled instead of dividing by zero, so such columns get a distance of 1.
    '''
    # use the arguments, not notebook-level globals
    F1_unit = lb.util.normalize(F1, norm=2, axis=0)
    F2_unit = lb.util.normalize(F2, norm=2, axis=0)
    C = 1 - F1_unit.T @ F2_unit
    return C

In [None]:
def time_stretch_part(query, ref, alignment):
    """Uses the alignment computed from DTW to time stretch the query to have the same dimensions as the reference.

    Every aligned reference frame receives the query frame it is paired with (if a reference
    frame appears more than once, the last pairing wins).  Reference frames that lie between the
    first and last aligned frame but were skipped by the path are filled by linear interpolation
    between the nearest aligned frames on either side; for a single skipped frame this is the
    midpoint of its two neighbours.  Frames outside the aligned region are left as zeros.

    Args:
        query (np.ndarray): The features of the part
        ref (np.ndarray): The features of the full mix; only its shape is used
        alignment (np.ndarray): The alignment between the part and full mix, shape L x 2,
            with rows of (query frame index, reference frame index)

    Returns:
        feature_stretch (np.ndarray): The time stretched part, same shape as ref
    """
    m, n = ref.shape
    feature_stretch = np.zeros((m, n))
    alignment = np.asarray(alignment)
    if alignment.size == 0:
        # nothing is aligned, so nothing can be copied or interpolated
        return feature_stretch

    # copy each aligned query frame onto its reference frame
    for query_idx, ref_idx in alignment:
        feature_stretch[:, ref_idx] = query[:, query_idx]

    # reference frames that received a query frame, limited to the aligned region
    ref_first, ref_last = alignment[0, 1], alignment[-1, 1]
    used = np.unique(alignment[:, 1])
    used = used[(used >= ref_first) & (used <= ref_last)]

    # fill every run of skipped frames between two consecutive aligned frames
    for left, right in zip(used[:-1], used[1:]):
        gap = right - left
        for j in range(left + 1, right):
            t = (j - left) / gap  # fractional position of j between left and right
            feature_stretch[:, j] = (1 - t) * feature_stretch[:, left] + t * feature_stretch[:, right]
    return feature_stretch

In [None]:
def verify_hyp_dir(indir):
    '''
    Checks that a scenario hypothesis directory contains the predicted alignment file hyp.npy.
    
    Inputs
    indir: The hypothesis directory to verify

    Raises an AssertionError if hyp.npy is missing.
    '''
    hyp_file = f'{indir}/hyp.npy'
    assert os.path.exists(hyp_file)

## Example


Here is an example of how to call the offline and online processing functions on a scenario directory.

In [None]:
# scenario_dir = 'scenarios/s2'
# out_dir = 'experiments/test/s2'
# cache_dir = 'experiments/test/cache'
# hop_size = 512
# steps = np.array([1,1,1,2,2,1]).reshape((-1,2))
# weights = np.array([2,3,3])
# offline_processing(scenario_dir, cache_dir, hop_size)
# online_processing(scenario_dir, out_dir, cache_dir, hop_size, steps, weights)