# DTW* v0.2a

In [None]:
import numpy as np
from numba import jit

In [None]:
@jit(nopython=True)
def dtwstar_v2a(C, steps, weights, buffer=1):
    '''
    Runs DTW Star version 0.2a on a pairwise cost matrix.

    A path may start anywhere on the first row or first column of C.  While the
    cumulative cost is filled in, the start location of the best path into every
    cell is carried along so that candidate endpoints can later be compared by
    their average cost per manhattan block.

    Inputs
    C: pairwise cost matrix (2-D)
    steps: a numpy matrix specifying the allowable transitions.  It should be of
        dimension (L, 2), where each row specifies (row step, col step)
    weights: an array with one multiplicative weight per transition type.  Its
        length must match the number of rows in steps.
    buffer: passed unchanged to find_best_endpoint, which only considers end
        positions whose index along the last row / last column is at least
        this value.

    Outputs
    best_cost: the best average cost per manhattan block
    path: the estimated warping path, specified as a 2xN array
    debug: per-candidate information returned by find_best_endpoint (average
        cost per manhattan block for each candidate ending position)
    '''

    nrows, ncols = C.shape
    numsteps = steps.shape[0]

    # D: cumulative cost, B: index of the transition taken into each cell,
    # P: encoded start location of the best path into each cell
    #    (>= 0 -> started at (0, P); < 0 -> started at (-P, 0))
    D = np.zeros(C.shape)
    B = np.zeros(C.shape, dtype=np.int8)
    P = np.zeros(C.shape, dtype=np.int32)
    D[0,:] = C[0,:]
    D[:,0] = C[:,0]

    # fill in the interior of the matrices
    for r in range(1, nrows):
        for c in range(1, ncols):

            lowest = np.inf
            chosen = -1
            rfrom = -1
            cfrom = -1

            for k in range(numsteps):

                rp = r - steps[k, 0]
                cp = c - steps[k, 1]

                # transition would come from outside the matrix
                if rp < 0 or cp < 0:
                    continue

                cand = D[rp, cp] + C[r, c] * weights[k]
                if cand < lowest:
                    lowest = cand
                    chosen = k
                    rfrom = rp
                    cfrom = cp

            D[r, c] = lowest
            B[r, c] = chosen

            # propagate (or create) the encoded start location
            if rfrom == 0:
                P[r, c] = cfrom
            elif cfrom == 0:
                P[r, c] = -rfrom
            else:
                P[r, c] = P[rfrom, cfrom]

    # choose the endpoint, then walk back to the start of the path
    best_cost, best_r, best_c, debug = find_best_endpoint(D, P, buffer)
    path = backtrace_dtwstar(D, B, steps, best_r, best_c)
    path.reverse()
    path = np.array(path).T

    return best_cost, path, debug

In [None]:
@jit(nopython=True)
def find_best_endpoint(D, P, buffer):
    '''
    Determines the best location to begin backtracking from by comparing the average path cost
    per manhattan block.

    Inputs
    D: the cumulative cost matrix
    P: the matrix specifying the starting location of the alignment path.  A value >= 0 means
        the path started at (0, value); a negative value means it started at (-value, 0).
    buffer: specifies the length of a buffer region (in frames) to avoid short degenerate alignment paths
        near the corners of the pairwise cost matrix.  This can be thought of as the minimum length that
        needs to match in order to be considered a valid alignment path.  A non-integer value is
        truncated to a whole number of frames.

    Outputs
    best_cost: the best average path cost per manhattan block (np.inf if there are no candidates)
    best_r: the row index of the best endpoint (-1 if there are no candidates)
    best_c: the column index of the best endpoint (-1 if there are no candidates)
    debug: a list with one tuple (offset, average cost, row, col) per candidate ending position.
        The offset is (col - last col) for candidates on the last row and (last row - row) for
        candidates on the last column.
    '''

    last_r = D.shape[0] - 1
    last_c = D.shape[1] - 1

    # Callers may pass a fractional frame count (e.g. seconds divided by the hop
    # size).  range() needs an integer, so truncate to whole frames first.
    first = int(buffer)

    # candidates: last row from left to right, then last column from bottom to top
    candidates = []
    for c in range(first, last_c + 1):
        candidates.append((last_r, c))
    for r in range(last_r - 1, first - 1, -1):
        candidates.append((r, last_c))

    best_cost = np.inf
    best_r, best_c = -1, -1
    debug = []

    for (r, c) in candidates:

        # decode the alignment start location
        start = P[r, c]
        if start >= 0:
            rstart, cstart = 0, start
        else:
            rstart, cstart = -start, 0

        # average cost per manhattan block
        mdist = (r - rstart) + (c - cstart) # manhattan distance
        avg_cost_per_mb = D[r, c] / mdist

        # keep best
        if avg_cost_per_mb < best_cost:
            best_cost = avg_cost_per_mb
            best_r, best_c = r, c

        # debugging info
        if r == last_r:
            debug.append((c - last_c, avg_cost_per_mb, r, c))
        else:
            debug.append((last_r - r, avg_cost_per_mb, r, c))

    return best_cost, best_r, best_c, debug

In [None]:
@jit(nopython=True)
def backtrace_dtwstar(D, B, steps, rstart, cstart):
    '''
    Follows the backpointers in B from a given cell until the first row or the
    first column is reached.

    Arguments:
    D: cumulative cost matrix (not read by this function)
    B: backtrace matrix; B[row, col] is the index into steps of the transition
        that was taken to arrive at (row, col)
    steps: a numpy matrix specifying the allowable transitions.  It should be of
            dimension (L, 2), where each row specifies (row step, col step)
    rstart: the row index to start backtracking from
    cstart: the column index to start backtracking from

    Outputs
    path: a python list of (row, col) coordinates, ordered from the starting
        cell (rstart, cstart) back to the cell on the first row / first column.
    '''
    row, col = rstart, cstart
    path = [(row, col)]

    # stop as soon as either coordinate hits the matrix boundary
    while row != 0 and col != 0:
        k = B[row, col]
        row = row - steps[k, 0]
        col = col - steps[k, 1]
        path.append((row, col))

    return path

# Visualize

The code below can be used to run DTW* v0.2a on specific examples, and to visualize the alignments.

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import time
import pandas as pd

In [None]:
def L2norm(F):
    '''
    Divides every column of the 2-D array F by its Euclidean norm.

    A small constant (1e-9) is added to each norm so that all-zero columns do
    not cause a division by zero.  Returns a new array of the same shape as F.
    '''
    colnorms = np.sqrt(np.sum(F * F, axis=0)) + 1e-9
    return F / colnorms.reshape((1, -1))

In [None]:
def getTimestamps(annotfile1, annotfile2):
    '''
    Reads two annotation files and returns the paired entries they have in common.

    Each file is parsed as whitespace-delimited text without a header row, after
    skipping its first three lines.  The two tables are inner-joined on their third
    column (index 2), and the first column (index 0) of each table is returned for
    the matching rows.
    NOTE(review): column 0 is presumably a timestamp in seconds and column 2 a beat
    label -- confirm against the annotation file format.

    Inputs
    annotfile1: path (or file-like object) of the first annotation file
    annotfile2: path (or file-like object) of the second annotation file

    Outputs
    A tuple (values1, values2) of equal-length numpy arrays.
    '''
    # raw string: '\s' in a normal string literal is an invalid escape sequence
    read_opts = dict(header=None, sep=r'\s+', skiprows=3)
    table1 = pd.read_csv(annotfile1, **read_opts)
    table2 = pd.read_csv(annotfile2, **read_opts)

    # overlapping column labels 0 and 1 receive the '_x' / '_y' suffixes
    merged = pd.merge(table1, table2, on=[2], how='inner')

    return np.array(merged['0_x']), np.array(merged['0_y'])

In [None]:
def mapFrame(r, c, Dshape, frames=False):
    '''
    Maps a location (r, c) near the bottom or right edge of a matrix of size
    Dshape to a single signed offset.

    Inputs
    r: row coordinate
    c: column coordinate
    Dshape: (number of rows, number of columns) of the matrix, in the same units
        as r and c
    frames: if True, r and c are treated as exact frame indices: a location on
        the last row maps to c - (ncols - 1), anything else to (nrows - 1) - r.
        If False (coordinates in seconds), the location is assigned to whichever
        edge it is closer to: c - ncols when it is closer to the bottom edge,
        nrows - r otherwise.

    Outputs
    val: the signed offset
    '''
    nrows, ncols = Dshape[0], Dshape[1]

    if frames: # frames, use exact
        # use the Dshape argument here (not a global matrix) so the function
        # works for whatever matrix size the caller describes
        if r == nrows - 1:
            return c - ncols + 1
        return nrows - 1 - r

    # seconds, use approximate
    if abs(r - nrows) < abs(c - ncols):
        return c - ncols
    return nrows - r

In [None]:
# --- recording pair to align (uncomment one alternative to switch) ---
pieceid1 = 'Chopin_Op017No4_Beliavsky-2004_pid9152-13'
#pieceid1 = 'Chopin_Op017No4_Luisada-1990_pid9055-13'
#pieceid1 = 'Chopin_Op017No4_Kilenyi-1937_pid9164-13'
#pieceid1 = 'Chopin_Op017No4_Magaloff-1977_pid5667267b-10'
#pieceid1 = 'Chopin_Op017No4_Wasowski-1980_pid9111-13'
pieceid2 = 'Chopin_Op017No4_Clidat-1994_pid9067-13'
#pieceid2 = 'Chopin_Op017No4_Paderewski-1912_pid5667274-09'
#pieceid2 = 'Chopin_Op017No4_Rubinstein-1939_pid9049-13'
#pieceid2 = 'Chopin_Op017No4_Smith-1975_pid9054-13'
#pieceid2 = 'Chopin_Op017No4_Perahia-1994_pid54293-09'

# --- benchmark variant of each recording (selects the data sub-directory) ---
type1 = 'partialStart'
type2 = 'partialEnd'

# --- alignment settings ---
# one (row step, col step) pair per allowed transition, with its weight
steps = np.array([[1, 1],
                  [1, 2],
                  [2, 1]])
weights = np.array([2, 3, 3])
hop_sec = 512 / 22050.  # seconds per feature frame
buffer = 10  # in sec

In [None]:
# Locate and load the saved feature arrays of the two selected recordings.
# NOTE(review): later cells treat axis 1 of these arrays as the frame axis
# (F.shape[1] * hop_sec is used as a duration) -- confirm against the feature files.
featfile1 = f'/home/tjtsai/ttmp/Chopin_Mazurkas_features/{type1}/Chopin_Op017No4/{pieceid1}.npy'
featfile2 = f'/home/tjtsai/ttmp/Chopin_Mazurkas_features/{type2}/Chopin_Op017No4/{pieceid2}.npy'
F1, F2 = np.load(featfile1), np.load(featfile2)

In [None]:
# Beat annotations of 'original' recordings live in a different directory tree
# than those of the benchmark variants; pick the matching file for each recording.
annotfile1 = (
    f'/home/tjtsai/ttmp/Chopin_Mazurkas_Modified/annotations_beat/Chopin_Op017No4/{pieceid1}.beat'
    if type1 == 'original' else
    f'/home/tjtsai/ttmp/Chopin_Mazurkas_Benchmarks/{type1}/annotations_beat/Chopin_Op017No4/{pieceid1}.beat'
)
annotfile2 = (
    f'/home/tjtsai/ttmp/Chopin_Mazurkas_Modified/annotations_beat/Chopin_Op017No4/{pieceid2}.beat'
    if type2 == 'original' else
    f'/home/tjtsai/ttmp/Chopin_Mazurkas_Benchmarks/{type2}/annotations_beat/Chopin_Op017No4/{pieceid2}.beat'
)

# ground-truth correspondences between the two recordings
gt1, gt2 = getTimestamps(annotfile1, annotfile2)

In [None]:
# Pairwise cost: one minus the inner product of the column-normalized features.
C = 1 - L2norm(F1).T @ L2norm(F2)
# Time the alignment.  buffer is given in seconds, so dividing by hop_sec converts it
# to frames.
# NOTE(review): buffer/hop_sec is a float -- confirm the callee accepts a non-integer buffer.
start = time.time()
best_cost, wp, debug = dtwstar_v2a(C, steps, weights, buffer/hop_sec)
print("% s seconds" % (time.time() - start))
# Each debug entry is (offset, average cost per manhattan block, row, col).
X = np.array(debug)
times = X[:,0]*hop_sec
scores = X[:,1]
# Score of every candidate endpoint against its offset (converted to seconds).
plt.plot(times, scores,'.')
# Red guide lines 10 units in from both ends of the plotted offset range.
plt.axvline(times.min()+10, color='r')
plt.axvline(times.max()-10, color='r')
# Green line: the last ground-truth pair mapped to the same offset axis, with the
# matrix size expressed in seconds.
plt.axvline(mapFrame(gt1[-1], gt2[-1], (F1.shape[1]*hop_sec, F2.shape[1]*hop_sec)), color='g')

In [None]:
# Predicted warping path (frame indices scaled by hop_sec) as yellow dots,
# with the ground-truth correspondences as a green line.
plt.plot(wp[0,:]*hop_sec, wp[1,:]*hop_sec, 'y.')
plt.plot(gt1, gt2, 'g')
plt.legend(['pred', 'gt'])
# Axis limits: the length of each feature array in seconds plus a margin of 2.
plt.xlim([0, F1.shape[1]*hop_sec+2])
plt.ylim([0, F2.shape[1]*hop_sec+2])

# DTW* v0.2b

This version seems to have about the same accuracy as v0.2a, but is significantly slower.

In [None]:
# @jit(nopython=True)
# def dtwstar_v2b(C, steps, weights):
    
#     # initialize
#     D = np.zeros(C.shape)
#     B = np.zeros(C.shape, dtype=np.int8)
#     P = np.zeros(C.shape, dtype=np.int32)
#     D[0,:] = C[0,:]
#     D[:,0] = C[:,0]
    
#     # DP
#     for row in range(1,C.shape[0]):
#         for col in range(1, C.shape[1]):
#             mincost = np.inf
#             minidx = -1
#             bestrprev = -1
#             bestcprev = -1
#             for stepidx, step in enumerate(steps):
#                 (rstep, cstep) = step
#                 prevrow = row - rstep
#                 prevcol = col - cstep
#                 if prevrow >= 0 and prevcol >= 0:
                    
#                     # calculate avg cost per manhattan block
#                     pathcost = D[prevrow, prevcol] + C[row, col] * weights[stepidx]
#                     if P[prevrow, prevcol] >= 0:
#                         mdist = row + (col - P[prevrow, prevcol])
#                     else:
#                         mdist = (row + P[prevrow, prevcol]) + col
#                     cost_per_mb = pathcost / mdist
                    
#                     # select best transition based on avg cost per manhattan block
#                     if cost_per_mb < mincost:
#                         mincost = cost_per_mb
#                         minidx = stepidx
#                         bestrprev = prevrow
#                         bestcprev = prevcol
                        
#             D[row, col] = D[bestrprev, bestcprev] + C[row, col] * weights[minidx]
#             B[row, col] = minidx
#             if bestrprev == 0:
#                 P[row, col] = bestcprev
#             elif bestcprev == 0:
#                 P[row, col] = -1*bestrprev
#             else:
#                 P[row, col] = P[bestrprev, bestcprev]
            
#     #  backtrack
#     best_cost, best_r, best_c, debug = find_best_endpoint(D, P)
#     path = backtrace_dtwstar(D, B, steps, best_r, best_c)
#     path.reverse()
#     path = np.array(path)
    
#     return best_cost, path.T, debug

## SubseqDTW and NWTW

The cells below can be used to compare the DTW* alignment against SubseqDTW and NWTW alignments.

In [None]:
#%run _NWTW.ipynb

In [None]:
#%run align_tools_cython.ipynb

In [None]:
# times = []
# times.append(time.time())
# #best_cost, wp, debug = dtwstar_v2a(C, steps, weights, buffer/hop_sec)
# wp1 = alignDTW(L2norm(F1), L2norm(F2), steps=steps, weights=weights, downsample=1, outfile=None, subseq=True)
# times.append(time.time())
# print("SubseqDTW: % s seconds" % (times[1]-times[0]))
# wp2 = alignNWTW(L2norm(F1), L2norm(F2), downsample=1, gamma=0.346, profile = False)
# times.append(time.time())
# print("NWTW: %s seconds" % (times[2] - times[1]))

In [None]:
# plt.plot(wp1[0,:]*hop_sec, wp1[1,:]*hop_sec, 'r')
# plt.plot(wp2[0,:]*hop_sec, wp2[1,:]*hop_sec, 'b')
# plt.plot(gt1, gt2, 'g')
# plt.legend(['dtw','nwtw','gt'])
# plt.xlim([0, F1.shape[1]*hop_sec+2])
# plt.ylim([0, F2.shape[1]*hop_sec+2])
# #plt.xlim([0,50])
# #plt.ylim([0,80])
# #plt.xlim([130,170])
# #plt.ylim([150,200])