In [1]:
import librosa as lb
from speechCommon import getPairwiseCostMatrix
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
import python_speech_features
import numpy as np
from numba import jit, njit
from time import time
import pickle as pkl
import os.path
from pathlib import Path

In [2]:
@njit
def NW_DP(C, gamma):
    """Fill the cumulative-cost and backpointer tables for the alignment.

    The first row of the cumulative table is copied from the first row of
    ``C``. Every later cell takes the cheapest of three predecessors:

    * 0 -- from the cell in the previous row, same column, plus ``gamma``
    * 1 -- from the cell in the same row, previous column, plus ``gamma``
    * 2 -- from the cell in the previous row and previous column, plus
      ``C[row, col]``

    Cells in column 0 can only use transition 0. Comparisons are strict, so
    on a tie the lower-numbered transition is kept.

    Args:
        C: 2-D cost matrix.
        gamma: penalty added for the non-diagonal transitions (0 and 1).

    Returns:
        (D, B): two arrays shaped like ``C``; ``D`` holds cumulative costs
        and ``B`` holds the chosen transition index (0, 1 or 2) per cell.
        Row 0 of ``B`` is left at 0.
    """
    nRows, nCols = C.shape
    D = np.zeros(C.shape)
    B = np.zeros(C.shape)

    # Row 0 is seeded straight from the cost matrix.
    D[0, :] = C[0, :]

    for i in range(1, nRows):
        for j in range(nCols):
            # Start from transition 0 (previous row, same column).
            cost = D[i - 1, j] + gamma
            move = 0

            if j > 0:
                # Column 0 has no neighbours in a previous column.
                sameRowCost = D[i, j - 1] + gamma
                diagCost = D[i - 1, j - 1] + C[i, j]

                if sameRowCost < cost:
                    cost = sameRowCost
                    move = 1
                if diagCost < cost:
                    cost = diagCost
                    move = 2

            D[i, j] = cost
            B[i, j] = move

    return D, B

In [3]:
def NW_Backtrace(C, D, B, gamma):
    """Follow the backpointers in ``B`` from the last row up past row 0.

    The walk starts in the last row at the column where ``D`` is smallest and
    stops once the row index drops below zero, so the returned path is listed
    from the final row back towards the first row (it is not reversed).

    Args:
        C: cost matrix; ``C[row, col]`` is recorded for every visited cell.
        D: cumulative-cost matrix; only its last row and its row count are read.
        B: backpointer matrix with values 0 (row - 1), 1 (col - 1) or anything
            else (row - 1 and col - 1).
        gamma: accepted for signature compatibility; not used here.

    Returns:
        (path, costs): ``path`` is a list of ``[row, col]`` pairs and
        ``costs`` the matching list of ``C`` entries, both in visiting order.
    """
    row = D.shape[0] - 1
    col = np.argmin(D[-1, :])
    path, costs = [], []

    while row >= 0:
        path.append([row, col])
        costs.append(C[row, col])

        step = B[row, col]
        # Every transition except 1 moves up a row; every transition except 0
        # moves back a column. Together this reproduces the three cases.
        if step != 1:
            row = row - 1
        if step != 0:
            col = col - 1

    return path, costs

In [4]:
def NWAlign(queryFile, refFile, gamma):
    """Align one query recording against one reference recording.

    Builds the pairwise cost matrix for the two files (via
    ``getPairwiseCostMatrix``; its exact contents are defined elsewhere),
    runs the dynamic-programming pass and then the backtrace.

    Args:
        queryFile: path of the query audio file.
        refFile: path of the reference audio file.
        gamma: penalty passed through to ``NW_DP`` and ``NW_Backtrace``.

    Returns:
        (path, costs) as NumPy arrays built from the backtrace lists.
    """
    costMatrix = getPairwiseCostMatrix(queryFile, refFile)
    cumCost, backPtrs = NW_DP(costMatrix, gamma)
    steps, stepCosts = NW_Backtrace(costMatrix, cumCost, backPtrs, gamma)
    return np.array(steps), np.array(stepCosts)

In [5]:
def NWAlignAll(outdir, pairsFile, gamma, queryDir, refDir):
    """Align every query listed in ``pairsFile`` and pickle each result.

    Each non-blank line of ``pairsFile`` must contain exactly two
    whitespace-separated fields; only the basename of the first field is
    used. For each line the result is stored in
    ``outdir/<basename minus its last 8 characters>.pkl`` as a dict with keys
    ``"wp"`` (path) and ``"dist"`` (costs). Lines whose output file already
    exists are skipped, so an interrupted run can be resumed.

    The reference file name is rebuilt from the first three ``_``-separated
    pieces of the query basename plus ``.wav``.

    Args:
        outdir: directory for the ``.pkl`` files; created if missing.
        pairsFile: path of the text file listing the query files.
        gamma: penalty forwarded to ``NWAlign``.
        queryDir: directory containing the query audio files (string).
        refDir: directory containing the reference audio files (string).

    Raises:
        AssertionError: if a non-blank line does not have exactly two fields.
        IndexError: if a query basename has fewer than three ``_`` pieces.
    """
    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    with open(pairsFile, 'r') as f:
        for line in f:
            parts = line.strip().split()
            if not parts:
                # Tolerate blank lines instead of failing the assert below.
                continue
            assert len(parts) == 2

            queryName = os.path.basename(parts[0])
            saveFile = outdir / (queryName[:-8] + ".pkl")
            if os.path.exists(saveFile):
                # Already computed by an earlier run.
                continue

            queryFile = queryDir + '/' + queryName
            refParts = queryName.split("_")
            ref = refParts[0] + '_' + refParts[1] + '_' + refParts[2] + ".wav"
            refFile = refDir + '/' + ref

            path, costs = NWAlign(queryFile, refFile, gamma)

            hyp = {"wp": path, "dist": costs}

            # Write to a sibling temp file and rename it into place. Because
            # existing outputs are skipped, a half-written pickle left at the
            # final path by an interrupted run would never be regenerated.
            tmpFile = saveFile.with_name(saveFile.name + ".tmp")
            with open(tmpFile, 'wb') as out:
                pkl.dump(hyp, out)
            os.replace(tmpFile, saveFile)

In [None]:
# Sweep every gamma value over the 1-4 second edit sets of the chosen
# benchmark, writing one directory of alignment pickles per combination.
benchmark = "train"
# The reference directory is the same for every gamma / edit duration.
refDir = "/home/tshaw/TamperingDetection/speech/ref/wav"
for gamma in (1, 5, 10, 50, 100, 500, 1000):
    print("Gamma: ", gamma)
    for editTime in (1, 2, 3, 4):
        print(editTime)
        # %s already applies str() to its argument, so the ints go in as-is.
        outdir = "/home/tshaw/TamperingDetection/hyp/%ssec/NW-%s" % (editTime, gamma)
        pairsFile = "/home/tshaw/TamperingDetection/cfg_files/%s_%ss.pairs" % (benchmark, editTime)
        queryDir = "/home/tshaw/TamperingDetection/speech/queries/wav/160kbps/%ssec" % editTime
        NWAlignAll(outdir, pairsFile, gamma, queryDir, refDir)

Gamma:  1
1
