# Implementing HSTW-MLv0.2

In [4]:
import numpy as np
import pickle
from scipy.spatial.distance import cdist
import python_speech_features
from matplotlib import pyplot as plt
import time
import os
import pandas as pd
import glob
from tqdm import tqdm
from multiprocessing import Pool
from multiprocessing import cpu_count
from offset import find_offset as offset_hps
from random import sample
from eer_utils import *

ModuleNotFoundError: No module named 'eer_utils'

## the HSTW portion of the system

The code below is adapted from the HSTW GitHub repository notebook `02a_HPTWAlign.ipynb`.

In [5]:
%load_ext Cython

In [6]:
%%cython

import numpy as np
cimport numpy as np
cimport cython

# NumPy dtype objects paired with the C-level typedefs of the same width, so that
# array allocations and typed buffer declarations can refer to one shared name.
DTYPE_INT32 = np.int32
ctypedef np.int32_t DTYPE_INT32_t

DTYPE_FLOAT = np.float64
ctypedef np.float64_t DTYPE_FLOAT_t

# Positive infinity; HSTW uses it as the initial value of every cumulative-cost cell.
cdef DTYPE_FLOAT_t MAX_FLOAT = float('inf')

# @cython.boundscheck(False)
def HSTW(double[:,:]C, float alpha, int beta=20, int gamma=1):
    """
    Fill the two-plane cumulative cost tensor and the matching back-pointers.

    C (np.array) : Cost matrix, indexed [row, column]
    alpha (float) : skip penalty for vertical transitions
    beta (int) : plane transition penalty
    gamma (int) : skip penalty for horizontal transitions

    returns
        B (np.array) : Steps matrix, shape (2, rows, cols), dtype uint32
        D (np.array) : Cumulative cost matrix, shape (2, rows, cols), dtype float64

    The first axis of B and D selects the plane: 0 is the visible plane,
    1 is the hidden plane.
    Step codes stored in B: 1 diag, 2 right, 3 up, 0 switch plane.

    NOTE: alpha is declared as a C float and beta / gamma as C ints, so the
    values are converted to those types when the function is called.
    """
    cdef DTYPE_INT32_t numRows = C.shape[0]
    cdef DTYPE_INT32_t numCols = C.shape[1]

    cdef np.ndarray[np.uint32_t, ndim=3] B = np.zeros((2, numRows, numCols), dtype=np.uint32)
    cdef np.ndarray[DTYPE_FLOAT_t, ndim=3] D = np.ones((2, numRows, numCols), dtype=DTYPE_FLOAT) * MAX_FLOAT

    cdef DTYPE_INT32_t i, j
    # Typed so the inner loop stores the step code without creating Python objects.
    cdef unsigned int step
    cdef DTYPE_FLOAT_t cost, new_cost

    # bottom row of the visible plane: a path may start at any column for
    # the price of that single cell
    D[0, 0] = C[0]

    # first column
    # D[1, 0, 0] still holds MAX_FLOAT at this point, so every value written
    # here is infinite as well; only the back-pointers carry information.
    for i in range(1, numRows):
        D[1, i, 0] = D[1, i-1, 0] + alpha
        D[0, i, 0] = D[1, i, 0] + beta
        B[0, i, 0] = 3
        B[1, i, 0] = 0

    # rest of the matrix
    for i in range(1, numRows):
        for j in range(1, numCols):

            # hidden plane: enter diagonally from the visible plane (0),
            # move right inside the hidden plane (2), or move up inside it (3)
            cost = D[0, i-1, j-1] + gamma + alpha
            step = 0
            new_cost = D[1, i, j-1] + gamma
            if new_cost < cost:
                cost = new_cost
                step = 2
            new_cost = D[1, i-1, j] + alpha
            if new_cost < cost:
                cost = new_cost
                step = 3
            D[1, i, j] = cost
            B[1, i, j] = step

            # visible plane: come back from the hidden plane at the same
            # cell (0), or take a diagonal step that pays C[i, j] (1).
            # The hidden value of this cell must already be final, which is
            # why the hidden plane is updated first.
            cost = D[1, i, j] + beta
            step = 0
            new_cost = D[0, i-1, j-1] + C[i, j]
            if new_cost < cost:
                cost = new_cost
                step = 1
            D[0, i, j] = cost
            B[0, i, j] = step
    return B, D

@cython.boundscheck(False)
def backtrace3D(unsigned int[:,:,:] B, double[:,:,:] D):
    """
    Follow the back-pointers in B from the last row down to row 0.

    B (np.array) : Steps matrix
    D (np.array) : Cumulative cost matrix
    
    returns
        path (np.array) : 3 columns representing [hidden? (bool), query frame, ref frame]

    The walk starts in plane 0 at the last row, in the column with the
    smallest cumulative cost, and stops once the row index drops below 0.
    Rows are appended while walking backwards and are not reversed, so the
    first row of the result is the end of the alignment.
    """
    cdef int p, r, c
    # Number of path entries written so far (doubles as the next write index).
    cdef unsigned int step = 0
    
    p = 0
    r = D.shape[1] - 1
    c = np.argmin(D[0, D.shape[1] - 1])
    # Pre-allocated output buffer with rows + cols entries.
    # NOTE(review): presumably meant as an upper bound on the path length; a
    # plane switch appends an entry without always moving r or c - confirm it
    # cannot be exceeded.
    cdef np.ndarray[np.int32_t, ndim=2] path_3D = np.zeros((D.shape[1]+D.shape[2], 3), dtype=np.int32)
    
    while r >= 0:
        path_3D[step] = [p,r,c]
        step += 1
        # NOTE(review): in HSTW a 0 stored in plane 0 records a switch from
        # plane 1 at the SAME cell, while a 0 stored in plane 1 records a
        # diagonal entry from plane 0. Here the diagonal decrement is applied
        # on the 0 -> 1 hop and no move on the 1 -> 0 hop, i.e. the other way
        # round - confirm this pairing is intended.
        if B[p, r, c] == 0 and p == 0:
            p = 1
            r -= 1
            c -= 1
        elif B[p, r, c] == 0 and p == 1:
            p = 0
        elif B[p, r, c] == 1:
            # diagonal step
            r -= 1
            c -= 1
        elif B[p, r, c] == 2:
            # step right was taken, so go one column back
            c -= 1
        elif B[p, r, c] == 3:
            # step up was taken, so go one row back
            r -= 1
    # Only the first `step` rows were filled in.
    return path_3D[:step]

In file included from /home/arm/anaconda3/lib/python3.9/site-packages/numpy/core/include/numpy/ndarraytypes.h:1969,
                 from /home/arm/anaconda3/lib/python3.9/site-packages/numpy/core/include/numpy/ndarrayobject.h:12,
                 from /home/arm/anaconda3/lib/python3.9/site-packages/numpy/core/include/numpy/arrayobject.h:4,
                 from /home/arm/.cache/ipython/cython/_cython_magic_ee7895a3f9ae4cc907619968bf5eb46a.c:710:
      |  ^~~~~~~


In [7]:
# Aligns a query file with its corresponding reference file and returns the 3-D path through the HSTW tensor
def alignHSTW(C, Ca = 2.4, Cb = 33, gamma = 3):
    """
    Derive the HSTW penalties from the cost matrix, run HSTW and backtrace.

    C (np.array) : cost matrix of ref and query
    Ca (float) : alpha parameter multiplier (see HSTW paper)
    Cb (int) : beta parameter multiplier (see HSTW paper)
    gamma (int) : skip penalty for horizontal transitions (see HSTW paper)
    
    returns
        path (np.array) : 3 columns representing [hidden? (bool), query frame, ref frame]

    alpha is Ca times the median of the per-row minima of C, and beta is
    (alpha + gamma) * Cb truncated to an integer.
    """
    alpha = np.median(np.min(C, axis=1)) * Ca
    # HSTW declares beta as a C int, so convert explicitly instead of relying
    # on an implicit float-to-int coercion at the call boundary.
    beta = int((alpha + gamma) * Cb)
    # gamma has to be forwarded as well: otherwise HSTW silently falls back to
    # its own default while beta above is computed from the caller's gamma.
    B, D = HSTW(C, alpha, beta=beta, gamma=gamma)
    return backtrace3D(B, D)

## the ML portion of the system

In [8]:
def find_offset(C):
    """
    Find the diagonal of the cost matrix with the smallest total cost.

    C (np.array) : cost matrix of ref and query (rows: query, columns: ref)
    
    returns
        min_offset (int) : frame at optimal diagonal path, i.e. the column
            offset of the cheapest diagonal that starts in row 0

    Offsets 0 .. cols - rows (inclusive) are the diagonals that span every
    row of C. When C has no more columns than rows only the main diagonal is
    considered and 0 is returned.
    """
    # +1 so that the last full-length diagonal is a candidate too; max() keeps
    # at least offset 0 so that argmin never receives an empty list.
    n_offsets = max(C.shape[1] - C.shape[0], 0) + 1
    diag_sums = [C.diagonal(k).sum() for k in range(n_offsets)]
    min_offset = np.argmin(diag_sums)
    return min_offset

def find_matching_frames(offset, path, threshold=0):
    """
    Flag every path entry that does not lie on the expected diagonal.

    offset (int) : frame at optimal diagonal path
    path (np.array) : 3 columns representing [hidden? (bool), query frame, ref frame]
    threshold (int) : largest tolerated distance from the diagonal
    
    returns
        matching (np.array) : 3 columns representing [not matching? (bool), query frame, ref frame],
            or None when no entry ends up flagged

    The input path is left untouched; the flags are written to a copy.
    """
    matching = path.copy()
    # Distance of each entry from the diagonal "ref = query + offset".
    drift = np.abs(matching[:, 2] - matching[:, 1] - offset)
    # Only plane-0 entries can be newly flagged; the others keep their value.
    off_diagonal = (matching[:, 0] == 0) & (drift > threshold)
    matching[off_diagonal, 0] = 1
    if np.sum(matching[:, 0]) == 0:
        return None
    return matching

def calculate_scores_H1(query, ref, matching, return_all=False):
    """
    Score the non-matching frames against the matching query/ref differences.

    query (np.array) : mfcc features for query
    ref (np.array) : mfcc features for reference
    matching (np.array) : 3 columns representing [not matching? (bool), query frame, ref frame];
        a flag of 0 marks a matching frame pair, 1 a non-matching one
    return_all (bool) : also return the per-frame scores and the statistics
    
    returns
        (float) : H1 modified z-score, the mean absolute z-score of the
            non-matching differences
        or, with return_all=True, the tuple (scores, mean, std)

    The mean and standard deviation are taken per feature over the
    differences query[q] - ref[r] of the matching pairs. There is no guard
    against a zero standard deviation.
    """
    matching = np.asarray(matching)
    flags = matching[:, 0]
    matched = matching[flags == 0]
    unmatched = matching[flags == 1]

    # Fancy indexing gathers all frame pairs at once instead of looping in Python.
    matching_diffs = query[matched[:, 1]] - ref[matched[:, 2]]
    non_matching_diffs = query[unmatched[:, 1]] - ref[unmatched[:, 2]]

    mean, std = matching_diffs.mean(axis=0), matching_diffs.std(axis=0)
    scores = (non_matching_diffs - mean) / std
    if return_all:
        return scores, mean, std
    return np.abs(scores).mean()

def calculate_scores_H2(query, ref, matching, return_all=False):
    """
    Score the non-matching frames against separate query and ref statistics.

    query (np.array) : mfcc features for query
    ref (np.array) : mfcc features for reference
    matching (np.array) : 3 columns representing [not matching? (bool), query frame, ref frame];
        a flag of 0 marks a matching frame pair, 1 a non-matching one
    return_all (bool) : also return the per-frame scores and the statistics
    
    returns
        (float) : H2 modified z-score, the mean absolute z-score of the
            non-matching differences
        or, with return_all=True, the tuple (scores, mean, std)

    Unlike H1, the mean is the difference of the two per-feature means and
    the deviation is the sum of the two per-feature standard deviations,
    each computed over the matching frames of query and ref separately.
    """
    is_match = matching[:, 0] == 0
    q_matching = query[matching[is_match, 1]]
    r_matching = ref[matching[is_match, 2]]

    mean = q_matching.mean(axis=0) - r_matching.mean(axis=0)
    std = q_matching.std(axis=0) + r_matching.std(axis=0)

    non_matching_diffs = np.array(
        [query[q_idx] - ref[r_idx] for flag, q_idx, r_idx in matching if flag == 1]
    )
    scores = (non_matching_diffs - mean) / std
    if not return_all:
        return np.abs(scores).mean()
    return scores, mean, std

In [9]:
def sec_to_mfcc(sec, winstep=0.01):
    """
    Convert a duration in seconds to a number of mfcc frames.

    sec (float): the optimal offset in seconds
    winstep (float): hop between successive mfcc frames in seconds;
        the default keeps the previously hard-coded 0.01
    
    returns 
        (int): the corresponding mfcc frame

    The quotient is rounded with the built-in round(), which rounds exact
    halves to the nearest even integer.
    """
    return round(sec / winstep)

## Let's start the big loop

In [10]:
# Root folder of the stored MFCC arrays; each query set lives in its own sub-folder.
mfcc_DIR = './daps-mp3/test/mfccs/'


def _stems_in(subdir):
    """Return 'subdir' + file name without its last 4 characters, for every file in mfcc_DIR + subdir, sorted by name."""
    return [subdir + name[:-4] for name in sorted(os.listdir(mfcc_DIR + subdir))]


queries = _stems_in('queries/')
tamp_025 = _stems_in('tampered0.25/')
tamp_05 = _stems_in('tampered0.5/')
tamp_1 = _stems_in('tampered1/')
tamp_2 = _stems_in('tampered2/')
tamp_4 = _stems_in('tampered4/')

In [11]:
# Load every reference MFCC array once, keyed by the file name without its last 10 characters.
_ref_dir = mfcc_DIR + 'refs/'
ref_mfcc_dict = {
    ref_file[:-10]: np.load(_ref_dir + ref_file)
    for ref_file in sorted(os.listdir(_ref_dir))
}

In [12]:
# Untampered queries first, then the tampered sets in order of increasing tamper length.
all_queries = [*queries, *tamp_025, *tamp_05, *tamp_1, *tamp_2, *tamp_4]

In [13]:
def calculate_tamper_score(query):
    """
    Align one query against its reference and compute its tamper score.

    query (str) : name of query in the form '<query set folder>/<file stem>'
    
    returns
        tamper_type (str) : type of tampering: "NONE" for the 'queries' folder,
            otherwise the first three characters of the file stem in upper case
        tamper_len (float) : tampering duration in seconds, parsed from the
            folder name ('tampered<len>'); 0.0 for untampered queries
        bitrate (str) : bitrate of audio file, the part of the stem after '-'
        ref_name (str) : name of reference used to generate query ('<speaker>_<script>')
        query_no (str) : number of query, as it appears in the file stem
        tamper_score (float) : difference of H1 and H2 z-scores; 0 when no
            frame is flagged as non-matching, 100 when every frame is

    raises
        ValueError : when the stem does not split into exactly five
            '_'-separated fields and exactly two '-'-separated fields
    """
    query_type, query_name = query.split('/')

    if query_type == "queries":
        tamper_type = "NONE"
        tamper_len = 0.
    else:
        tamper_type = query_name[:3].upper()
        tamper_len = float(query_type[len('tampered'):])

    _, query_no, speaker, script, _ = query_name.split('_')
    _, bitrate = query_name.split('-')
    ref_name = f'{speaker}_{script}'

    query_mfcc = np.load(mfcc_DIR + query + '.npy')
    ref_mfcc = ref_mfcc_dict[ref_name]

    # Pack the signs of feature columns 13.. (26 columns expected) into one
    # integer per frame; the weight vector is shared by query and reference.
    bit_weights = np.power(2, np.arange(26))[::-1]
    query_mhps = np.dot(query_mfcc[:, 13:] > 0, bit_weights).tolist()
    ref_mhps = np.dot(ref_mfcc[:, 13:] > 0, bit_weights).tolist()

    # NOTE(review): presumably the frame at which the query starts inside the
    # reference - confirm against offset.find_offset.
    offset = offset_hps(query_mhps, ref_mhps)

    # Crop the reference to the query's span plus 2.5 s on either side,
    # clamped to the bounds of the reference.
    margin = sec_to_mfcc(2.5)
    start = max(0, offset - margin)
    end = min(offset + query_mfcc.shape[0] + margin, ref_mfcc.shape[0])
    ref_mfcc = ref_mfcc[start:end]

    C = cdist(query_mfcc, ref_mfcc).astype('float64')

    # use HSTW to align query to reference
    path = alignHSTW(C, Ca = 2.4, Cb = 33, gamma = 3)

    # find best diagonal path
    best_offset = find_offset(C)

    # classify frames as matching or not
    match = find_matching_frames(best_offset, path, threshold=0)

    if match is None:
        # no frame was flagged as non-matching
        tamper_score = 0
    elif np.all(match[:, 0] == 1):
        # no frame is left to estimate the matching statistics from
        tamper_score = 100
    else:
        h1 = calculate_scores_H1(query_mfcc, ref_mfcc, match, return_all=False)
        h2 = calculate_scores_H2(query_mfcc, ref_mfcc, match, return_all=False)
        tamper_score = h1 - h2

    return tamper_type, tamper_len, bitrate, ref_name, query_no, tamper_score

In [42]:
# Never start more workers than there are cores; 39 stays the upper limit.
n_workers = min(39, cpu_count())
with Pool(n_workers) as p:
    # imap_unordered yields results as workers finish, so results_queries is
    # not in the order of all_queries; each result carries its own identifiers.
    results_queries = list(tqdm(p.imap_unordered(calculate_tamper_score, all_queries), total=len(all_queries)))

100%|████████████████████████████████████| 24000/24000 [02:33<00:00, 156.02it/s]


In [15]:
# One row per query; the column order follows the tuple returned by calculate_tamper_score.
result_columns = ['type', 'len', 'bitrate', 'ref', 'query_no', 'score']
df = pd.DataFrame(results_queries, columns=result_columns)

In [16]:
# Write the scores to ./daps-mp3/results/<outdir>/, creating the folder when needed.
outdir = 'mfccs'
results_dir = f'./daps-mp3/results/{outdir}'
os.makedirs(results_dir, exist_ok=True)
df.to_csv(f'{results_dir}/HSTW_cython_test.csv')

## Evaluation code

In [2]:
# Reload the scores written by the export cell.
results_csv = './daps-mp3/results/mfccs/HSTW_cython_test.csv'
df = pd.read_csv(results_csv)

In [3]:
# Run the evaluation once per bitrate, highest bitrate first.
bitrates = ('256k', '128k', '64k')
for bitrate in bitrates:
    get_eer_table(df, bitrate)

NameError: name 'roc_curve' is not defined