# MFCC-Euclidean baseline

In [1]:
import numpy as np
import pickle
import librosa
from scipy.spatial.distance import cdist
import python_speech_features
import numba
from matplotlib import pyplot as plt
from matplotlib import gridspec
import time
import os
import pandas as pd
import glob
from tqdm import tqdm
from multiprocessing import Pool
from multiprocessing import cpu_count
from scipy.optimize import brentq
from scipy.interpolate import interp1d
from sklearn.metrics import roc_curve, auc
from offset import find_offset as offset_hps

In [2]:
# Root folder holding one sub-folder of saved .npy MFCC arrays per query set.
mfcc_DIR = './daps-mp3/test/mfccs/'
#HP_DIR = './daps-mp3/train/hashprints/'

# Each identifier is '<sub-folder>/<file name minus its last 4 characters>',
# listed in sorted file-name order; one list per sub-folder.
queries, tamp_025, tamp_05, tamp_1, tamp_2, tamp_4 = (
    [subdir + '/' + fname[:-4] for fname in sorted(os.listdir(mfcc_DIR + subdir + '/'))]
    for subdir in ('queries', 'tampered0.25', 'tampered0.5', 'tampered1', 'tampered2', 'tampered4')
)

In [3]:
#ref_hp_dict = {file[:-10]: np.load(HP_DIR + 'refs/'+file) for file in sorted(os.listdir(HP_DIR + 'refs/'))}
# Load every reference array once, keyed by its file name with the last
# 10 characters removed (calculate_tamper_score looks these up by '<speaker>_<script>').
ref_mfcc_dict = dict(
    (fname[:-10], np.load(mfcc_DIR + 'refs/' + fname))
    for fname in sorted(os.listdir(mfcc_DIR + 'refs/'))
)

In [4]:
# Full evaluation list: untampered queries first, then each tampered set in turn.
all_queries = [*queries, *tamp_025, *tamp_05, *tamp_1, *tamp_2, *tamp_4]

In [5]:
def calculate_tamper_score(query):
    '''
    Score one query by its mean per-frame Euclidean distance to the aligned
    span of its reference MFCC array.

    query: string of the form '<set>/<name>'. <set> is either 'queries'
        (untampered) or 'tampered<len>', where <len> parses as a float.
        <name> must split into exactly five '_'-separated fields (2nd = query
        number, 3rd = speaker, 4th = script) and contain exactly one '-',
        followed by the bitrate. The array is read from
        mfcc_DIR + query + '.npy'.

    Returns the tuple
        (tamper_type, tamper_len, bitrate, ref_name, query_no, tamper_score)
    where tamper_type is 'NONE' for untampered queries, otherwise the first
    three characters of <name> upper-cased, and ref_name is '<speaker>_<script>'.

    Raises ValueError if the name does not have the expected fields, or if
    the reference span selected by the offset does not have as many rows as
    the query. Raises KeyError if ref_name is missing from ref_mfcc_dict.
    '''
    query_type, query_name = query.split('/')

    if query_type == "queries":
        tamper_type = "NONE"
        tamper_len = 0.
    else:
        tamper_type = query_name[:3].upper()
        tamper_len = float(query_type[len('tampered'):])

    _, query_no, speaker, script, _ = query_name.split('_')
    _, bitrate = query_name.split('-')
    ref_name = f'{speaker}_{script}'

    # load query mfcc
    query_mfcc = np.load(mfcc_DIR + query + '.npy')

    # load ref mfcc
    ref_mfcc = ref_mfcc_dict[ref_name]

    # Threshold columns 13 onward at zero and pack the resulting 26 bits of each
    # row into one integer (first thresholded column = most significant bit).
    # NOTE(review): assumes exactly 26 columns follow the first 13 - confirm.
    bit_weights = np.power(2, np.arange(26))[::-1]
    query_mhps = np.dot(query_mfcc[:, 13:] > 0, bit_weights).tolist()
    ref_mhps = np.dot(ref_mfcc[:, 13:] > 0, bit_weights).tolist()

    # offset_hps comes from the local `offset` module; its result is used here
    # as the starting row of the query inside the reference.
    offset = offset_hps(query_mhps, ref_mhps)
    ref_segment = ref_mfcc[offset:offset + len(query_mfcc)]

    # A short slice would either fail with an opaque broadcast error or, when
    # it is a single row, silently broadcast into a meaningless score.
    if len(ref_segment) != len(query_mfcc):
        raise ValueError(
            f'aligned reference span for {query!r} has {len(ref_segment)} frames, '
            f'expected {len(query_mfcc)} (offset={offset})'
        )

    # Euclidean distance per row, averaged over all rows.
    frame_dists = np.sqrt(np.square(query_mfcc - ref_segment).sum(axis=1))
    tamper_score = frame_dists.mean()

    return tamper_type, tamper_len, bitrate, ref_name, query_no, tamper_score

In [44]:
# Spot-check: score 20 randomly chosen untampered queries (indices drawn with replacement).
sample_indices = np.random.randint(0, len(queries), size=20)
for idx in sample_indices:
    print(calculate_tamper_score(queries[idx]))

('NONE', 0.0, '256k', 'm9_script4', '2', 16.193056548413697)
('NONE', 0.0, '256k', 'f10_script3', '2', 17.98913843142853)
('NONE', 0.0, '64k', 'm6_script2', '8', 17.838492125933474)
('NONE', 0.0, '64k', 'f7_script5', '8', 18.664820361904184)
('NONE', 0.0, '128k', 'm6_script3', '4', 14.03410113378737)
('NONE', 0.0, '256k', 'f8_script2', '4', 15.340184539561587)
('NONE', 0.0, '64k', 'm8_script3', '1', 15.359896840277642)
('NONE', 0.0, '256k', 'm9_script3', '7', 11.154506158065287)
('NONE', 0.0, '64k', 'm10_script1', '4', 15.50058523267706)
('NONE', 0.0, '128k', 'm8_script2', '5', 13.031519868925516)
('NONE', 0.0, '128k', 'm8_script4', '1', 16.196058340506358)
('NONE', 0.0, '64k', 'f7_script2', '1', 18.731694342433297)
('NONE', 0.0, '128k', 'm6_script5', '3', 17.80401855283843)
('NONE', 0.0, '64k', 'm8_script5', '4', 16.361273875113596)
('NONE', 0.0, '128k', 'm6_script2', '1', 15.387513901426049)
('NONE', 0.0, '256k', 'f10_script5', '4', 17.586141375632764)
('NONE', 0.0, '256k', 'f9_scrip

In [47]:
# Spot-check: score 20 random entries of the tampered4 list, drawn (with
# replacement) from positions 3000..4499.
sample_indices = np.random.randint(3000, 4500, size=20)
for idx in sample_indices:
    print(calculate_tamper_score(tamp_4[idx]))

('REP', 4.0, '128k', 'f9_script4', '2', 37.56874234620602)
('REP', 4.0, '64k', 'm6_script4', '5', 38.82220898909815)
('REP', 4.0, '128k', 'f9_script3', '7', 35.79696786831045)
('REP', 4.0, '128k', 'f7_script1', '4', 42.56484274354626)
('REP', 4.0, '64k', 'm10_script2', '3', 41.06682098750665)
('REP', 4.0, '64k', 'f9_script5', '5', 42.72135584617209)
('REP', 4.0, '64k', 'f10_script1', '3', 37.9076425538059)
('REP', 4.0, '128k', 'f6_script1', '2', 38.25152672082054)
('REP', 4.0, '256k', 'f7_script2', '6', 40.81802115996839)
('REP', 4.0, '128k', 'f10_script2', '3', 40.21664471883373)
('REP', 4.0, '128k', 'f6_script5', '1', 38.658737500043415)
('REP', 4.0, '64k', 'm10_script1', '6', 42.40659018639958)
('REP', 4.0, '128k', 'm7_script5', '6', 41.6290202144532)
('REP', 4.0, '128k', 'f6_script3', '1', 37.20521977854127)
('REP', 4.0, '256k', 'm10_script5', '6', 41.06094696501961)
('REP', 4.0, '64k', 'f7_script3', '1', 45.28098368176784)
('REP', 4.0, '128k', 'm7_script1', '5', 41.239996148262485

In [6]:
# Score every query in parallel. Leave one core free, but never ask for fewer
# than one worker: Pool(0) raises ValueError on a single-core machine.
n_workers = max(1, cpu_count() - 1)
with Pool(n_workers) as p:
    # imap_unordered yields results as workers finish, so the row order is not
    # the order of all_queries; each result tuple carries its own identifying fields.
    results_queries = list(tqdm(p.imap_unordered(calculate_tamper_score, all_queries), total=len(all_queries)))

100%|████████████████████████████████████| 24000/24000 [01:12<00:00, 332.58it/s]


In [49]:
# One row per scored query; the column order mirrors the tuple returned by
# calculate_tamper_score.
df = pd.DataFrame(
    results_queries,
    columns=['type', 'len', 'bitrate', 'ref', 'query_no', 'score'],
)

In [52]:
def calculate_eer(fpr, tpr):
    '''
    Return the equal error rate, in percent, of a binary classifier.

    fpr, tpr: false- and true-positive rates as returned by roc_curve
        (sklearn.metrics). fpr must cover the whole interval [0, 1], because
        the root is searched for on that interval.

    The EER is the false-positive rate x at which x equals the false-negative
    rate 1 - tpr(x), with tpr linearly interpolated between the ROC points.

    Raises ValueError if the interpolated curve cannot be evaluated on [0, 1]
    or if 1 - x - tpr(x) has the same sign at both ends of the interval.
    '''
    # Build the interpolant once; brentq evaluates the objective many times.
    roc = interp1d(fpr, tpr)
    eer = brentq(lambda x: 1. - x - roc(x), 0., 1.)

    return eer*100

In [53]:
# Column groups of the EER table: the three tamper-type codes, plus a pooled
# 'aggregate' column that uses every type.
labels = ['INS', 'DEL', 'REP'] + ['aggregate']
# Row groups of the EER table: the tamper lengths, plus a pooled 'aggregate'
# row that uses every length.
tamperlens = [4, 2, 1, 0.5, 0.25] + ['aggregate']

In [56]:
def get_eer_table(results_df, bitrate):
    '''
    Build and display the table of equal error rates (in percent) for one bitrate.

    results_df: DataFrame with at least the columns 'type', 'len', 'bitrate'
        and 'score'. Rows whose 'type' is 'NONE' are the negative class;
        every other row is a positive (tampered) example. It is not modified.
    bitrate: value matched against the 'bitrate' column.

    Returns a DataFrame with a 'tamper_len' column (the entries of tamperlens)
    and one EER column per entry of labels. Every cell is computed from the
    'NONE' rows plus the rows of that tamper type and length; 'aggregate'
    pools all types or all lengths respectively. roc_curve treats larger
    scores as more indicative of the positive class.

    The table is also shown through the notebook's display(), captioned with
    the bitrate.
    '''
    # Copy the filtered rows so the label column is written to our own frame
    # rather than to a slice of the caller's data.
    subset = results_df[results_df["bitrate"] == bitrate].copy()
    # Assign both classes in one step. A chained `.loc[mask]['truth'] = 0`
    # writes to a temporary copy and would leave every row labelled 1.
    subset['truth'] = (subset['type'] != 'NONE').astype(int)

    total = {'tamper_len': tamperlens}
    for label in labels:
        if label != 'aggregate':
            lab = subset[(subset["type"] == label) | (subset["type"] == 'NONE')]
        else:
            lab = subset

        cols = []
        for lens in tamperlens:
            if lens != 'aggregate':
                # 'NONE' rows carry len == 0 and are kept as negatives for every length.
                len_lab = lab[(lab['len'] == lens) | (lab['len'] == 0)]
            else:
                len_lab = lab
            fpr, tpr, _ = roc_curve(len_lab['truth'], len_lab['score'])
            cols.append(calculate_eer(fpr, tpr))

        total[label] = cols

    eer_table = pd.DataFrame(data=total)
    # Caption the displayed table so the per-bitrate outputs can be told apart;
    # the plain DataFrame is what gets returned.
    display(eer_table.style.set_caption('bitrate: ' + str(bitrate)))

    return eer_table

In [59]:
# EER table for the 256k queries.
first = get_eer_table(df, bitrate='256k')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_df.loc[results_df['type'] != 'NONE', 'truth'] = 1
  df = df.style.set_caption('bitrate: '+ str(bitrate)).hide_index()


Unnamed: 0,tamper_len,INS,DEL,REP,aggregate
0,4,0.0,0.4,0.0,0.133333
1,2,0.2,1.6,1.2,1.0
2,1,1.6,2.0,12.0,6.2
3,0.5,4.6,2.2,29.4,14.8
4,0.25,5.4,3.2,39.8,19.8
5,aggregate,2.72,2.0,19.56,9.173333


In [60]:
# EER table for the 128k queries.
first = get_eer_table(df, bitrate='128k')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_df.loc[results_df['type'] != 'NONE', 'truth'] = 1
  df = df.style.set_caption('bitrate: '+ str(bitrate)).hide_index()


Unnamed: 0,tamper_len,INS,DEL,REP,aggregate
0,4,0.0,0.4,0.0,0.133333
1,2,0.2,1.6,1.0,0.933333
2,1,1.4,1.8,12.0,5.933333
3,0.5,4.6,2.0,30.0,14.6
4,0.25,5.4,3.0,39.8,19.666667
5,aggregate,2.56,1.88,19.48,9.0


In [61]:
# EER table for the 64k queries.
first = get_eer_table(df, bitrate='64k')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_df.loc[results_df['type'] != 'NONE', 'truth'] = 1
  df = df.style.set_caption('bitrate: '+ str(bitrate)).hide_index()


Unnamed: 0,tamper_len,INS,DEL,REP,aggregate
0,4,0.0,0.2,0.0,0.066667
1,2,0.2,1.0,0.4,0.533333
2,1,1.0,1.8,10.0,5.133333
3,0.5,3.8,1.8,27.6,13.733333
4,0.25,4.8,2.6,38.8,18.933333
5,aggregate,2.2,1.52,18.8,8.8
