In [1]:
import numpy as np
from sklearn.metrics import roc_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib
matplotlib.use('Agg')
from keras.layers import Input, Conv2D, Lambda, concatenate, Dense, Flatten,MaxPooling2D,Activation
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras import backend as K
from keras.optimizers import SGD,Adam
from keras.losses import binary_crossentropy
import os
import pickle
import matplotlib.pyplot as plt
import random
from itertools import combinations

Using TensorFlow backend.


# Helper Functions

In [2]:
# alpha = 5

## making dictionary to find blacklist pair between train and test dataset
bl_match = np.loadtxt('data/bl_matching.csv',dtype='str')
dev2train={}
dev2id={}
train2dev={}
train2id={}
test2train={}
train2test={}
for iter, line in enumerate(bl_match):
    line_s = line.split(',')
    dev2train[line_s[1].split('_')[-1]]= line_s[3].split('_')[-1]
    dev2id[line_s[1].split('_')[-1]]= line_s[0].split('_')[-1]
    train2dev[line_s[3].split('_')[-1]]= line_s[1].split('_')[-1]
    train2id[line_s[3].split('_')[-1]]= line_s[0].split('_')[-1]
    test2train[line_s[2].split('_')[-1]]= line_s[3].split('_')[-1]
    train2test[line_s[3].split('_')[-1]]= line_s[2].split('_')[-1]
    
def load_ivector(filename):
    utt = np.loadtxt(filename,dtype='str',delimiter=',',skiprows=1,usecols=[0])
    ivector = np.loadtxt(filename,dtype='float32',delimiter=',',skiprows=1,usecols=range(1,601))
    spk_id = []
    for iter in range(len(utt)):
        spk_id = np.append(spk_id,utt[iter].split('_')[0])

    return spk_id, utt, ivector

def length_norm(mat):
# length normalization (l2 norm)
# input: mat = [utterances X vector dimension] ex) (float) 8631 X 600

    norm_mat = []
    for line in mat:
        temp = line/np.math.sqrt(sum(np.power(line,2)))
        norm_mat.append(temp)
    norm_mat = np.array(norm_mat)
    return norm_mat

def make_spkvec(mat, spk_label):
# calculating speaker mean vector
# input: mat = [utterances X vector dimension] ex) (float) 8631 X 600
#        spk_label = string vector ex) ['abce','cdgd']

#     for iter in range(len(spk_label)):
#         spk_label[iter] = spk_label[iter].split('_')[0]

    spk_label, spk_index  = np.unique(spk_label,return_inverse=True)
    spk_mean=[]
    mat = np.array(mat)

    # calculating speaker mean i-vector
    for i, spk in enumerate(spk_label):
        spk_mean.append(np.mean(mat[np.nonzero(spk_index==i)],axis=0))
    spk_mean = length_norm(spk_mean)
    return spk_mean, spk_label

def calculate_EER(trials, scores):
# calculating EER of Top-S detector
# input: trials = boolean(or int) vector, 1: postive(blacklist) 0: negative(background)
#        scores = float vector

    # Calculating EER
    fpr,tpr,threshold = roc_curve(trials,scores,pos_label=1)
    fnr = 1-tpr
    EER_threshold = threshold[np.argmin(abs(fnr-fpr))]
    
    # print EER_threshold
    EER = fpr[np.argmin(np.absolute((fnr-fpr)))]
    print("Top S detector EER is %0.2f%%"% (EER*100))
    return EER

def get_trials_label_with_confusion(identified_label, groundtruth_label,dict4spk,is_trial ):
# determine if the test utterance would make confusion error
# input: identified_label = string vector, identified result of test utterance among multi-target from the detection system 
#        groundtruth_label = string vector, ground truth speaker labels of test utterances
#        dict4spk = dictionary, convert label to target set, ex) train2dev convert train id to dev id

    trials = np.zeros(len(identified_label))
    for iter in range(0,len(groundtruth_label)):
        enroll = identified_label[iter].split('_')[0]
        test = groundtruth_label[iter].split('_')[0]
        if is_trial[iter]:
            if enroll == dict4spk[test]:
                trials[iter]=1 # for Target trial (blacklist speaker)
            else:
                trials[iter]=-1 # for Target trial (backlist speaker), but fail on blacklist classifier
                
        else :
            trials[iter]=0 # for non-target (non-blacklist speaker)
    return trials


def calculate_EER_with_confusion(scores,trials):
# calculating EER of Top-1 detector
# input: trials = boolean(or int) vector, 1: postive(blacklist) 0: negative(background) -1: confusion(blacklist)
#        scores = float vector

    # exclude confusion error (trials==-1)
    scores_wo_confusion = scores[np.nonzero(trials!=-1)[0]]
    trials_wo_confusion = trials[np.nonzero(trials!=-1)[0]]

    # dev_trials contain labels of target. (target=1, non-target=0)
    fpr,tpr,threshold = roc_curve(trials_wo_confusion,scores_wo_confusion,pos_label=1, drop_intermediate=False)
    fnr = 1-tpr
    EER_threshold = threshold[np.argmin(abs(fnr-fpr))]
    
    # EER withouth confusion error
    EER = fpr[np.argmin(np.absolute((fnr-fpr)))]
    
    # Add confusion error to false negative rate(Miss rate)
    total_negative = len(np.nonzero(np.array(trials_wo_confusion)==0)[0])
    total_positive = len(np.nonzero(np.array(trials_wo_confusion)==1)[0])
    fp= fpr*np.float(total_negative)  
    fn= fnr*np.float(total_positive) 
    fn += len(np.nonzero(trials==-1)[0])
    total_positive += len(np.nonzero(trials==-1)[0])
    fpr= fp/total_negative
    fnr= fn/total_positive

    # EER with confusion Error
    EER_threshold = threshold[np.argmin(abs(fnr-fpr))]
    EER_fpr = fpr[np.argmin(np.absolute((fnr-fpr)))]
    EER_fnr = fnr[np.argmin(np.absolute((fnr-fpr)))]
    EER = 0.5 * (EER_fpr+EER_fnr)
    
    print("Top 1 detector EER is %0.2f%% (Total confusion error is %d)"% ((EER*100), len(np.nonzero(trials==-1)[0])))
    return EER,len(np.nonzero(trials==-1)[0])

# Loading dataset

In [3]:
## Loading i-vector
# trn_bl_id, trn_bl_utt, trn_bl_ivector = load_ivector('data/trn_blacklist.csv')
# trn_bg_id, trn_bg_utt, trn_bg_ivector = load_ivector('data/trn_background.csv')
# dev_bl_id, dev_bl_utt, dev_bl_ivector = load_ivector('data/dev_blacklist.csv')
# dev_bg_id, dev_bg_utt, dev_bg_ivector = load_ivector('data/dev_background.csv')


trn_bl_ivector = pickle.load(open('./data/trn_bl_ivector','rb'))
trn_bg_ivector = pickle.load(open('./data/trn_bg_ivector','rb'))
dev_bl_ivector = pickle.load(open('./data/dev_bl_ivector','rb'))
dev_bg_ivector = pickle.load(open('./data/dev_bg_ivector','rb'))
trn_bl_id = pickle.load(open('./data/trn_bl_id','rb'))
trn_bg_id = pickle.load(open('./data/trn_bg_id','rb'))
dev_bl_id = pickle.load(open('./data/dev_bl_id','rb'))
dev_bg_id = pickle.load(open('./data/dev_bg_id','rb'))
trn_bl_utt = pickle.load(open('./data/trn_bl_utt','rb'))
trn_bg_utt = pickle.load(open('./data/trn_bg_utt','rb'))
dev_bl_utt = pickle.load(open('./data/dev_bl_utt','rb'))
dev_bg_utt = pickle.load(open('./data/dev_bg_utt','rb'))
tst_id = pickle.load(open('./data/tst_id','rb'))
test_utt = pickle.load(open('./data/test_utt','rb'))
tst_ivector = pickle.load(open('./data/tst_ivector','rb'))

# Calculating speaker mean vector
spk_mean, spk_mean_label = make_spkvec(trn_bl_ivector,trn_bl_id)

#length normalization

trn_bl_ivector = length_norm(trn_bl_ivector)
trn_bg_ivector = length_norm(trn_bg_ivector)
dev_bl_ivector = length_norm(dev_bl_ivector)
dev_bg_ivector = length_norm(dev_bg_ivector)
tst_ivector = length_norm(tst_ivector)

filename = 'data/tst_evaluation_keys.csv'
tst_info = np.loadtxt(filename,dtype='str',delimiter=',',skiprows=1,usecols=range(0,3))
tst_trials = []
tst_trials_label = []
tst_ground_truth =[]
for iter in range(len(tst_info)):
    tst_trials_label.extend([tst_info[iter,0]])
    if tst_info[iter,1]=='background':
        tst_trials = np.append(tst_trials,0)
        
    else:
        tst_trials = np.append(tst_trials,1)


# making trials of Dev set
dev_ivector = np.append(dev_bl_ivector, dev_bg_ivector,axis=0)
dev_trials = np.append( np.ones([len(dev_bl_id), 1]), np.zeros([len(dev_bg_id), 1]))

trn_ivector = np.append(trn_bl_ivector, trn_bg_ivector,axis=0)
trn_trials = np.append( np.ones([len(trn_bl_ivector), 1]), np.zeros([len(trn_bg_ivector), 1]))

print('\nDev set score using train set :')
# Cosine distance scoring
scores = spk_mean.dot(dev_ivector.transpose())

# Multi-target normalization
blscores = spk_mean.dot(trn_bl_ivector.transpose())
mnorm_mu = np.mean(blscores,axis=1)
mnorm_std = np.std(blscores,axis=1)
for iter in range(np.shape(scores)[1]):
    scores[:,iter]= (scores[:,iter] - mnorm_mu) / mnorm_std
dev_scores = np.max(scores,axis=0)

# Top-S detector EER
dev_EER = calculate_EER(dev_trials, dev_scores)

#divide trial label into target and non-target, plus confusion error(blacklist, fail at blacklist detector)
dev_identified_label = spk_mean_label[np.argmax(scores,axis=0)]
dev_trials_label = np.append( dev_bl_id,dev_bg_id)
dev_trials_utt_label = np.append( dev_bl_utt,dev_bg_utt)

# Top-1 detector EER
dev_trials_confusion = get_trials_label_with_confusion(dev_identified_label, dev_trials_label, dev2train, dev_trials )
dev_EER_confusion,trials = calculate_EER_with_confusion(dev_scores,dev_trials_confusion)



Dev set score using train set :
Top S detector EER is 2.00%
Top 1 detector EER is 12.26% (Total confusion error is 444)


# LDA model

In [39]:
id_set = sorted(set(trn_bl_id))

id2int = {}
for i,spk_id in enumerate(id_set):
    id2int[spk_id] = i

int2id = {v: k for k, v in id2int.items()}


## Generating subclass for Testing

classes = 3631

## Generating (Anchor, Positive, Negative) pairs for One-shot-learning
all_data = np.column_stack((trn_bl_ivector.reshape(classes,3,600),dev_bl_ivector.reshape(classes,1,600)))

label_ls = []
for i in range(3631):
    for label in range(4):
        label_ls.append(i)
label_array = np.array(label_ls)

dev2trn_bl_id = []
for i in dev_bl_id:
    dev2trn_bl_id.append(dev2train[i])
dev2trn_bl_id = np.array(dev2trn_bl_id)
# if i == 0:
    # None
# else:
    # model.load_weights(foldername+filename+'.hdf5')
# X_train, X_test = triplet_generation(all_data,neg_class_num=50)
#Use trained model to predict
trained_model_task1a = LinearDiscriminantAnalysis()
trained_model_task1a.fit(trn_ivector,trn_trials)

trained_model_task1b = LinearDiscriminantAnalysis()
trained_model_task1b.fit(np.append(trn_ivector,dev_ivector, axis=0),np.append(trn_trials,dev_trials))

trained_model_task2a = LinearDiscriminantAnalysis()
trained_model_task2a.fit(trn_bl_ivector,trn_bl_id)

trained_model_task2b = LinearDiscriminantAnalysis()
trained_model_task2b.fit(np.append(trn_bl_ivector,dev_bl_ivector, axis=0),np.append(trn_bl_id,dev2trn_bl_id))
# trained_model.load_weights(filename+'.hdf5')

LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [51]:
scores = trained_model_task1a.predict(dev_ivector)
dev_EER = calculate_EER(dev_trials, scores)

##############################1st Report#####################################
# Cosine distance scoring
dev_identified_label = trained_model_task2a.predict(dev_ivector)

#divide trial label into target and non-target, plus confusion error(blacklist, fail at blacklist detector)
# dev_identified_label = spk_mean_label[np.argmax(scores,axis=0)]
# dev_trials_label = np.append( dev_bl_id,dev_bg_id)
# dev_trials_utt_label = np.append( dev_bl_utt,dev_bg_utt)

# Top-1 detector EER
dev_trials_confusion = get_trials_label_with_confusion(dev_identified_label, dev_trials_label, dev2train, dev_trials )
dev_EER_confusion,confu_num1 = calculate_EER_with_confusion(dev_scores,dev_trials_confusion)

Top S detector EER is 0.46%
Top 1 detector EER is 8.62% (Total confusion error is 309)


In [52]:
# transformed_trn_bl_ivector = trained_model.predict(trn_bl_ivector)
# transformed_dev_ivector = trained_model.predict(dev_ivector)
# transformed_trn_ivector = trained_model.predict(trn_ivector)
# transformed_tst_ivector = trained_model.predict(tst_ivector)
# transformed_dev_bl_ivector = trained_model.predict(dev_bl_ivector)
# transformed_spk_mean, transformed_spk_mean_label = make_spkvec(transformed_trn_bl_ivector,trn_bl_id)
print('\nTest set score using train set:')
scores = trained_model_task1a.predict(tst_ivector)
dev_EER = calculate_EER(tst_trials, scores)

##############################1st Report#####################################
# Cosine distance scoring
tst_identified_label = trained_model_task2a.predict(tst_ivector)

#divide trial label into target and non-target, plus confusion error(blacklist, fail at blacklist detector)
# dev_identified_label = spk_mean_label[np.argmax(scores,axis=0)]
# dev_trials_label = np.append( dev_bl_id,dev_bg_id)
# dev_trials_utt_label = np.append( dev_bl_utt,dev_bg_utt)

# Top-1 detector EER
tst_trials_confusion = get_trials_label_with_confusion(tst_identified_label, tst_trials_label, test2train,tst_trials )
tst_EER_confusion,confu_num3 = calculate_EER_with_confusion(scores,tst_trials_confusion)  


################################2nd Report######################################



################################3rd Report #########################################
print('\nTest set score using train + dev set:')
# get dev set id consistent with Train set
scores = trained_model_task1b.predict(tst_ivector)
dev_EER = calculate_EER(tst_trials, scores)

##############################1st Report#####################################
# Cosine distance scoring
tst_identified_label = trained_model_task2b.predict(tst_ivector)

#divide trial label into target and non-target, plus confusion error(blacklist, fail at blacklist detector)
# dev_identified_label = spk_mean_label[np.argmax(scores,axis=0)]
# dev_trials_label = np.append( dev_bl_id,dev_bg_id)
# dev_trials_utt_label = np.append( dev_bl_utt,dev_bg_utt)

# Top-1 detector EER
tst_trials_confusion = get_trials_label_with_confusion(tst_identified_label, tst_trials_label, test2train,tst_trials )
tst_EER_confusion,confu_num3 = calculate_EER_with_confusion(scores,tst_trials_confusion)    



Test set score using train set:
Top S detector EER is 25.07%
Top 1 detector EER is 18.14% (Total confusion error is 367)

Test set score using train + dev set:
Top S detector EER is 23.67%
Top 1 detector EER is 15.62% (Total confusion error is 230)


In [49]:
tst_trials.shape

(16017,)

In [50]:
scores.shape

(16017,)