In [1]:
import os, sys
import numpy as np
import pandas as pd
from tqdm import tqdm

### Util Functions

In [2]:
def compute_det_curve(target_scores, nontarget_scores):
    """Compute the points of a detection-error-tradeoff (DET) curve.

    Both score sets are pooled and sorted in ascending order; every sorted
    score (plus one extra point placed just below the minimum score) acts as
    a candidate threshold.

    Args:
        target_scores: 1-D numpy array with the scores of the target trials.
        nontarget_scores: 1-D numpy array with the scores of the nontarget trials.

    Returns:
        Tuple ``(frr, far, thresholds)`` of arrays, each with
        ``target_scores.size + nontarget_scores.size + 1`` entries:
        ``frr[i]`` is the fraction of target scores among the ``i`` lowest
        pooled scores, ``far[i]`` is the fraction of nontarget scores that are
        not among the ``i`` lowest pooled scores, and ``thresholds[i]`` is the
        matching score value.
    """
    n_target = target_scores.size
    n_nontarget = nontarget_scores.size
    n_total = n_target + n_nontarget

    pooled_scores = np.concatenate((target_scores, nontarget_scores))
    is_target = np.concatenate((np.ones(n_target), np.zeros(n_nontarget)))

    # Stable sort: tied scores keep their pooled order (targets first).
    order = np.argsort(pooled_scores, kind='mergesort')
    sorted_scores = pooled_scores[order]
    is_target = is_target[order]

    # Running counts over the ascending scores.
    targets_so_far = np.cumsum(is_target)
    nontargets_so_far = np.arange(1, n_total + 1) - targets_so_far
    nontargets_remaining = n_nontarget - nontargets_so_far

    # Prepend the starting point of the curve: nothing rejected, everything accepted.
    frr = np.concatenate((np.atleast_1d(0), targets_so_far / n_target))  # false rejection rates
    far = np.concatenate((np.atleast_1d(1), nontargets_remaining / n_nontarget))  # false acceptance rates
    # The extra leading threshold sits slightly below the lowest score.
    thresholds = np.concatenate((np.atleast_1d(sorted_scores[0] - 0.001), sorted_scores))

    return frr, far, thresholds

def compute_eer(target_scores, nontarget_scores):
    """Compute the equal error rate (EER) and the threshold at which it occurs.

    Args:
        target_scores: Array-like of target-trial scores (flattened to 1-D).
        nontarget_scores: Array-like of nontarget-trial scores (flattened to 1-D).

    Returns:
        Tuple ``(eer, threshold)``: the mean of the false rejection and false
        acceptance rates at the DET point where the two are closest, and the
        threshold belonging to that point.
    """
    flat_targets = np.array(target_scores).ravel()
    flat_nontargets = np.array(nontarget_scores).ravel()

    frr, far, thresholds = compute_det_curve(flat_targets, flat_nontargets)

    # The operating point where FRR and FAR are closest approximates the EER.
    closest = np.argmin(np.abs(frr - far))
    eer = np.mean((frr[closest], far[closest]))
    return eer, thresholds[closest]

def eer_wrapper(score, bonafide_or_deepfake):
    """Compute the EER from a score array and a parallel label array.

    Args:
        score: Array of scores; it is indexed with boolean masks.
        bonafide_or_deepfake: Array of labels aligned with ``score``. Entries
            equal to 1 are used as target trials and entries equal to 0 as
            nontarget trials; any other value is left out.

    Returns:
        The ``(eer, threshold)`` tuple returned by ``compute_eer``.
    """
    is_positive = bonafide_or_deepfake == 1
    is_negative = bonafide_or_deepfake == 0
    return compute_eer(score[is_positive], score[is_negative])

### Get all baseline data

In [3]:
# Get all csvs.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# order in which the files are processed reproducible across machines and runs.
csvs = sorted(f for f in os.listdir("./baselines_csv") if f.endswith(".csv"))
print(len(csvs))

2


In [4]:
# Parse the ground-truth listing into a reference dataframe.
# Every non-blank line is split on whitespace into exactly six fields:
#   source_dataset, singer_id, filename, <unused>, attack_id, bonafide/deepfake label
with open("test_groundtruth.txt", "r") as f:
    # Drop blank lines; they have no third field and would break the sort key below.
    lines = [line for line in f if line.strip()]

# Order the entries by the integer that follows the last "_" in the filename field.
lines.sort(key=lambda x: int(x.split()[2].split("_")[-1]))
source_datasets, singer_ids, filename, _unused_field, attack_ids, bonafide_or_deepfakes = zip(*[line.strip().split() for line in lines])
# Encode the label as 1 for "bonafide" and 0 for anything else.
bonafide_or_deepfakes = [1 if x == "bonafide" else 0 for x in bonafide_or_deepfakes]

# create ground_truth reference dataframe
ground_truth = pd.DataFrame({"source_dataset": source_datasets, "singer_id": singer_ids, "filename": filename, "attack_id": attack_ids, "bonafide_or_deepfake": bonafide_or_deepfakes})

### Analysis (all)

In [5]:
# Score every baseline csv against the ground truth and collect one summary row
# per file: pooled EERs, per-attack EERs and per-source-dataset EERs.
all_results = []
for csv in tqdm(csvs):
    data = pd.read_csv(f"./baselines_csv/{csv}")
    df = data.merge(ground_truth, on="filename", how="left")

    # The left merge leaves NaN ground-truth fields for any scored file that is
    # absent from the ground truth. Those NaN groups would break the per-attack
    # and per-dataset loops below in hard-to-diagnose ways, so fail right here
    # with a message that names the offending files.
    unmatched = df.loc[df["bonafide_or_deepfake"].isna(), "filename"]
    if not unmatched.empty:
        raise ValueError(
            f"{csv}: {len(unmatched)} scored file(s) have no ground-truth entry, "
            f"e.g. {unmatched.head(5).tolist()}"
        )

    results = {
        "per_attack": {},
        "per_dataset": {},
    }

    is_bonafide = df["bonafide_or_deepfake"] == 1
    is_acesinger = df["source_dataset"] == "acesinger"

    # Bonafide reference set shared by every attack: ACESinger bonafide samples
    # are kept apart and only added back for A14.
    bonafide_df = df[is_bonafide & ~is_acesinger]
    acesinger_bonafide_df = df[is_bonafide & is_acesinger]

    # compute EER for each attack
    for attack_id in df["attack_id"].unique():
        if attack_id == "-":
            # "-" is not an attack id (presumably the placeholder on bonafide rows; verify against the ground-truth file)
            continue
        attack_parts = [df[df["attack_id"] == attack_id], bonafide_df]
        if attack_id == "A14":
            # add ACESinger bonafide samples if A14 attack; this should make A14 yield the same result as the previous official evaluation script
            attack_parts.append(acesinger_bonafide_df)
        attack_df = pd.concat(attack_parts)
        eer = eer_wrapper(attack_df["score"].values, attack_df["bonafide_or_deepfake"].values)[0]
        results["per_attack"][attack_id] = eer

    # compute EER for each source dataset
    for source_dataset in df["source_dataset"].unique():
        dataset_df = df[df["source_dataset"] == source_dataset]
        eer = eer_wrapper(dataset_df["score"].values, dataset_df["bonafide_or_deepfake"].values)[0]
        results["per_dataset"][source_dataset] = eer

    # Pooled EER over every row, and pooled EER after discarding the A14 attack
    # rows together with all ACESinger rows.
    pooled_attack_eer = eer_wrapper(df["score"].values, df["bonafide_or_deepfake"].values)[0]
    no_A14_df = df[(df["attack_id"] != "A14") & ~is_acesinger]
    pooled_attack_discard_A14_eer = eer_wrapper(no_A14_df["score"].values, no_A14_df["bonafide_or_deepfake"].values)[0]

    # Order the per-attack results by the numeric part of the attack id.
    results["per_attack"] = dict(sorted(results["per_attack"].items(), key=lambda x: int(x[0].split("A")[-1])))

    all_results.append({
        "csv": csv,
        "EER (w/o A14)": pooled_attack_discard_A14_eer,
        "EER (overall)": pooled_attack_eer,
        "A09": results["per_attack"]["A09"],
        "A10": results["per_attack"]["A10"],
        "A11": results["per_attack"]["A11"],
        "A12": results["per_attack"]["A12"],
        "A13": results["per_attack"]["A13"],
        "KiSing": results["per_dataset"]["kising"],
        "M4Singer": results["per_dataset"]["m4singer"],
        "ACESinger (A14)": results["per_dataset"]["acesinger"],
    })

results_df = pd.DataFrame(all_results)
results_df.to_csv("results.csv", index=False)

100%|█████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  4.28it/s]
