In [88]:
import os

import pandas as pd
from IPython.display import HTML, display
from ay2.tools.pandas import format_numeric_of_df_columns

In [79]:
def format_res(x, acc=1, auc=1, eer=1, metric_prefix="test"):
    """Render the selected metrics of one result row as a "/"-separated string.

    Args:
        x: Mapping-like row (dict, pandas Series, ...) that provides the keys
            "<metric_prefix>-acc", "<metric_prefix>-auc" and
            "<metric_prefix>-eer". All three keys are read even when a metric
            is switched off, so a missing key raises regardless of the flags.
        acc: Truthy to include the accuracy value.
        auc: Truthy to include the AUC value.
        eer: Truthy to include the EER value.
        metric_prefix: Prefix of the metric keys looked up in ``x``.

    Returns:
        str: The enabled metrics, each multiplied by 100 and formatted with two
        decimals, joined by "/" in the fixed order acc, auc, eer. An empty
        string when every flag is falsy.
    """
    # Plain-text output only. A LaTeX variant (width-5 padding, blanks replaced
    # by \phantom{0}) previously lived here as commented-out code.
    selection = (("acc", acc), ("auc", auc), ("eer", eer))

    # Format every metric up front, in acc/auc/eer order.
    percent = {
        name: "{:.2f}".format(x[f"{metric_prefix}-{name}"] * 100)
        for name, _ in selection
    }

    return "/".join(percent[name] for name, enabled in selection if enabled)


# Root directory of the saved runs; result files are looked up as
# <ROOT_PATH>/<model>/<task>/version_<version>/<file_name>.csv
ROOT_PATH = "/home/ay/data/DATA/1-model_save/00-Deepfake/1-df-audio-new"


def read_test_result(model, task, version=0, metric_prefix="test", file_name="test"):
    """Load the acc/auc/eer columns of one saved result CSV.

    Args:
        model: Model directory name below ``ROOT_PATH``.
        task: Task directory name below the model directory.
        version: Number used in the ``version_<version>`` directory name.
        metric_prefix: Prefix of the metric columns to keep
            ("<metric_prefix>-acc", "<metric_prefix>-auc", "<metric_prefix>-eer").
        file_name: CSV file name without the ".csv" extension.

    Returns:
        A DataFrame restricted to the three metric columns (in acc, auc, eer
        order), or ``None`` after printing a warning when the CSV file does not
        exist.
    """
    run_dir = f"{ROOT_PATH}/{model}/{task}/version_{version}"
    csv_path = os.path.join(run_dir, f"{file_name}.csv")

    if os.path.exists(csv_path):
        metric_columns = [f"{metric_prefix}-{name}" for name in ("acc", "auc", "eer")]
        return pd.read_csv(csv_path)[metric_columns]

    # Missing runs are reported but tolerated; callers must handle None.
    print("Warning!!!! cannot find: ", csv_path)
    return None

In [73]:
# Run versions read for every model. All entries below share this one list object.
# NOTE(review): version 5 is absent from the list - confirm this is intentional.
_versions = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10]

# Model name -> versions to read; insertion order is the iteration order of `models`.
MODELs_VERSIONS = dict.fromkeys(
    (
        "LCNN",
        "RawNet2",
        "RawGAT",
        "Wave2Vec2",
        "WaveLM",
        "LibriSeVoc",
        "AudioClip",
        "Wav2Clip",
        "AASIST",
        "SFATNet",
        "ASDG",
        "Ours/ResNet",
    ),
    _versions,
)

# Live dict views: they reflect any later change made to MODELs_VERSIONS.
models, versions = MODELs_VERSIONS.keys(), MODELs_VERSIONS.values()

In [58]:
# Scratch run of the per-(model, task) aggregation: for each pair, take the
# first row of every available version's result file and average the metrics.
# `datas` is overwritten on every iteration, so after the loop it holds the
# mean metrics of the last (model, task) pair that had at least one result.
tasks = ["LibriSeVoc_inner", "wavefake_inner", "DECRO_english", "DECRO_chinese"]

for model, version in zip(models, versions):
    for task in tasks:
        first_rows = []
        for v in version:
            _data = read_test_result(model, task, v)
            # read_test_result returns None (after a warning) for missing files.
            if _data is not None:
                first_rows.append(_data[0:1])
        if not first_rows:
            # pd.concat raises ValueError on an empty list; skip pairs for
            # which no version has a result file.
            continue
        datas = pd.concat(first_rows, ignore_index=True).mean()



In [59]:
datas

test-acc    0.994609
test-auc    0.999806
test-eer    0.005731
dtype: float64

In [89]:
def display_inner_evaluation(
    models,
    tasks=("LibriSeVoc_inner", "wavefake_inner", "DECRO_english", "DECRO_chinese"),
    versions=(),
    show_latex=0,
    show_html=1,
    metric_prefix="test",
    avg_res=True,
    file_name="test",
):
    """Build (and optionally display) a model x task table of "AUC/EER" strings.

    For every (model, task) pair the first row of each available version's
    result file is read through ``read_test_result`` and the metrics are
    averaged over those versions. A per-model "mean" entry averages the
    per-task means. Each cell is rendered by ``format_res`` with AUC and EER
    enabled and accuracy disabled.

    Args:
        models: Iterable of model names; also fixes the row order of the table.
        tasks: Iterable of task names; also fixes the column order. It is
            copied, never modified.
        versions: Iterable parallel to ``models``; each item is the iterable of
            version numbers to read for that model. Pairs are formed with
            ``zip``, so surplus entries on either side are ignored.
        show_latex: Truthy to print the table as LaTeX.
        show_html: Truthy to display the table as HTML.
        metric_prefix: Prefix of the metric columns, forwarded to
            ``read_test_result`` and ``format_res``.
        avg_res: Truthy to append the "mean" column.
        file_name: Result file name without extension, forwarded to
            ``read_test_result``.

    Returns:
        The table as returned by ``format_numeric_of_df_columns``; rows follow
        ``models`` and columns follow ``tasks`` (plus "mean" when ``avg_res``).
        Cells without any result file are missing values.

    Raises:
        ValueError: If no result file is found for any (model, task) pair.
    """
    # Materialise once: the inputs may be dict views or one-shot iterables, and
    # copying `tasks` keeps the caller's (or the default) sequence untouched.
    models = list(models)
    tasks = list(tasks)

    records = []
    for model, model_versions in zip(models, versions):
        for task in tasks:
            first_rows = []
            for v in model_versions:
                run = read_test_result(
                    model, task, v, metric_prefix=metric_prefix, file_name=file_name
                )
                # Missing files come back as None and are simply left out.
                if run is not None:
                    first_rows.append(run[0:1])
            if not first_rows:
                # pd.concat raises on an empty list; leave this cell empty.
                continue

            record = dict(pd.concat(first_rows, ignore_index=True).mean())
            record["model"] = model
            record["task"] = task
            records.append(record)

    if not records:
        raise ValueError(
            f"no result files found for models={models} and tasks={tasks}"
        )

    data = pd.DataFrame(records)

    # Per-model average over the tasks that actually produced results.
    model_means = data.groupby("model").mean(numeric_only=True).reset_index()
    model_means["task"] = "mean"
    data = pd.concat([data, model_means], ignore_index=True)

    data["res"] = data.apply(
        lambda x: format_res(x, acc=0, auc=1, eer=1, metric_prefix=metric_prefix),
        axis=1,
    )
    data = data.pivot(index="model", columns="task", values="res").rename_axis(None, axis=1)

    columns = tasks + ["mean"] if avg_res else tasks
    # reindex (rather than label selection) keeps the requested row/column
    # order and tolerates models or tasks that produced no results at all.
    data = data.reindex(index=models, columns=columns)

    data = format_numeric_of_df_columns(data)

    if show_html:
        display(HTML(data.to_html()))
    if show_latex:
        # One "l" for the index column plus one "r" per data column.
        print(data.style.to_latex(column_format="l" + "r" * len(data.columns)))
    return data

In [90]:
data = display_inner_evaluation(models, versions=versions)



Unnamed: 0_level_0,LibriSeVoc_inner,wavefake_inner,DECRO_english,DECRO_chinese,mean
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LCNN,99.96/0.90,99.98/0.64,99.96/0.90,99.88/1.43,99.94/0.97
RawNet2,95.00/6.94,97.93/6.95,99.37/3.68,99.32/3.85,97.91/5.36
RawGAT,99.89/1.45,99.92/1.25,99.89/1.45,99.87/1.56,99.89/1.43
Wave2Vec2,100.00/0.09,99.99/0.44,99.99/0.47,99.98/0.43,99.99/0.35
WaveLM,100.00/0.03,100.00/0.26,99.98/0.55,99.99/0.41,99.99/0.31
LibriSeVoc,99.44/2.86,99.37/3.93,99.42/3.42,99.10/4.36,99.33/3.64
AudioClip,99.32/3.98,99.92/1.29,99.88/0.91,99.58/2.85,99.67/2.26
Wav2Clip,99.83/1.60,99.99/0.30,99.98/0.68,99.21/4.08,99.75/1.66
AASIST,99.91/1.33,99.84/1.60,99.91/1.35,99.56/2.92,99.81/1.80
SFATNet,98.33/5.95,99.89/1.52,99.85/1.75,97.50/8.17,98.89/4.35
