In [1]:
import os

import pandas as pd
from IPython.display import HTML, display
from ay2.tools.pandas import format_numeric_of_df_columns
from typing import List, Dict, Any, Union

In [2]:
ROOT_PATH = "/home/ay/data/DATA/1-model_save/00-Deepfake/1-df-audio-new"

# Help Functions

In [3]:
def read_test_result_from_csv_file(model, task, version=0, metric_prefix="test", file_name="test"):
    """
    Read test results from a CSV file: `f"{ROOT_PATH}/{model}/{task}/version_{version}/{file_name}.csv"`

    Args:
        model (str): The name of the model.
        task (str): The task for which the results are being read.
        version (int, optional): The version number of the model. Defaults to 0.
        metric_prefix (str, optional): The prefix used for metric columns in the CSV file. Defaults to "test".
        file_name (str, optional): The name of the CSV file containing the test results. Defaults to "test".

    Returns:
        pd.DataFrame or None: A DataFrame containing the requested metrics, or None if the file does not exist.
    """
    save_path = f"{ROOT_PATH}/{model}/{task}/version_{version}"
    csv_path = os.path.join(save_path, f"{file_name}.csv")

    if not os.path.exists(csv_path):
        print("Warning!!!! cannot find: ", csv_path)
        return None

    data = pd.read_csv(csv_path)
    data = data[[f"{metric_prefix}-acc", f"{metric_prefix}-auc", f"{metric_prefix}-eer"]]
    return data

In [4]:
def generate_res_column_for_df(data:pd.DataFrame, acc=1, auc=1, eer=1, metric_prefix="test")->pd.DataFrame:
    """
    Generate a new column in the DataFrame containing formatted model metrics.

    Args:
        data (pd.DataFrame): The input DataFrame with columns for 'test-acc', 'test-auc', and 'test-eer'.
        acc (bool, optional): Include the accuracy metric in the output. Defaults to True.
        auc (bool, optional): Include the AUC metric in the output. Defaults to True.
        eer (bool, optional): Include the Equal Error Rate (EER) metric in the output. Defaults to True.
        metric_prefix (str, optional): The prefix used for metric columns. Defaults to "test".

    Returns:
        pd.DataFrame: The input DataFrame with an additional column 'res' containing formatted metrics.
    """
    
    def help_format(x):
        _acc = "{:.2f}".format(x[f"{metric_prefix}-acc"] * 100)
        _auc = "{:.2f}".format(x[f"{metric_prefix}-auc"] * 100)
        _eer = "{:.2f}".format(x[f"{metric_prefix}-eer"] * 100)
        
        res = []
        if acc: res.append(_acc)
        if auc: res.append(_auc)
        if eer: res.append(_eer)
        res = "/".join(res) or ""
    
    data = data.apply(
        lambda x: help_format(x),
        axis=1,
    )
    
    return data

In [5]:
_versions = [0, 1, 2, 3, 4, 6, 7, 8, 9, 10]
MODELs_VERSIONS = {
    "LCNN": _versions,
    "RawNet2": _versions,
    "RawGAT": _versions,
    "Wave2Vec2": _versions,
    "WaveLM": _versions,
    "LibriSeVoc": _versions,
    "AudioClip": _versions,
    "Wav2Clip": _versions,
    "AASIST": _versions,
    "SFATNet": _versions,
    "ASDG": _versions,
    "Ours/ResNet":_versions,
}
models = MODELs_VERSIONS.keys()
versions = MODELs_VERSIONS.values()

In [6]:
tasks=["LibriSeVoc_inner", "wavefake_inner", "DECRO_english", "DECRO_chinese"]

for model, version in zip(models, versions):
    for task in tasks:
        datas = []
        for v in version:
            _data = read_test_result(model, task, v)
            if _data is not None:
                datas.append(_data[0:1])
        datas = pd.concat(datas, ignore_index=True)
        datas = datas[:].mean()

NameError: name 'read_test_result' is not defined

In [7]:
class BaseReadResult(object):
    def __init__(self, model, versions: List[int]=[], task=None, calc_avg=True, acc=0,auc=1, eer=1, metric_prefix="test", file_name="test", late_read=False):
        assert len(models) == len(versions)

        self.acc=acc
        self.auc=auc
        self.eer=eer
        self.metric_prefix = metric_prefix
        self.task = task
        self.calc_avg = calc_avg
        self.file_name=file_name
        self.model_names = []
        for model, version in zip(models, versions):
            if isinstance(version, int):
                self.model_names.append([model, version, model])
            else:
                for _v in version:
                    self.model_names.append([model, _v, model + f"-{_v}"])

        self.configure_Columns()

        if not late_read:
            self.data = self.read_all_datas(calc_avg=calc_avg)

    def configure_Columns(self):
        raise NotImplementedError
        # self.METHODs = []

    def set_Columns(self, columns):
        self.METHODs = columns
        self.data = self.read_all_datas(calc_avg=self.calc_avg)
        
    
    def post_read_operation(self, data):
        return data
    
    def read_data_for_model(self, model, version, model_name, calc_avg=True):
        _data = read_test_result(model, self.task, version, metric_prefix=self.metric_prefix, file_name=self.file_name)
        if _data is None:
            return None

        _data = self.post_read_operation(_data)
        
        res = _data
        if calc_avg:
            res.loc["Avg", :] = res.mean()
        res["res"] = res.apply(
            lambda x: format_res(x, acc=self.acc, auc=self.auc, eer=self.eer, metric_prefix=self.metric_prefix),
            axis=1,
        )
        res["model"] = model_name
        res["dataset"] = self.task.split("_")[0]
        try:
            res["method"] = self.METHODs
        except ValueError as e:
            print(model, version, e)
            return None
        return res

    def read_all_datas(self, calc_avg=True):
        DATA = []
        model_names = []
        for model, version, model_name in self.model_names:
            _data = self.read_data_for_model(model, version, model_name, calc_avg=calc_avg)
            if _data is not None:
                DATA.append(_data)
            model_names.append(model_name)

        data = pd.concat(DATA)
        data = data.pivot(index="model", columns=["method"], values="res").reset_index(drop=False).set_index("model")
        models2 = [x for x in model_names if x in data.index]
        data = data.loc[models2]
        data = data[self.METHODs]
        return data

    def display(self, show_latex=0, drop=None):
        data = self.data
        if drop is not None:
            data = self.data.drop(drop, axis=1)
        display(HTML(data.to_html()))
        if show_latex:
            res = data.style.to_latex(column_format="lrr")
            res = res.replace('_', '-').replace('$', '')
            print(res)

In [8]:
def display_inner_evaluation(
    models,
    tasks=["LibriSeVoc_inner", "wavefake_inner", "DECRO_english", "DECRO_chinese"],
    versions=[],
    show_latex=0,
    show_html=1,
    metric_prefix="test",
    avg_res=True,
    file_name="test",
):
    metric_auc = f"{metric_prefix}-auc"
    metric_eer = f"{metric_prefix}-eer"

    DATA = []
    for model, version in zip(models, versions):
        for task in tasks:
            datas = []
            for v in version:
                _data = read_test_result(model, task, v)
                if _data is not None:
                    datas.append(_data[0:1])
            datas = pd.concat(datas, ignore_index=True)
            datas = datas[:].mean()

            _data = datas
            if _data is None:
                continue
            res = dict(_data)
            res["model"] = model
            res["task"] = task
            DATA.append(res)

    data = pd.DataFrame(DATA)

    data2 = data.groupby("model").mean(numeric_only=True).reset_index()
    data2["task"] = "mean"
    data = pd.concat([data, data2], ignore_index=True)

    data["res"] = data.apply(
        lambda x: format_res(x, acc=0, auc=1, eer=1, metric_prefix=metric_prefix),
        axis=1,
    )
    data = data.pivot(index="model", columns="task", values="res").rename_axis(None, axis=1).reset_index()

    data = data.set_index("model")
    columns = tasks
    if avg_res:
        columns += ["mean"]
    data = data[columns]
    data = data.loc[models]

    data = format_numeric_of_df_columns(data)
    
    if show_html:
        display(HTML(data.to_html()))
    if show_latex:
        print(data.style.to_latex(column_format="lrr"))
    return data

In [None]:
data = display_inner_evaluation(models, versions=versions)