In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sub-folders of data/ whose importance CSVs are scored.
directories = [
    "importances_syn_base",
    "importances_syn_mixed",
]
# Uncertainty tags; each appears in the importance file names as "<tag>U".
uncs = [
    "high",
    "random",
    "low",
]
# Method names; each is the leading component of an importance file name.
models = [
    "CLUE",
    "VarX",
    "VarXIG",
    "VarXLRP",
    "infoshap",
]

In [23]:
# For every result directory, score each (model, uncertainty tag) importance
# file on how much of the importance lands on rows whose feature name contains
# "noise_feature", print the scores, and write one summary CSV per directory.
for directory in directories:
    # Top-k cut-off used by the precision metric: 10 for the "mixed" directory,
    # 5 for every other directory.
    n_noise_feature = 10 if directory == "importances_syn_mixed" else 5
    records = []
    for model in models:
        for unc in uncs:
            df = pd.read_csv(f"data/{directory}/{model}_{unc}U_importances_n_40000_s_2.00_n_test_1500_n_exp_200.csv")

            # One boolean per CSV row: does the feature name mention "noise_feature"?
            is_noise = ["noise_feature" in fn for fn in df["feature_name"]]

            # Share of the summed importance that sits on the flagged rows.
            noise_mass = df[is_noise]["feature_importance"].sum()
            total_mass = df["feature_importance"].sum()
            mass_accuracy = noise_mass / total_mass

            # Fraction of the first n_noise_feature rows that are flagged.
            # NOTE(review): this is only a "top-k" precision if the CSV rows are
            # already ordered by importance - confirm against the file writer.
            precision = np.sum(is_noise[:n_noise_feature]) / n_noise_feature

            print(f"{model} {unc} mass accuracy: {mass_accuracy:.5f}, precision: {precision:.5f}")
            records.append({
                "unc": unc,
                "model": model,
                "mass_accuracy": mass_accuracy,
                "precision": precision,
            })

    print(f"----- {directory} completed -----------------------------------------------------")
    # Explicit column list keeps the CSV column order fixed.
    df_res = pd.DataFrame(records, columns=["unc", "model", "mass_accuracy", "precision"])
    df_res.to_csv(f"data/{directory}/noise_feature_global_loc_metrics.csv", index=False)


CLUE high mass accuracy: 0.08613, precision: 0.80000
CLUE random mass accuracy: 0.06567, precision: 0.00000
CLUE low mass accuracy: 0.04861, precision: 0.00000
VarX high mass accuracy: 0.58998, precision: 1.00000
VarX random mass accuracy: 0.55869, precision: 1.00000
VarX low mass accuracy: 0.56033, precision: 1.00000
VarXIG high mass accuracy: 0.54846, precision: 1.00000
VarXIG random mass accuracy: 0.48602, precision: 1.00000
VarXIG low mass accuracy: 0.18664, precision: 1.00000
VarXLRP high mass accuracy: 0.51134, precision: 1.00000
VarXLRP random mass accuracy: 0.54370, precision: 0.60000
VarXLRP low mass accuracy: 0.24050, precision: 1.00000
infoshap high mass accuracy: 0.20920, precision: 1.00000
infoshap random mass accuracy: 0.18030, precision: 0.80000
infoshap low mass accuracy: 0.17162, precision: 0.80000
----- importances_syn_base completed -----------------------------------------------------
CLUE high mass accuracy: 0.07105, precision: 0.40000
CLUE random mass accuracy: 0.

In [3]:

# Score the importance files of the 12 repeated runs stored under
# data/importances_mult_syn_base, then aggregate mean/std per (unc, model)
# and write the aggregated table to CSV.
n_noise_feature = 5  # top-k cut-off used by the precision metric
records = []
for run_id in range(12):
    for model in models:
        for unc in uncs:
            df = pd.read_csv(f"data/importances_mult_syn_base/{model}_{unc}U_importances_n_40000_s_2.00_n_test_1500_n_exp_200_run_{run_id}.csv")

            # One boolean per CSV row: does the feature name mention "noise_feature"?
            is_noise = ["noise_feature" in fn for fn in df["feature_name"]]

            # Share of the summed importance that sits on the flagged rows.
            noise_mass = df[is_noise]["feature_importance"].sum()
            total_mass = df["feature_importance"].sum()
            mass_accuracy = noise_mass / total_mass

            # Fraction of the first n_noise_feature rows that are flagged.
            # NOTE(review): this is only a "top-k" precision if the CSV rows are
            # already ordered by importance - confirm against the file writer.
            precision = np.sum(is_noise[:n_noise_feature]) / n_noise_feature

            print(f"{model} {unc} mass accuracy: {mass_accuracy:.5f}, precision: {precision:.5f}")
            records.append({
                "run": run_id,
                "unc": unc,
                "model": model,
                "mass_accuracy": mass_accuracy,
                "precision": precision,
            })

# Per-run table; the explicit column list keeps the column order fixed.
df_res = pd.DataFrame(records, columns=["run", "unc", "model", "mass_accuracy", "precision"])

# Mean and standard deviation of both metrics across runs for each (unc, model).
metric_stats = {"mass_accuracy": ["mean", "std"], "precision": ["mean", "std"]}
df_res_agg = df_res.groupby(["unc", "model"]).agg(metric_stats).reset_index()

# Flatten the two-level column index to "metric.stat"; the group keys have an
# empty second level, so the trailing "." left by the join is stripped.
df_res_agg.columns = df_res_agg.columns.map('.'.join).str.strip('.')
df_res_agg.to_csv("data/importances_mult_syn_base/noise_feature_global_loc_metrics.csv", index=False)

CLUE high mass accuracy: 0.08651, precision: 0.80000
CLUE random mass accuracy: 0.06684, precision: 0.00000
CLUE low mass accuracy: 0.04826, precision: 0.00000
VarX high mass accuracy: 0.49688, precision: 1.00000
VarX random mass accuracy: 0.44758, precision: 1.00000
VarX low mass accuracy: 0.60072, precision: 1.00000
VarXIG high mass accuracy: 0.51531, precision: 1.00000
VarXIG random mass accuracy: 0.43487, precision: 1.00000
VarXIG low mass accuracy: 0.15667, precision: 1.00000
VarXLRP high mass accuracy: 0.48455, precision: 1.00000
VarXLRP random mass accuracy: 0.46250, precision: 1.00000
VarXLRP low mass accuracy: 0.20780, precision: 1.00000
infoshap high mass accuracy: 0.10958, precision: 0.60000
infoshap random mass accuracy: 0.09821, precision: 0.60000
infoshap low mass accuracy: 0.11638, precision: 0.60000
CLUE high mass accuracy: 0.08363, precision: 0.80000
CLUE random mass accuracy: 0.06568, precision: 0.00000
CLUE low mass accuracy: 0.05242, precision: 0.00000
VarX high mas