This file contains the code for the tables for the rebuttal

In [1]:
import pandas as pd
import glob

# ---------------------------------------
# Load all CSVs for wdbc
# ---------------------------------------
# glob returns matches in arbitrary order; sorting makes the concatenated
# row order identical on every run and machine.
csv_files = sorted(glob.glob("csv/realdata/wdbc_SL_np_seed*.csv"))
if not csv_files:
    # Without this guard pd.concat fails with an opaque
    # "No objects to concatenate" error that does not name the missing input.
    raise FileNotFoundError(
        "No result files match 'csv/realdata/wdbc_SL_np_seed*.csv'"
    )
df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

# ---------------------------------------
# Apply the renaming map
# ---------------------------------------
# Maps the method labels stored in the CSVs to the labels used in the tables.
# Labels that are not keys of this map (e.g. "LOCO-W") are left unchanged.
rename_map = {
    'CPI': 'Sobol-CPI(1)',
    'S-CPI': 'Sobol-CPI(10)',
    'S-CPI2': 'Sobol-CPI(100)',
    'CPI_n': 'Sobol-CPI(1)_n',
    'S-CPI_n': 'Sobol-CPI(10)_n',
    'S-CPI2_n': 'Sobol-CPI(100)_n',
    'CPI_sqd': 'Sobol-CPI(1)_n2',
    'S-CPI_sqd': 'Sobol-CPI(10)_n2',
    'S-CPI2_sqd': 'Sobol-CPI(100)_n2',
    'CPI_sqrt': 'Sobol-CPI(1)_sqrt',
    'S-CPI_sqrt': 'Sobol-CPI(10)_sqrt',
    'S-CPI2_sqrt': 'Sobol-CPI(100)_sqrt',
    'CPI_bt': 'Sobol-CPI(1)_bt',
    'S-CPI_bt': 'Sobol-CPI(10)_bt',
    'S-CPI2_bt': 'Sobol-CPI(100)_bt',
    'LOCO_sqd': 'LOCO_n2',
    'CPI_ST': 'Sobol-CPI(1)_ST',
    'CPI_wilcox': 'Sobol-CPI(1)_wilcox'
}
df["method"] = df["method"].replace(rename_map)

# ---------------------------------------
# Compute null index and statistics
# ---------------------------------------
# The null variable is taken to be the last one, with zero-based index p - 1.
# NOTE(review): assumes columns are named imp_V0..imp_V{p-1} and
# pval0..pval{p-1} — confirm against the CSV schema.
imp_cols = [c for c in df.columns if c.startswith("imp_V")]
p = len(imp_cols)
null_idx = p - 1
null_pval_col = f"pval{null_idx}"

pval_cols = [c for c in df.columns if c.startswith("pval")]
if null_pval_col not in pval_cols:
    raise KeyError(
        f"Expected null p-value column '{null_pval_col}' "
        f"(derived from {p} 'imp_V*' columns) is missing"
    )
# Exclude the null column by NAME rather than by position, so the split
# between null and non-null p-values does not depend on CSV column order
# and always agrees with the column used for the type-I error below.
pval_nonnull = [c for c in pval_cols if c != null_pval_col]

# Per row: number of non-null variables rejected at the 5% level, and a 0/1
# flag for whether the null variable was (wrongly) rejected at that level.
df["discoveries"] = (df[pval_nonnull] < 0.05).sum(axis=1)
df["type_I_error"] = (df[null_pval_col] < 0.05).astype(int)

# ---------------------------------------
# Filter only corr=0.9 and selected methods
# ---------------------------------------
methods_of_interest = [
    "Sobol-CPI(1)_wilcox",
    "LOCO_wilcox",
    "LOCO-W",
    "LOCO_sqrt"
]

df_filt = df[(df["corr"] == 0.9) &
             (df["method"].isin(methods_of_interest))]

# ---------------------------------------
# Compute the means table
# ---------------------------------------
# One row per method: averages over all matching rows of the loaded files.
table = (
    df_filt
    .groupby("method")[["discoveries", "type_I_error", "tr_time"]]
    .mean()
)

print(table)


                     discoveries  type_I_error     tr_time
method                                                    
LOCO-W                      9.06          0.40  179.516268
LOCO_sqrt                   0.00          0.00  135.418942
LOCO_wilcox                 7.32          0.34  135.022021
Sobol-CPI(1)_wilcox         4.14          0.06   10.616445


In [4]:
import pandas as pd
import glob

# ---------------------------------------
# Load all CSVs for wdbc
# ---------------------------------------
# glob returns matches in arbitrary order; sorting makes the concatenated
# row order identical on every run and machine.
csv_files = sorted(glob.glob("csv/realdata/wdbc_SL_np_seed*.csv"))
if not csv_files:
    # Without this guard pd.concat fails with an opaque
    # "No objects to concatenate" error that does not name the missing input.
    raise FileNotFoundError(
        "No result files match 'csv/realdata/wdbc_SL_np_seed*.csv'"
    )
df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

# ---------------------------------------
# Apply renaming map
# ---------------------------------------
# Maps the method labels stored in the CSVs to the labels used in the tables.
# Labels that are not keys of this map (e.g. "LOCO", "PFI") are left unchanged.
rename_map = {
    'CPI': 'Sobol-CPI(1)',
    'S-CPI': 'Sobol-CPI(10)',
    'S-CPI2': 'Sobol-CPI(100)',
    'CPI_n': 'Sobol-CPI(1)_n',
    'S-CPI_n': 'Sobol-CPI(10)_n',
    'S-CPI2_n': 'Sobol-CPI(100)_n',
    'CPI_sqd': 'Sobol-CPI(1)_n2',
    'S-CPI_sqd': 'Sobol-CPI(10)_n2',
    'S-CPI2_sqd': 'Sobol-CPI(100)_n2',
    'CPI_sqrt': 'Sobol-CPI(1)_sqrt',
    'S-CPI_sqrt': 'Sobol-CPI(10)_sqrt',
    'S-CPI2_sqrt': 'Sobol-CPI(100)_sqrt',
    'CPI_bt': 'Sobol-CPI(1)_bt',
    'S-CPI_bt': 'Sobol-CPI(10)_bt',
    'S-CPI2_bt': 'Sobol-CPI(100)_bt',
    'LOCO_sqd': 'LOCO_n2',
    'CPI_ST': 'Sobol-CPI(1)_ST',
    'CPI_wilcox': 'Sobol-CPI(1)_wilcox'
}
df["method"] = df["method"].replace(rename_map)

# ---------------------------------------
# Compute null importance index
# ---------------------------------------
# The null variable is taken to be the last one, with zero-based index p - 1.
# NOTE(review): assumes columns are named imp_V0..imp_V{p-1}; with one-based
# names this would silently select the wrong variable — confirm against the
# CSV schema.
imp_cols = [c for c in df.columns if c.startswith("imp_V")]
p = len(imp_cols)
null_idx = p - 1
df["null_importance"] = df[f"imp_V{null_idx}"]

# ---------------------------------------
# Methods of interest
# ---------------------------------------
methods_of_interest = [
    "Sobol-CPI(1)",
    "Sobol-CPI(10)",
    "Sobol-CPI(100)",
    "LOCO-W",
    "LOCO",
    "PFI"
]

# ---------------------------------------
# Filter wdbc, corr=0.9 and desired methods
# ---------------------------------------
df_filt = df[
    (df["corr"] == 0.9) &
    (df["method"].isin(methods_of_interest))
]

# ---------------------------------------
# Compute mean & standard deviation
# ---------------------------------------
# One row per method: mean and sample standard deviation (not the variance)
# of the importance assigned to the null variable.
table = (
    df_filt
    .groupby("method")["null_importance"]
    .agg(['mean', 'std'])
    .rename(columns={'mean': 'null_importance_mean',
                     'std': 'null_importance_std'})
)

print(table)


                null_importance_mean  null_importance_std
method                                                   
LOCO                        0.000207             0.001688
LOCO-W                      0.001980             0.006060
PFI                         0.026793             0.037477
Sobol-CPI(1)                0.000040             0.000353
Sobol-CPI(10)               0.000019             0.000503
Sobol-CPI(100)              0.000033             0.000512
