In [19]:
import sys
import os
sys.path.append("../src")
from utils import select_model, get_dataset, low_density_anomalies

In [20]:
experiment_path = "../results/epoch_budget/DTEC_DSIL_deterministic_0.5_s0_T400_bins7/18_Ionosphere"
experiment_config = os.path.join(experiment_path, "experiment_config.yaml")

from hydra import initialize, compose
import torch
from pathlib import Path
import yaml

In [21]:
def load_model_and_dataset_from_path(experiment_path, X):
    cfg_experiment_path = Path(f"{experiment_path}/experiment_config.yaml")
    model_path = f"{experiment_path}/model.pth"

    with initialize(config_path=str(experiment_path), version_base=None):
        cfg = compose(config_name=cfg_experiment_path.name)

    model = select_model(cfg.model, device="cuda:0" if torch.cuda.is_available() else "cpu")
    model.load_model(model_path, X)
    return model, cfg 

In [22]:
def load_dataset(experiment_path):
    cfg_experiment_path = Path(f"{experiment_path}/experiment_config.yaml")
    # Load as yaml
    with open(cfg_experiment_path, 'r') as stream:
        config = yaml.safe_load(stream)
        if 'dataset_path' in config['dataset'].keys():
            config['dataset']['dataset_path']   = "../" +  config['dataset']['dataset_path']
        else:   
            config['dataset']['dataset_path']   = "../" +  config['dataset']['data_path']

    with initialize(config_path=str(experiment_path), version_base=None):
        cfg = compose(config_name=cfg_experiment_path.name)
    dataset = get_dataset(cfg)
    return dataset

In [23]:

dataset = load_dataset(experiment_path)
model, cfg = load_model_and_dataset_from_path(experiment_path, dataset['X_train'])

generating duplicate samples for dataset 18_Ionosphere...
{'Samples': 1000, 'Features': 32, 'Anomalies': 339, 'Anomalies Ratio(%)': 33.9}


In [24]:
# Get anomaly score of every samples
def get_anomaly_score(model, X):
    model.model.eval()
    with torch.no_grad():
        X = torch.from_numpy(X).float()
        X = X.to(model.device)
        anomaly_score = model.predict_score(X)
    return anomaly_score

anomaly_scores = get_anomaly_score(model, dataset['X_test'])
y_pred = low_density_anomalies(-anomaly_scores, num_anomalies=dataset['y_test'].sum())
 # Get intersection between prediction and ground truth, both are 1 array of 0 and 1
true_positive_indices = (y_pred == 1) & (dataset['y_test'] == 1)
data_to_explain = dataset['X_test'][true_positive_indices]
data_to_explain

array([[-1.31547343, -0.06933104, -1.22675975, ...,  1.95154   ,
        -0.71997719, -0.0683503 ],
       [ 0.62245261, -0.13114821, -3.22847631, ...,  0.37796339,
        -0.84339789,  0.84201884],
       [-1.31547343, -0.06933104, -1.22675975, ..., -0.56313922,
        -1.11931389, -2.10464231],
       ...,
       [ 0.10715005,  0.8260163 , -1.22675975, ..., -0.02722917,
        -0.71997719, -0.0683503 ],
       [-0.2720851 , -2.2334152 ,  0.77495682, ...,  0.43160783,
        -0.71997719, -0.0683503 ],
       [-1.31547343, -0.06933104, -1.22675975, ..., -0.02722917,
        -0.71997719, -0.0683503 ]])

In [25]:
from sklearn.metrics import precision_score, recall_score, f1_score
from adbench.myutils import Utils
utils = Utils()
f1_score = f1_score(dataset['y_test'], y_pred)
result = utils.metric(y_true=dataset['y_test'], y_score=anomaly_scores)
print(f'AUCROC: {result["aucroc"]}')

AUCROC: 0.9208270297528997


In [26]:
data_to_explain.shape

(271, 32)

In [27]:
from shap import KernelExplainer
import shap
import numpy as np

In [28]:
# Take on random sample from the dataset
import random
import numpy as np
from tqdm.notebook import tqdm

explainer= KernelExplainer(model.predict_score, data=np.zeros((1, dataset['X_test'].shape[1])), silent=True)
shap_expl = explainer.shap_values(data_to_explain, nsamples=50, show_progress=False, silent=True)

step=20
w_explanations = np.array(model.instance_explanation(data_to_explain, agg="weighted_mean", step=step))
mean_explanations = np.array(model.instance_explanation(data_to_explain, agg="mean", step=step))
max_explanations  = np.array(model.instance_explanation(data_to_explain, agg="max", step=step))



In [29]:
shap_expl = np.array(shap_expl)
shap_expl.shape

(271, 32)

In [30]:
def infidelity(x, model, attribution, num_samples=50, delta_std=0.01):
    x= x.reshape(1, -1)
    attribution = attribution.reshape(1, -1)
    # Scale so that the sum is 1
    attribution = attribution / np.sum(attribution)
    n_features = x.shape[1]
    f_x = model.predict_score(x)
    infidelities = []
    for _ in range(num_samples):
        delta = np.random.normal(0, delta_std, size=(1, n_features))
        f_x_delta = model.predict_score(x - delta)
        dot = np.dot(delta.T, attribution)
        error = (dot - f_x + f_x_delta)**2
        infidelities.append(error)
    return np.mean(infidelities)

In [31]:
len(data_to_explain)

271

In [32]:
inf_our_list = []
inf_shap_list = []
inf_mean_list = []
inf_max_list = []
for i in tqdm(range(len(data_to_explain))):
    x_i = data_to_explain[i,:]
    inf_our_list.append(infidelity(x_i, model, w_explanations[i], num_samples=100, delta_std=0.01))
    inf_shap_list.append(infidelity(x_i, model, shap_expl[i], num_samples=100, delta_std=0.01))
    inf_mean_list.append(infidelity(x_i, model, mean_explanations[i], num_samples=100, delta_std=0.01))
    inf_max_list.append(infidelity(x_i, model, max_explanations[i], num_samples=100, delta_std=0.01))
inf_our_list = np.array(inf_our_list)
inf_shap_list = np.array(inf_shap_list)
inf_mean_list = np.array(inf_mean_list)
inf_max_list = np.array(inf_max_list)
inf_our_list.mean(), inf_max_list.mean(), inf_mean_list.mean(), inf_shap_list.mean()

  0%|          | 0/271 [00:00<?, ?it/s]

(0.00013624108206129908,
 0.00013713608048050786,
 0.0001371944278633429,
 0.00014189677705700476)

In [33]:
import os
import pandas as pd
dataset_name = experiment_path.split("/")[-1]
inf_our_list = pd.DataFrame(inf_our_list, columns=["inf_our"])
inf_shap_list = pd.DataFrame(inf_shap_list, columns=["inf_shap"])
inf_mean_list = pd.DataFrame(inf_mean_list, columns=["inf_mean"])
inf_max_list = pd.DataFrame(inf_max_list, columns=["inf_max"])
inf_list = pd.concat([inf_our_list, inf_shap_list, inf_mean_list, inf_max_list], axis=1)
inf_list.to_csv(os.path.join(f"infidelity_{dataset_name}.csv"), index=False)

In [34]:
model.predict_score(data_to_explain[0,:].reshape(1, -1))

array([5.98060996])

In [None]:
from scipy.stats import pearsonr

def faithfulness(f, g, x, x_baseline, subset_size=5, num_samples=100):
    d = x.shape[0]
    attributions = g(x)  # Vector of shape (d,)

    attr_sums = []
    output_diffs = []

    for _ in range(num_samples):
        # Randomly select a subset of feature indices
        S = np.random.choice(d, size=subset_size, replace=False)

        # Attribution sum over S
        sum_attr = np.sum(attributions[S])

        # Create x with features in S replaced by baseline values
        x_masked = x.copy()
        x_masked[S] = x_baseline[S]

        # Difference in model output
        delta_f = f(x.reshape(1, -1)) - f(x_masked.reshape(1, -1))

        attr_sums.append(sum_attr)
        output_diffs.append(delta_f)
    attr_sums = np.array(attr_sums)
    output_diffs = np.array(output_diffs)
    # Compute Pearson correlation between attribution sums and output differences
    corr, _ = pearsonr(attr_sums, output_diffs.squeeze())
    # Replace NaN with 0
    if np.isnan(corr):
        corr = 0
    return corr

x_baseline = np.zeros((dataset['X_test'].shape[1]))

import pandas as pd
faithfulness_df = pd.DataFrame()
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=UserWarning, module="shap")
for k in range(3,dataset['X_train'].shape[1], 2):
    result = {}
    for explanation_method in ['shap' , 'mean', 'max', 'our']:
        if explanation_method == 'shap':
            g = lambda x: explainer.shap_values(x, nsamples=50, show_progress=False)
        elif explanation_method == 'mean':
            g = lambda x: model.instance_explanation(x, agg="mean", step=10)
        elif explanation_method == 'max':
            g = lambda x: model.instance_explanation(x, agg="max", step=10)
        elif explanation_method == 'our':
            g = lambda x: model.instance_explanation(x, agg="weighted_mean", step=10)
        faithfulness_list = []
        for idx in tqdm(range(len(data_to_explain))):
            x_i = data_to_explain[idx,:]
            faithfulness_list.append(faithfulness(model.predict_score, g, x_i, x_baseline, subset_size=k, num_samples=100))
        faithfulness_list = np.array(faithfulness_list)
        result[explanation_method] = faithfulness_list.mean()
    faithfulness_df = pd.concat([faithfulness_df, pd.DataFrame(result, index=[k])])
    print(f'k: {k}, {result}')
faithfulness_df = faithfulness_df.reset_index()

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 3, {'shap': 0.3564152155796831, 'mean': 0.6981671608715982, 'max': 0.5844218251671052, 'our': 0.6924738714016773}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 5, {'shap': 0.36703839433513186, 'mean': 0.6486167071688503, 'max': 0.5368443602406564, 'our': 0.6549995070306396}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 7, {'shap': 0.34920954031559975, 'mean': 0.6094757096906744, 'max': 0.511458975488447, 'our': 0.6214765404487361}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 9, {'shap': 0.3667236212195725, 'mean': 0.5831057194769506, 'max': 0.4752935562004571, 'our': 0.5831238866102905}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 11, {'shap': 0.37303449996525573, 'mean': 0.5559125919356586, 'max': 0.4586934032312285, 'our': 0.5649444125792887}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 13, {'shap': 0.3758745362865846, 'mean': 0.5337193108764521, 'max': 0.4429955472376341, 'our': 0.5260343554656497}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 15, {'shap': 0.398499342090926, 'mean': 0.5093277482432855, 'max': 0.4218176770030802, 'our': 0.5083314764209138}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 17, {'shap': 0.4146366459284928, 'mean': 0.49128910706683515, 'max': 0.40110407231320316, 'our': 0.49645685831645414}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

k: 19, {'shap': 0.4352630444017832, 'mean': 0.4760487843106983, 'max': 0.408184090605558, 'our': 0.4771817793811274}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

In [None]:
faithfulness_df.to_csv(os.path.join(f"faithfulness_{dataset_name}.csv"), index=False)


In [None]:
from sklearn.metrics import mean_squared_error
import pandas as pd
def compute_fidelity_regression(x, f, g, train_set, top_k=5, n_samples=100, random_state=42):
    rng = np.random.default_rng(random_state)
    idx = rng.choice(len(train_set), size=n_samples, replace=False)
    X_subset = train_set[idx]
    x = x.reshape(1, -1)

    importances = g(x).reshape(1,-1) # shape: (n_samples, n_features)
    important_indices = np.argsort(-np.abs(importances), axis=1)[:, :top_k]  # shape: (n_samples, top_k)
    score_orig = f(x)

    # Create the perturbed dataset
    X_subset[:, important_indices] = x[:,important_indices]
    score_perturbed = f(X_subset) 
    score_orig = score_orig.repeat(n_samples, axis=0)
    

    fidelity_score = mean_squared_error(score_orig, score_perturbed)
    return fidelity_score
fidelity_result_pd = pd.DataFrame()
for k in range(3,dataset['X_train'].shape[1], 2):
    fidelity_result = {"k": k}
    for explanation_method in ["shap", 'mean', 'max', 'our']:
        if explanation_method == 'shap':
            g = lambda x: explainer.shap_values(x, nsamples=50, show_progress=False,silent=True)
        elif explanation_method == 'mean':
            g = lambda x: model.instance_explanation(x, agg="mean", step=10)
        elif explanation_method == 'max':
            g = lambda x: model.instance_explanation(x, agg="max", step=10)
        elif explanation_method == 'our':
            g = lambda x: model.instance_explanation(x, agg="weighted_mean", step=10)
        fidelity_list = []
        for idx in tqdm(range(len(data_to_explain))):
            x_i = data_to_explain[idx,:]
            fidelity_list.append(compute_fidelity_regression(x_i, model.predict_score, g, train_set=dataset['X_train'], top_k=k, n_samples=100))
        fidelity_list = np.array(fidelity_list)
        fidelity_result[explanation_method] = fidelity_list.mean()
    print(f'{fidelity_result}')
    fidelity_result_pd = pd.concat([fidelity_result_pd, pd.DataFrame(fidelity_result, index=[0])], axis=0)
fidelity_result_pd = fidelity_result_pd.reset_index(drop=True)

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

{'k': 3, 'shap': 4.644011665427498, 'mean': 5.669872974265195, 'max': 5.8890854860662865, 'our': 5.770517617346997}


  0%|          | 0/271 [00:00<?, ?it/s]

  0%|          | 0/271 [00:00<?, ?it/s]

In [None]:
fidelity_result_pd

Unnamed: 0,k,shap,mean,max,our
0,3,0.179974,0.057959,0.292865,0.116938
1,5,0.010097,0.039677,0.021062,0.01886
2,7,0.007667,0.008114,0.010811,0.007752
3,9,0.006361,0.005943,0.005293,0.005715
4,11,0.007296,0.005734,0.004929,0.006054
5,13,0.005507,0.005324,0.004445,0.00511
6,15,0.005265,0.004297,0.00287,0.004564
7,17,0.004052,0.002323,0.001528,0.002739
8,19,0.002219,0.001157,0.000529,0.000855


In [None]:
faithfulness_df

Unnamed: 0,index,shap,mean,max,our
0,3,0.501464,0.732427,0.73812,0.716208
1,5,0.446766,0.656508,0.662099,0.655493
2,7,0.466934,0.599873,0.576228,0.575841
3,9,0.509156,0.533887,0.525944,0.541271
4,11,0.526958,0.512099,0.492259,0.516931
5,13,0.602868,0.520093,0.517452,0.522195
6,15,0.648002,0.566589,0.540341,0.566938
7,17,0.706596,0.593542,0.575458,0.588681
8,19,0.767367,0.641968,0.624331,0.645151


In [None]:
# Save everything, infidelity, faithfulness and fidelity

fidelity_result_pd.to_csv(os.path.join(f"fidelity_{dataset_name}.csv"), index=False)

print(f"Saved infidelity, faithfulness and fidelity to {dataset_name}.csv")