In [1]:
from util import *

from src.dataset import load_hospital
from src.counterfactual import get_baseline_counterfactuals

import joblib
import time

import warnings
# Installs a blanket 'ignore' filter: every warning raised after this point is suppressed.
warnings.filterwarnings('ignore')

# Get a model
dataset = 'hospital'
# NOTE(review): joblib.load unpickles the archive, which can run arbitrary code;
# only point this at model files you trust.
# The loaded object is unpacked as a (model, encoder, scaler) triple.
model, encoder, scaler = joblib.load(f'models/{dataset}.gz') # Model should have the BlackBox interface
# Column metadata for this dataset; passed to display_tabular_cfs later on.
cols = load_columns(dataset)

2023-01-30 22:20:11.874956: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# The loader is consumed as an iterator of 2-tuples; only the first element of
# each batch is kept (the second is discarded -- presumably labels, TODO confirm).
loader = get_loader(dataset)
X_corpus, _ = next(loader)  # first batch  -> corpus
X_test, _ = next(loader)    # second batch -> test set

# Build the simplex object from the model, corpus and test batch; verbose=True
# produces the "Weight Fitting Epoch" progress lines shown in the cell output.
simplex = get_simplex(model, X_corpus, X_test, verbose = True)

Weight Fitting Epoch: 2000/10000 ; Error: 24.2 ; Regulator: 216 ; Reg Factor: 1
Weight Fitting Epoch: 4000/10000 ; Error: 6.39 ; Regulator: 21.4 ; Reg Factor: 1
Weight Fitting Epoch: 6000/10000 ; Error: 1.39 ; Regulator: 5.64 ; Reg Factor: 1
Weight Fitting Epoch: 8000/10000 ; Error: 0.862 ; Regulator: 1.94 ; Reg Factor: 1
Weight Fitting Epoch: 10000/10000 ; Error: 0.714 ; Regulator: 0.72 ; Reg Factor: 1


In [3]:
%%time
test_id = 1

cfs, x, desired_class = get_simplex_cf_tabular(simplex, model, test_id, encoder)

baseline_cfs = get_baseline_counterfactuals(model = model, target = desired_class, test = x, \
                                            corpus = X_corpus)

cf_proto_cf = get_cfproto_cf(X_corpus, model, x)




2023-01-30 22:21:24.726033: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
No encoder specified. Using k-d trees to represent class prototypes.
2023-01-30 22:21:24.925164: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled


CPU times: user 6min 5s, sys: 29.2 s, total: 6min 34s
Wall time: 53.5 s


In [4]:
# Render the original sample x alongside the counterfactuals of the three methods.
# The last argument is a path prefix under results/ (presumably where the table
# is written -- TODO confirm against display_tabular_cfs).
display_tabular_cfs(
    [cfs, baseline_cfs, cf_proto_cf],
    model,
    x,
    scaler,
    encoder,
    cols,
    f'results/{dataset}_sample',
)

Unnamed: 0,original,SimplexCF_counterfactual_1,SimplexCF_counterfactual_2,SimplexCF_counterfactual_3,NN_counterfactual_1,CFProto_counterfactual_1
Gender,F,F,F,M,M,F
Neighbourhood,FONTE GRANDE,FONTE GRANDE,CARATOÍRA,MARIA ORTIZ,MARIA ORTIZ,FONTE GRANDE
Scholarship,0,0,0,0,0,0
Hipertension,0,0,0,0,0,0
Diabetes,0,0,0,0,0,0
Alcoholism,0,0,0,0,0,0
SMS_received,0,0,0,0,0,0
Handcap,0,1,1,1,1,1
Age,8,8,28,28,40,8
ScheduleDays,30,30,12,12,11,30


# Comparison

In [5]:
from tqdm import tqdm
from sklearn.svm import OneClassSVM

# Per-test-point metrics. Every row is ordered [simplex, nn, cfproto].
times = []            # wall-clock seconds spent generating the counterfactual(s)
sparsity = []         # count of entries where the first counterfactual differs from x
success_rate = []     # True when the method returned something other than None
in_distribution = []  # mean OneClassSVM score_samples value over the returned counterfactuals

# One-class SVM fit on the test batch; its score_samples output is the
# in-distribution score recorded for each method below.
ood_detector = OneClassSVM()
ood_detector.fit(X_test)

for test_id in tqdm(range(50)):
    start = time.perf_counter()
    cfs, x, desired_class = get_simplex_cf_tabular(simplex, model, test_id, encoder, n_cfs = 1)
    end = time.perf_counter()

    start_b = time.perf_counter()
    baseline_cfs = get_baseline_counterfactuals(
        model = model, target = desired_class, test = x, corpus = X_corpus
    )
    end_b = time.perf_counter()

    start_c = time.perf_counter()
    cf_proto_cf = get_cfproto_cf(X_corpus, model, x)
    end_c = time.perf_counter()

    times.append([end - start, end_b - start_b, end_c - start_c])

    # Decide success BEFORE touching the results: indexing or scoring a None
    # result would raise and abort the whole benchmark, which also made the
    # success check unable to ever record a failure.
    candidates = [cfs, baseline_cfs, cf_proto_cf]
    found = [cf is not None for cf in candidates]
    success_rate.append(found)

    # A failed method contributes NaN, so describe() leaves it out of the
    # summary statistics instead of the loop crashing.
    sparsity.append([
        (cf[0] != x).sum() if ok else float('nan')
        for cf, ok in zip(candidates, found)
    ])
    in_distribution.append([
        ood_detector.score_samples(cf).mean() if ok else float('nan')
        for cf, ok in zip(candidates, found)
    ])

  0%|          | 0/50 [00:00<?, ?it/s]No encoder specified. Using k-d trees to represent class prototypes.
  2%|▏         | 1/50 [00:52<42:56, 52.59s/it]No encoder specified. Using k-d trees to represent class prototypes.
  4%|▍         | 2/50 [01:44<41:45, 52.21s/it]No encoder specified. Using k-d trees to represent class prototypes.
  6%|▌         | 3/50 [02:35<40:21, 51.52s/it]No encoder specified. Using k-d trees to represent class prototypes.
  8%|▊         | 4/50 [03:26<39:30, 51.54s/it]No encoder specified. Using k-d trees to represent class prototypes.
 10%|█         | 5/50 [04:17<38:30, 51.34s/it]No encoder specified. Using k-d trees to represent class prototypes.
 12%|█▏        | 6/50 [05:09<37:45, 51.50s/it]No encoder specified. Using k-d trees to represent class prototypes.
 14%|█▍        | 7/50 [06:00<36:46, 51.32s/it]No encoder specified. Using k-d trees to represent class prototypes.
 16%|█▌        | 8/50 [06:51<35:50, 51.21s/it]No encoder specified. Using k-d trees to r

In [8]:
# Write describe() summaries of each benchmark metric to results/<dataset>_<metric>.csv.
# Columns follow the per-row order used when the metrics were collected.
pd.DataFrame(times, columns  = ['simplex', 'nn', 'cfproto']).describe().to_csv(f'results/{dataset}_times.csv')
# float rather than int conversion: describe() reports float statistics either
# way, and float() does not raise on a NaN entry the way int() does.
pd.DataFrame(sparsity, columns  = ['simplex', 'nn', 'cfproto']).applymap(float).describe().to_csv(f'results/{dataset}_sparsity.csv')
# Booleans -> 0/1 so that the mean of each column is the success rate.
pd.DataFrame(success_rate, columns  = ['simplex', 'nn', 'cfproto']).applymap(int).describe().to_csv(f'results/{dataset}_success_rate.csv')
pd.DataFrame(in_distribution, columns  = ['simplex', 'nn', 'cfproto']).describe().to_csv(f'results/{dataset}_in_distribution.csv')

# NOTE(review): this cell used to end with
#     w_c, x_c, proj_jacobian_c = cfs_results[0][0][0]
# `cfs_results` is not assigned in any visible cell (unless util's star import
# supplies it -- verify), so the line raised NameError on a fresh kernel, and
# none of the three unpacked names was used afterwards. Removed.

In [9]:
# Mean detector score over X_test, the same data ood_detector was fit on --
# presumably a reference level for the in_distribution values of the counterfactuals.
ood_detector.score_samples(X_test).mean()

43.42385162494774