In [1]:
from util import *

from src.dataset import load_adult
from src.counterfactual import get_baseline_counterfactuals

import joblib
import time

import warnings
warnings.filterwarnings('ignore')
                        
# Get a model
dataset = 'adult'
# NOTE(security): joblib.load unpickles the archive and can execute arbitrary code
# while doing so -- only load model files that come from a trusted source.
model, encoder, scaler = joblib.load(f'models/{dataset}.gz') # Archive holds a (model, encoder, scaler) triple; the model should have the BlackBox interface
# NOTE(review): load_columns is not imported explicitly; presumably it is provided by
# `from util import *` and returns the dataset's column names -- confirm.
cols = load_columns(dataset)

2023-01-30 21:58:00.912321: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
loader = get_loader(dataset)

# Pull two consecutive batches from the loader: the first serves as the corpus,
# the second as the test examples. The second element of each batch is discarded
# (presumably the labels -- confirm against get_loader).
(X_corpus, _), (X_test, _) = next(loader), next(loader)

# Fit the simplex explainer on the corpus/test pair; verbose=True prints the
# weight-fitting progress.
simplex = get_simplex(model, X_corpus, X_test, verbose=True)

Weight Fitting Epoch: 2000/10000 ; Error: 100 ; Regulator: 369 ; Reg Factor: 1
Weight Fitting Epoch: 4000/10000 ; Error: 18.7 ; Regulator: 58.4 ; Reg Factor: 1
Weight Fitting Epoch: 6000/10000 ; Error: 8.96 ; Regulator: 12.8 ; Reg Factor: 1
Weight Fitting Epoch: 8000/10000 ; Error: 6.91 ; Regulator: 4.38 ; Reg Factor: 1
Weight Fitting Epoch: 10000/10000 ; Error: 6.01 ; Regulator: 1.67 ; Reg Factor: 1


In [3]:
%%time
# Index of the test example to explain.
test_id = 1

# SimplexCF counterfactuals; the call also hands back the example itself (`x`)
# and the class the counterfactuals should reach, both reused below.
cfs, x, desired_class = get_simplex_cf_tabular(simplex, model, test_id, encoder)

# Baseline counterfactuals for the same example and target class, drawn from
# the same corpus.
baseline_cfs = get_baseline_counterfactuals(
    model=model,
    target=desired_class,
    test=x,
    corpus=X_corpus,
)

# CFProto counterfactual for the same example.
cf_proto_cf = get_cfproto_cf(X_corpus, model, x)




2023-01-30 21:59:09.720774: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
No encoder specified. Using k-d trees to represent class prototypes.
2023-01-30 21:59:09.949868: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled


CPU times: user 5min, sys: 35 s, total: 5min 35s
Wall time: 50.4 s


In [4]:
# Show the original example next to the counterfactuals of all three methods
# (order: SimplexCF, baseline, CFProto); the last argument is the output path stem.
display_tabular_cfs(
    [cfs, baseline_cfs, cf_proto_cf],
    model,
    x,
    scaler,
    encoder,
    cols,
    f'results/{dataset}_sample',
)

Unnamed: 0,original,SimplexCF_counterfactual_1,SimplexCF_counterfactual_2,SimplexCF_counterfactual_3,NN_counterfactual_1,CFProto_counterfactual_1
workclass,State-gov,State-gov,State-gov,State-gov,Local-gov,State-gov
education,Some-college,Some-college,Bachelors,Bachelors,Some-college,10th
marital-status,Divorced,Divorced,Married-civ-spouse,Married-civ-spouse,Married-civ-spouse,Widowed
occupation,Other-service,Other-service,Other-service,Other-service,Exec-managerial,Other-service
relationship,Unmarried,Unmarried,Unmarried,Unmarried,Husband,Unmarried
race,White,White,White,White,White,White
gender,Male,Male,Male,Male,Male,Male
native-country,United-States,United-States,United-States,United-States,United-States,United-States
capital-gain,0,15024,0,2072,0,4495
capital-loss,0,0,0,113,0,0


# Comparison

In [5]:
from tqdm import tqdm
from sklearn.svm import OneClassSVM

# Number of test examples to benchmark (indices 0 .. N_TEST_EXAMPLES - 1).
N_TEST_EXAMPLES = 50

# One row per test example; the three entries of each row are, in order,
# SimplexCF, the baseline and CFProto.
times = []
sparsity = []
success_rate = []
in_distribution = []

# One-class SVM fitted on the test batch; its score_samples() output is used
# as the in-distribution score of the generated counterfactuals.
ood_detector = OneClassSVM()
ood_detector.fit(X_test)

for test_id in tqdm(range(N_TEST_EXAMPLES)):
    # Time each method separately on the same test example.
    start = time.perf_counter()
    cfs, x, desired_class = get_simplex_cf_tabular(simplex, model, test_id, encoder, n_cfs=1)
    end = time.perf_counter()

    start_b = time.perf_counter()
    baseline_cfs = get_baseline_counterfactuals(
        model=model, target=desired_class, test=x, corpus=X_corpus
    )
    end_b = time.perf_counter()

    start_c = time.perf_counter()
    cf_proto_cf = get_cfproto_cf(X_corpus, model, x)
    end_c = time.perf_counter()

    times.append([end - start, end_b - start_b, end_c - start_c])

    found, n_changed, ood_scores = [], [], []
    for candidates in (cfs, baseline_cfs, cf_proto_cf):
        # Decide success BEFORE indexing. Previously `candidates[0]` was evaluated
        # first, so a method returning None aborted the whole (slow) run and the
        # success flag could never be False. An empty result is treated as a
        # failure as well, since it has no first counterfactual to score.
        ok = candidates is not None and len(candidates) > 0
        found.append(ok)
        # Number of entries in which the first counterfactual differs from x.
        # NaN marks "no counterfactual"; describe() skips NaN, but any integer
        # cast applied to these rows afterwards must tolerate it.
        n_changed.append((candidates[0] != x).sum() if ok else float('nan'))
        ood_scores.append(
            ood_detector.score_samples(candidates).mean() if ok else float('nan')
        )

    sparsity.append(n_changed)
    success_rate.append(found)
    in_distribution.append(ood_scores)

  0%|          | 0/50 [00:00<?, ?it/s]No encoder specified. Using k-d trees to represent class prototypes.
  2%|▏         | 1/50 [00:53<43:35, 53.39s/it]No encoder specified. Using k-d trees to represent class prototypes.
  4%|▍         | 2/50 [01:41<40:17, 50.37s/it]No encoder specified. Using k-d trees to represent class prototypes.
  6%|▌         | 3/50 [02:31<39:26, 50.35s/it]No encoder specified. Using k-d trees to represent class prototypes.
  8%|▊         | 4/50 [03:10<35:09, 45.87s/it]No encoder specified. Using k-d trees to represent class prototypes.
 10%|█         | 5/50 [03:46<31:36, 42.15s/it]No encoder specified. Using k-d trees to represent class prototypes.
 12%|█▏        | 6/50 [04:21<29:06, 39.69s/it]No encoder specified. Using k-d trees to represent class prototypes.
 14%|█▍        | 7/50 [04:57<27:36, 38.53s/it]No encoder specified. Using k-d trees to represent class prototypes.
 16%|█▌        | 8/50 [05:33<26:27, 37.80s/it]No encoder specified. Using k-d trees to r

In [10]:
def _int_or_missing(value):
    """Cast ``value`` to int, passing NaN placeholders through unchanged.

    A plain ``int`` cast raises ValueError on NaN, which would abort the export
    as soon as a single metric entry is missing; describe() ignores NaN itself.
    """
    # NaN is the only value that does not compare equal to itself.
    return value if value != value else int(value)


# Column order matches the per-example rows: SimplexCF, baseline, CFProto.
METHOD_COLUMNS = ['simplex', 'nn', 'cfproto']

# Each metric is summarised with describe() and written to its own CSV.
# Count / flag metrics are cast to int first; continuous ones are used as-is.
pd.DataFrame(times, columns=METHOD_COLUMNS).describe().to_csv(f'results/{dataset}_times.csv')
pd.DataFrame(sparsity, columns=METHOD_COLUMNS).applymap(_int_or_missing).describe().to_csv(f'results/{dataset}_sparsity.csv')
pd.DataFrame(success_rate, columns=METHOD_COLUMNS).applymap(_int_or_missing).describe().to_csv(f'results/{dataset}_success_rate.csv')
pd.DataFrame(in_distribution, columns=METHOD_COLUMNS).describe().to_csv(f'results/{dataset}_in_distribution.csv')

w_c, x_c, proj_jacobian_c = cfs_results[0][0][0] # i indexes the counterfactual, j the results of that counterfactual, k the explanation based on the corpus in i,j

In [7]:
# Re-explain the same test example as before, this time with a feature mask.
test_id = 1

# 12-entry mask forwarded to the SimplexCF search; entries 5 and 11 are 0.
# NOTE(review): presumably 0 freezes a feature and 1 lets it change -- confirm
# against get_simplex_cf_tabular.
feature_mask = torch.Tensor([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0])

cfs_masked, x, desired_class = get_simplex_cf_tabular(
    simplex,
    model,
    test_id,
    encoder,
    mask=feature_mask,
)

In [8]:
# Show only the masked SimplexCF result; the two empty lists fill the slots of
# the other two methods.
display_tabular_cfs(
    [cfs_masked, [], []],
    model,
    x,
    scaler,
    encoder,
    cols,
    f'results/{dataset}_masked',
)

Unnamed: 0,original,SimplexCF_counterfactual_1
workclass,State-gov,State-gov
education,Some-college,Some-college
marital-status,Divorced,Divorced
occupation,Other-service,Other-service
relationship,Unmarried,Unmarried
race,White,White
gender,Male,Male
native-country,United-States,United-States
capital-gain,0,15024
capital-loss,0,0


In [13]:
# Mean detector score over X_test, the same batch the detector was fitted on.
# NOTE(review): presumably the reference level against which the counterfactuals'
# in-distribution scores are read -- confirm.
ood_detector.score_samples(X_test).mean()

45.59427465845603