In [1]:
from util import *

from src.dataset import load_adult
from src.counterfactual import get_baseline_counterfactuals

import joblib
import time

import warnings
# Blanket filter: every warning category is silenced for the rest of the session.
warnings.filterwarnings('ignore')

# Pick the dataset and load the artefacts that were fitted for it.
dataset = 'adult'
model_path = f'models/{dataset}.gz'
# NOTE: joblib.load unpickles the archive, so only point it at trusted files.
# The loaded model should have the BlackBox interface.
model, encoder, scaler = joblib.load(model_path)
cols = load_columns(dataset)

2023-02-01 22:06:15.311412: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Pull two consecutive batches from the loader: the first serves as the corpus,
# the second as the test set. The second item of each batch is discarded
# (presumably the labels -- confirm against get_loader).
loader = get_loader(dataset)
(X_corpus, _), (X_test, _) = next(loader), next(loader)

# Build the simplex explainer from the model and both batches; with
# verbose=True the fitting progress is printed below the cell.
simplex = get_simplex(
    model,
    X_corpus,
    X_test,
    verbose=True,
)

Weight Fitting Epoch: 2000/10000 ; Error: 100 ; Regulator: 369 ; Reg Factor: 1
Weight Fitting Epoch: 4000/10000 ; Error: 18.7 ; Regulator: 58.4 ; Reg Factor: 1
Weight Fitting Epoch: 6000/10000 ; Error: 8.96 ; Regulator: 12.8 ; Reg Factor: 1
Weight Fitting Epoch: 8000/10000 ; Error: 6.91 ; Regulator: 4.38 ; Reg Factor: 1
Weight Fitting Epoch: 10000/10000 ; Error: 6.01 ; Regulator: 1.67 ; Reg Factor: 1


In [3]:
def get_attributions_image(explanations, id):
    """Tabulate the corpus examples and attributions behind one explanation.

    Relies on the notebook-level ``X_corpus``, ``scaler``, ``encoder`` and
    ``cols`` as well as the ``inverse_transform_x`` helper.

    Parameters
    ----------
    explanations : sequence
        Indexable by ``id``. ``explanations[id][0]`` is an iterable of
        ``(weight, _, jacobians)`` triples and ``explanations[id][1]`` is the
        matching iterable of indices into ``X_corpus``.
    id : int
        Which entry of ``explanations`` to tabulate.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``features_df`` holds, per corpus example, the transposed output of
        ``inverse_transform_x``; ``jcb_df`` holds ``pd.Series(jacobians)`` for
        the same example. Both frames share their column labels: the weight
        rounded to two decimals followed by ``' x '``. When several corpus
        examples round to the same weight, the second and later ones get a
        ``(n)`` suffix so that no column overwrites another.
    """
    features_df = pd.DataFrame()
    jcb_df = pd.DataFrame()
    label_counts = {}  # how many times each base label has been used so far
    components, corpus_ids = explanations[id][0], explanations[id][1]
    for (w, _, jcbs), corpus_id in zip(components, corpus_ids):
        base = str(round(w, 2)) + ' x '
        occurrence = label_counts.get(base, 0) + 1
        label_counts[base] = occurrence
        # Keep the plain label for the first use; disambiguate repeats so that
        # equal rounded weights do not silently replace an earlier column.
        name = base if occurrence == 1 else f'{base}({occurrence})'
        df = inverse_transform_x(X_corpus[corpus_id].reshape(1, -1), scaler, encoder, cols).T
        jcb_df[name] = pd.Series(jcbs)
        features_df[name] = df
    return features_df, jcb_df

In [14]:
%%time
test_id = 1

cfs, x, desired_class = get_simplex_cf_tabular(simplex, model, test_id, encoder, n_cfs = 1)
display_tabular_cfs([cfs, [], []], model, x, scaler, encoder, cols, f'results/{test_id}_sample.xlsx')

cf_explanations = simplex.get_counterfactuals_explanation(model, cfs=cfs)
test_explanations = simplex.get_full_test_explanation(model)

test_features_df, test_jcb_df = get_attributions_image(test_explanations, 0)
display(test_features_df.style.apply(b_g,A = test_jcb_df).to_excel(f'results/{test_id}_test_explanation.xlsx'))

cf_features_df, cf_jcb_df = get_attributions_image(cf_explanations, 0)
cf_features_df.style.apply(b_g,A = cf_jcb_df).to_excel(f'results/{test_id}_cf_explanation.xlsx')

Unnamed: 0,original,SimplexCF_counterfactual_1
workclass,State-gov,Local-gov
education,Some-college,Some-college
marital-status,Divorced,Married-civ-spouse
occupation,Other-service,Exec-managerial
relationship,Unmarried,Husband
race,White,White
gender,Male,Male
native-country,United-States,United-States
capital-gain,0,0
capital-loss,0,0


None

CPU times: user 8min 55s, sys: 48.9 s, total: 9min 44s
Wall time: 1min 37s


In [15]:
%%time
test_id = 10

cfs, x, desired_class = get_simplex_cf_tabular(simplex, model, test_id, encoder, n_cfs = 1)
display_tabular_cfs([cfs, [], []], model, x, scaler, encoder, cols, f'results/{test_id}_sample.xlsx')

cf_explanations = simplex.get_counterfactuals_explanation(model, cfs=cfs)
test_explanations = simplex.get_full_test_explanation(model)

test_features_df, test_jcb_df = get_attributions_image(test_explanations, 0)
display(test_features_df.style.apply(b_g,A = test_jcb_df).to_excel(f'results/{test_id}_test_explanation.xlsx'))

cf_features_df, cf_jcb_df = get_attributions_image(cf_explanations, 0)
cf_features_df.style.apply(b_g,A = cf_jcb_df).to_excel(f'results/{test_id}_cf_explanation.xlsx')

Unnamed: 0,original,SimplexCF_counterfactual_1
workclass,Self-emp-not-inc,Local-gov
education,HS-grad,Some-college
marital-status,Married-civ-spouse,Married-civ-spouse
occupation,Farming-fishing,Exec-managerial
relationship,Husband,Husband
race,White,White
gender,Male,Male
native-country,United-States,United-States
capital-gain,0,0
capital-loss,0,0


None

CPU times: user 7min 52s, sys: 44.1 s, total: 8min 36s
Wall time: 1min 19s


In [16]:
%%time
test_id = 100

cfs, x, desired_class = get_simplex_cf_tabular(simplex, model, test_id, encoder, n_cfs = 1)
display_tabular_cfs([cfs, [], []], model, x, scaler, encoder, cols, f'results/{test_id}_sample.xlsx')

cf_explanations = simplex.get_counterfactuals_explanation(model, cfs=cfs)
test_explanations = simplex.get_full_test_explanation(model)

test_features_df, test_jcb_df = get_attributions_image(test_explanations, 0)
display(test_features_df.style.apply(b_g,A = test_jcb_df).to_excel(f'results/{test_id}_test_explanation.xlsx'))

cf_features_df, cf_jcb_df = get_attributions_image(cf_explanations, 0)
cf_features_df.style.apply(b_g,A = cf_jcb_df).to_excel(f'results/{test_id}_cf_explanation.xlsx')

Unnamed: 0,original,SimplexCF_counterfactual_1
workclass,?,Local-gov
education,Some-college,Some-college
marital-status,Never-married,Married-civ-spouse
occupation,?,Exec-managerial
relationship,Own-child,Husband
race,White,White
gender,Female,Male
native-country,United-States,United-States
capital-gain,0,0
capital-loss,0,0


None

CPU times: user 9min 15s, sys: 49 s, total: 10min 4s
Wall time: 1min 42s


In [9]:
from tqdm import tqdm

# Running explanation weights, pooled over the first 100 test examples:
# one list for the test examples, one for their counterfactuals.
test_weights = []
cf_weights = []

# NOTE(review): `test_explanations` is not computed in this cell; it is whatever
# an earlier cell left behind from simplex.get_full_test_explanation(model).
for test_id in tqdm(range(100)):
    cfs, x, desired_class = get_simplex_cf_tabular(simplex, model, test_id, encoder, n_cfs = 1)
    cf_explanations = simplex.get_counterfactuals_explanation(model, cfs=cfs)

    # Each pair is (explanation components to walk, list that receives the sums).
    for components, sink in (
        (test_explanations[test_id][0], test_weights),
        (cf_explanations[0][0], cf_weights),
    ):
        w_sum = 0
        for w, _, _ in components:
            w_sum += w
            # NOTE(review): appending inside the inner loop stores every running
            # partial sum (one value per component), not one total per example.
            # Dedent this line if per-example totals were intended.
            sink.append(w_sum)

100%|██████████| 100/100 [08:43<00:00,  5.23s/it]


In [10]:
# Summary statistics of the running weights collected for the test examples.
pd.Series(test_weights).describe()

count    300.000000
mean       0.862315
std        0.159265
min        0.302760
25%        0.788780
50%        0.942460
75%        0.977287
max        0.996591
dtype: float64

In [11]:
# Summary statistics of the running weights collected for the counterfactuals.
pd.Series(cf_weights).describe()

count    300.000000
mean       0.863936
std        0.169395
min        0.481729
25%        0.689970
50%        0.975857
75%        0.988609
max        0.995025
dtype: float64

# cf_explanations[i][j][k]: i selects the counterfactual, j selects the results
# of that counterfactual, and k selects the corpus-based explanation in [i][j].
w_c, x_c, proj_jacobian_c = cf_explanations[0][0][0]