In [18]:
import tensorflow as tf
import pandas as pd
import numpy as np

from utils.df_loader import load_adult_df, load_compas_df, load_german_df, load_diabetes_df, load_breast_cancer_df
from utils.preprocessing import preprocess_df
from sklearn.model_selection import train_test_split
from utils.dice import generate_dice_result, process_results
from utils.models import train_three_models, load_lp_three_models, train_three_models_lp,train_three_models, evaluation_test, save_three_models, load_three_models
from utils.save import save_result_as_csv

from utils.models import (
    train_three_models,
    evaluation_test,
    save_three_models,
    load_three_models,
)

# Turn off pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None

print('TF version: ', tf.__version__)
# TensorFlow 2.x executes eagerly by default, so this is expected to print True.
print('Eager execution enabled: ', tf.executing_eagerly())

# A single seed drives the train/test split below and the global TensorFlow and
# NumPy random generators, so that a Restart & Run All is repeatable.
seed = 123
tf.random.set_seed(seed)
np.random.seed(seed)


TF version:  2.0.0
Eager execution enabled:  True


In [19]:
#### Select dataset ####

# Supported names: adult, german, compas, diabetes, breast_cancer.
dataset_name = 'adult'

# Lookup table from dataset name to the loader imported from utils.df_loader.
dataset_loaders = {
    'adult': load_adult_df,
    'german': load_german_df,
    'compas': load_compas_df,
    'diabetes': load_diabetes_df,
    'breast_cancer': load_breast_cancer_df,
}

# Fail before assigning anything when the name is not one of the known keys.
if dataset_name not in dataset_loaders:
    raise Exception("Unsupported dataset")

dataset_loading_fn = dataset_loaders[dataset_name]

In [20]:
#### Load dataframe info.
# Passes the selected loader to the project's preprocessing helper. Later cells
# read `dummy_df`, `scaled_df`, `ohe_feature_names` and `target_name` from the
# returned object.
df_info = preprocess_df(dataset_loading_fn)

In [21]:
### Separate into train and test sets (80% train, shuffled with the fixed seed).
train_df, test_df = train_test_split(
    df_info.dummy_df,
    train_size=0.8,
    shuffle=True,
    random_state=seed,
)

In [22]:
# Display the held-out test split produced by the split cell above.
test_df

Unnamed: 0,age,capital-gain,capital-loss,hours-per-week,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,...,native-country_Puerto-Rico,native-country_Scotland,native-country_South,native-country_Taiwan,native-country_Thailand,native-country_Trinadad&Tobago,native-country_United-States,native-country_Vietnam,native-country_Yugoslavia,class
20713,0.520548,0.000000,0.000000,0.142857,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
13495,0.657534,0.000000,0.000000,0.193878,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
12367,0.164384,0.000000,0.000000,0.397959,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
22402,0.493151,0.000000,0.000000,0.397959,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,1
18338,0.205479,0.000000,0.000000,0.397959,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53,0.452055,0.000000,0.000000,0.551020,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,1
30311,0.506849,0.000000,0.000000,0.438776,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
24672,0.150685,0.000000,0.399449,0.295918,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
24229,0.383562,0.150242,0.000000,0.397959,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,1


In [23]:
### Get training and testing arrays.
def _features_and_target(split_df):
    """Return ``(X, y)`` NumPy arrays for one split.

    ``X`` holds the columns named in ``df_info.ohe_feature_names`` and ``y``
    the column(s) named by ``df_info.target_name``.
    """
    features = np.array(split_df[df_info.ohe_feature_names])
    target = np.array(split_df[df_info.target_name])
    return features, target


X_train, y_train = _features_and_target(train_df)
X_test, y_test = _features_and_target(test_df)

In [24]:
### Train models.
# models = train_three_models(X_train, y_train)

### Save models.
# save_three_models(models, dataset_name)

In [25]:
#models = train_three_models_lp(X_train, y_train)

### Save models.
#save_lp_three_models(models, dataset_name)

In [26]:
### Load models.
# The loader receives the size of X_train's last axis together with the
# dataset name.
input_dim = X_train.shape[-1]
models = load_three_models(input_dim, dataset_name)



In [27]:
### Print out accuracy on the test set.
# The recorded output reports one figure each for DT, RF and NN.
evaluation_test(models, X_test, y_test)

DT: [0.8213] | RF [0.8458] | NN [0.8474]


# DiCE

In [28]:
### Set how many counterfactuals (CFs) to generate.
# Both values are passed straight to generate_dice_result in the next cell.
# In the recorded run this produced instances 0-19 ...
num_instances = 20
# ... with a single "CF 0" entry for each instance.
num_cf_per_instance = 1

In [29]:
# Generate counterfactuals (CFs) with DiCE, then post-process the raw results.
dice_sample_size = 50
dice_models_to_run = ['nn']  # restricts this run to the 'nn' model

results = generate_dice_result(
    df_info, test_df, models,
    num_instances, num_cf_per_instance,
    sample_size=dice_sample_size,
    models_to_run=dice_models_to_run,
)
result_dfs = process_results(df_info, results)


Finding counterfactual for nn
instance 0
CF 0


  0%|          | 0/1 [00:00<?, ?it/s]



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



100%|██████████| 1/1 [00:00<00:00,  2.43it/s]


instance 1
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.04it/s]


instance 2
CF 0


100%|██████████| 1/1 [00:00<00:00,  2.87it/s]


instance 3
CF 0


100%|██████████| 1/1 [00:00<00:00,  2.82it/s]


instance 4
CF 0


100%|██████████| 1/1 [00:00<00:00,  2.99it/s]


instance 5
CF 0


100%|██████████| 1/1 [00:00<00:00,  2.97it/s]


instance 6
CF 0


100%|██████████| 1/1 [00:00<00:00,  2.99it/s]


instance 7
CF 0


100%|██████████| 1/1 [00:00<00:00,  1.81it/s]


instance 8
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.12it/s]


instance 9
CF 0


100%|██████████| 1/1 [00:00<00:00,  2.76it/s]


instance 10
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.29it/s]


instance 11
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.07it/s]


instance 12
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.06it/s]


instance 13
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.03it/s]


instance 14
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.05it/s]


instance 15
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.37it/s]


instance 16
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.23it/s]


instance 17
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.34it/s]


instance 18
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.20it/s]


instance 19
CF 0


100%|██████████| 1/1 [00:00<00:00,  3.25it/s]


In [30]:
from utils.dice import Recorder

In [31]:
# Take the i-th test row by position and fetch the matching row of
# `df_info.scaled_df` by index *label*. The values in `test_df.index` are
# labels, so `.loc` is the correct accessor; passing them to the positional
# `.iloc` only works when every label happens to equal its row position.
i = 0
example_input = df_info.scaled_df.loc[test_df.index[i:i + 1]].iloc[:1]

In [32]:
# Display the single-row frame selected from df_info.scaled_df.
example_input

Unnamed: 0,age,workclass,education,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
20713,0.520548,State-gov,Bachelors,Married-civ-spouse,Prof-specialty,Husband,White,Male,0.0,0.0,0.142857,United-States,<=50K


In [33]:
# Print each wrapped model's prediction for the example row, in the order nn, dt, rfc.
for model_key in ('nn', 'dt', 'rfc'):
    wrapped_model = Recorder.wrapped_models[model_key]
    print(wrapped_model.predict(example_input))

[0]
[0]
[0]


In [34]:
# Print each wrapped model's predict_proba output for the example row, in the order nn, dt, rfc.
for model_key in ('nn', 'dt', 'rfc'):
    wrapped_model = Recorder.wrapped_models[model_key]
    print(wrapped_model.predict_proba(example_input))

[[0.51415384 0.48584616]]
[[1. 0.]]
[[0.7825 0.2175]]


In [35]:
### Save result as file.
# Writes the processed result frames under the "lp_dice" prefix for the current
# dataset; the recorded run reports the destination ./results/lp_dice_adult.
save_result_as_csv("lp_dice", dataset_name, result_dfs)

Result has been saved to ./results/lp_dice_adult
