In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

from utils.df_loader import load_breast_cancer_df, load_diabetes_df
from sklearn.model_selection import train_test_split
from utils.preprocessing import preprocess_df
from utils.models import train_three_models, evaluation_test, save_three_models, load_three_models
from utils.watcher import  generate_watcher_result, process_result

from utils.save import save_result_as_csv

### Disable TF2 and enable TF1 for alibi.
# Level 40 is the numeric value of logging.ERROR, so the TF logger only emits errors and above.
tf.get_logger().setLevel(40) 
# Turn off TF2 behaviour; the eager-execution check printed below is expected to be False afterwards.
tf.compat.v1.disable_v2_behavior()
# Reset the global Keras state before any model is loaded.
tf.keras.backend.clear_session()
# Disable the pandas chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None 

# Sanity check of the TF configuration applied above.
print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly()) # False


# Single seed shared by TensorFlow and NumPy here, and reused as `random_state`
# for the train/test split further down.
seed = 123
tf.random.set_seed(seed)
np.random.seed(seed)


TF version:  2.4.0-rc0
Eager execution enabled:  False


In [2]:
#### Select dataset ####

# Supported values are exactly the keys of `dataset_loaders` below:
# 'diabetes' and 'breast_cancer'.
# NOTE(review): the outputs recorded further down in this notebook (saved path
# ./results/watcher_diabetes and the diabetes feature columns) come from a
# 'diabetes' run; re-run all cells after changing this value so that the
# outputs match the selected dataset.
dataset_name = 'breast_cancer'

# Map each supported dataset name to the function that loads it.
dataset_loaders = {
    'diabetes': load_diabetes_df,
    'breast_cancer': load_breast_cancer_df,
}

# Fail early, naming the offending value and the valid choices.
if dataset_name not in dataset_loaders:
    raise ValueError(
        f"Unsupported dataset {dataset_name!r}; expected one of {sorted(dataset_loaders)}"
    )

dataset_loading_fn = dataset_loaders[dataset_name]

In [3]:
#### Load dataframe info.
# Runs the project preprocessing on the selected loader. Later cells read
# `dummy_df`, `ohe_feature_names` and `target_name` from the returned object.
df_info = preprocess_df(dataset_loading_fn)

In [4]:
### Separate into train and test sets.
# 80% of the rows of `df_info.dummy_df` go to training and the remainder to
# testing; rows are shuffled using the notebook-wide seed.
train_df, test_df = train_test_split(
    df_info.dummy_df,
    train_size=0.8,
    shuffle=True,
    random_state=seed,
)

In [5]:
### Get training and testing arrays.
# Feature matrices: the columns listed in `df_info.ohe_feature_names`.
X_train, X_test = (
    np.array(split_df[df_info.ohe_feature_names]) for split_df in (train_df, test_df)
)
# Target vectors: the `df_info.target_name` column.
y_train, y_test = (
    np.array(split_df[df_info.target_name]) for split_df in (train_df, test_df)
)

In [6]:
### (Optional) Train and save models.
# Set to True to retrain the three models on the current training arrays and
# save them under `dataset_name`. Left False by default so that a normal run
# skips training and uses the previously saved models loaded in the next cell.
RETRAIN_MODELS = False

if RETRAIN_MODELS:
    ## Train models.
    models = train_three_models(X_train, y_train)

    ## Save models.
    save_three_models(models, dataset_name)

In [7]:
### Load models
# The loader receives the number of input features (size of the last axis of
# X_train) together with the dataset name.
models = load_three_models(
    X_train.shape[-1],
    dataset_name,
)



In [8]:
### Print out accuracy on the test set.
# Evaluates the loaded models on the held-out arrays; the recorded output
# below shows one score each for DT, RF and NN.
evaluation_test(models, X_test, y_test)

DT: [0.7727] | RF [0.7987] | NN [0.7662]


## Wachter Counterfactual

In [9]:
### Set how many counterfactuals (CFs) to generate.
# First value: number of instances to explain; second value: number of CFs
# requested per instance.
num_instances, num_cf_per_instance = 5, 1

In [10]:
### Generate CF
# Run the counterfactual search (the recorded log below shows it going through
# the dt, rfc and nn models in turn), then post-process the raw results into
# `result_dfs`, which is indexed by model key further down.
results = generate_watcher_result(
    df_info,
    train_df,
    models,
    num_instances,
    num_cf_per_instance,
    X_train,
    X_test,
    y_test,
    max_iters=1000,
)
result_dfs = process_result(results, df_info)

Finding counterfactual for dt
instance 0
CF 0
Found CF
instance 1
CF 0
Found CF
instance 2
CF 0
Found CF
instance 3
CF 0
CF not found
instance 4
CF 0
CF not found
Finding counterfactual for rfc
instance 0
CF 0
Found CF
instance 1
CF 0
CF not found
instance 2
CF 0
Found CF
instance 3
CF 0
CF not found
instance 4
CF 0
CF not found
Finding counterfactual for nn
instance 0
CF 0
CF not found
instance 1
CF 0
CF not found
instance 2
CF 0
CF not found
instance 3
CF 0
CF not found
instance 4
CF 0
CF not found


In [11]:
### Save result as file.
# Hands `result_dfs` to the project's CSV helper, labelled with the "watcher"
# method name and the selected `dataset_name`.
# NOTE(review): the recorded output below reports ./results/watcher_diabetes,
# which does not match dataset_name = 'breast_cancer' set near the top of the
# notebook; the stored outputs appear to come from an earlier run. Re-run to refresh.
save_result_as_csv("watcher", dataset_name, result_dfs)

Result has been saved to ./results/watcher_diabetes


In [12]:
# Display the processed counterfactual results stored under the 'nn' model key.
result_dfs['nn']

Unnamed: 0,scaled_input_Pregnancies,scaled_input_Glucose,scaled_input_BloodPressure,scaled_input_SkinThickness,scaled_input_Insulin,scaled_input_BMI,scaled_input_DiabetesPedigreeFunction,scaled_input_Age,scaled_input_Outcome,origin_input_Pregnancies,...,origin_input_SkinThickness,origin_input_Insulin,origin_input_BMI,origin_input_DiabetesPedigreeFunction,origin_input_Age,origin_input_Outcome,running_time,Found,ground_truth,prediction
0,0.411765,0.909548,0.688525,0.212121,0.22695,0.535022,0.216909,0.5,Y,7.0,...,21.0,192.0,35.9,0.586,51.0,Y,2.745827,N,Y,Y
0,0.117647,0.638191,0.47541,0.242424,0.325059,0.412817,0.649872,0.066667,N,2.0,...,24.0,275.0,27.7,1.6,25.0,N,2.689703,N,N,N
0,0.647059,0.693467,0.622951,0.0,0.0,0.494784,0.146029,0.233333,N,11.0,...,0.0,0.0,33.2,0.42,35.0,N,2.695067,N,N,N
0,0.117647,0.407035,0.491803,0.222222,0.0,0.412817,0.090521,0.066667,N,2.0,...,22.0,0.0,27.7,0.29,25.0,N,2.701206,N,N,N
0,0.0,0.422111,0.672131,0.313131,0.147754,0.5693,0.066183,0.033333,N,0.0,...,31.0,125.0,38.2,0.233,23.0,N,2.706162,N,N,N
