In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np

from utils.df_loader import (
    load_adult_df,
    load_compas_df,
    load_german_df,
    load_diabetes_df,
    load_breast_cancer_df,
)
from sklearn.model_selection import train_test_split
from utils.preprocessing import preprocess_df
from utils.models import (
    train_three_models,
    evaluation_test,
    save_three_models,
    load_three_models,
)
import utils.cf_proto as util_cf_proto
import utils.dice as util_dice
import utils.gs as util_gs
import utils.watcher as util_watcher
import utils.print as print_f


from utils.save import save_result_as_csv

# Global TensorFlow/NumPy seeding is only needed for the alibi runs.
# It is left commented out here so the GS and DiCE results can be generated.

seed = 123  # still used as `random_state` for the train/test split below
# tf.random.set_seed(seed)
# np.random.seed(seed)


In [None]:
# Dataset to run the experiment on.
# Supported values: "adult", "german", "compas", "diabetes", "breast_cancer".
dataset_name = "adult"

print(f"Dataset Name: [{dataset_name}]")

# Dispatch table mapping each supported dataset name to its loader function.
dataset_loaders = {
    "adult": load_adult_df,
    "german": load_german_df,
    "compas": load_compas_df,
    "diabetes": load_diabetes_df,
    "breast_cancer": load_breast_cancer_df,
}
if dataset_name not in dataset_loaders:
    raise Exception("Unsupported dataset")
dataset_loading_fn = dataset_loaders[dataset_name]

# `preprocess_df` receives the loader itself (not a loaded frame) and returns
# the dataset-info object used throughout the rest of the notebook.
df_info = preprocess_df(dataset_loading_fn)

# Shuffled 80/20 train/test split of `df_info.dummy_df`, seeded for repeatability.
train_df, test_df = train_test_split(
    df_info.dummy_df,
    train_size=0.8,
    shuffle=True,
    random_state=seed,
)

# Model inputs come from the `ohe_feature_names` columns (presumably the
# one-hot-encoded features — confirm in utils.preprocessing); labels come
# from the `target_name` column.
feature_columns = df_info.ohe_feature_names
target_column = df_info.target_name

X_train, y_train = np.array(train_df[feature_columns]), np.array(train_df[target_column])
X_test, y_test = np.array(test_df[feature_columns]), np.array(test_df[target_column])

In [None]:
# Load the previously saved models for this dataset. The loader is given the
# size of the last axis of X_train and the dataset name.
input_dim = X_train.shape[-1]
models = load_three_models(input_dim, dataset_name)

# Evaluate the loaded models on the held-out split; kept as the last expression
# so any returned value is shown as the cell output.
evaluation_test(models, X_test, y_test)

In [None]:
# Experiment-size settings passed to the counterfactual generator below.
# The "(1)&(2)" tags are the original author's references to experiment setups
# that are not defined in this notebook — TODO confirm what they denote.
num_instances = 20 # (1)&(2) 20
num_cf_per_instance = 1 # (1)&(2) 5 — NOTE(review): the note says 5 but the value is 1; confirm which is intended

In [None]:
# Display the feature names of this dataset; use them to decide which features
# can be constrained (e.g. via `features_to_vary`) in the counterfactual run.
df_info.feature_names 

In [None]:
# Generate DiCE counterfactuals under a feature constraint, post-process the
# raw results, and hand them to `save_result_as_csv`.
print_f.print_block(title="Counterfactual Algorithm", content="DiCE")

# Features the counterfactual search is allowed to change. This must be a
# subset of `df_info.feature_names` for the currently selected dataset, so
# update it whenever `dataset_name` changes.
features_to_vary = ["age"]

# Fail fast with a clear message when the constraint names a feature that the
# selected dataset does not have, instead of failing (or silently ignoring the
# constraint) somewhere inside the DiCE run.
unknown_features = [
    feature for feature in features_to_vary if feature not in df_info.feature_names
]
if unknown_features:
    raise ValueError(
        f"features_to_vary contains {unknown_features}, which are not features of "
        f"dataset [{dataset_name}]; available features: {list(df_info.feature_names)}"
    )

results = util_dice.generate_dice_result(
    df_info,
    test_df,
    models,
    num_instances,
    num_cf_per_instance,
    sample_size=50,  # forwarded unchanged; see utils.dice for its exact meaning
    models_to_run=["dt", "rfc", "nn"],  # keys of the models to explain
    features_to_vary=features_to_vary,
)
result_dfs = util_dice.process_results(df_info, results)
save_result_as_csv("dice_feature_constraint", dataset_name, result_dfs)