In [1]:
import tensorflow as tf
import dice_ml
import pandas as pd
from learning import *
from time import time

In [2]:
# Giving current root path
PATH = "./"

# name of dataset
DATASET_NAME = "diabetes.csv"

# Seed used for every random draw in this cell, so that a fresh
# "Restart Kernel -> Run All" rebuilds exactly the same balanced sample.
SEED = 42

# variable containing the class labels in this case the dataset contains:
# 0 - if not diabetes
# 1 - if diabetes
class_var = "Outcome"

# load dataset
dataset_path = PATH + "datasets/" + DATASET_NAME
data = pd.read_csv(dataset_path)

# features: every column except the class label
feature_names = data.drop([class_var], axis=1).columns.to_list()

# balance dataset: keep every positive row and pair it with an equally
# sized, reproducibly shuffled subset of the negative rows
yes_data = data[data[class_var] == 1]
sampled_data = data[data[class_var] == 0].sample(frac=1, random_state=SEED)

# slicing past the end is safe, so this also works if there are fewer
# negatives than positives (all negatives are kept in that case)
no_data = sampled_data[0:len(yes_data)]

balanced_data = pd.concat([no_data, yes_data])

# apply one hot encoder to data
# standardize the input between 0 and 1
X, Y, encoder, scaler = encode_data(balanced_data, class_var)

n_features = X.shape[1]
n_classes = len(data[class_var].unique())

# load existing training data
print("Loading training data...")
X_train, Y_train, X_test, Y_test, X_validation, Y_validation = load_training_data(dataset_path)

print("====================Features====================")
print(feature_names)
print("================================================")

Loading training data...
['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']


In [3]:
# the best performing model was obtained with 5 hidden layers with 12 neurons each
model_name = "model_h5_N12"

# both the serialised blackbox model and its training history live under
# the same per-dataset training folder
training_root = PATH + "training/" + DATASET_NAME.replace(".csv", "")
path_serialisation_model = training_root + "/model/"
path_serialisation_histr = training_root + "/history/"

# load the performance history first, then the model itself
print("Loading Blackbox model...")
model_history = load_model_history(model_name, path_serialisation_histr)
model = load_model(model_name, path_serialisation_model)

# check model architecture
model.summary()

Loading Blackbox model...
Loaded model from disk
Model: "model_h5_N12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_293 (Dense)            (None, 12)                108       
_________________________________________________________________
dense_294 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_295 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_296 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_297 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_298 (Dense)            (None, 12)                156       
_________________________________________________________________
dense

In [4]:
class FirstValue(tf.keras.layers.Layer):
    """Keras layer that keeps only the first column of its input.

    ``call`` returns ``inputs[:, :1]``: the slice (rather than an integer
    index) keeps the second axis, so it has length 1 instead of being dropped.

    Args:
        **kwargs: Standard ``tf.keras.layers.Layer`` options (for example
            ``name``, ``dtype``, ``trainable``), forwarded unchanged to the
            base class. Accepting them lets Keras rebuild the layer from its
            config; calling ``FirstValue()`` with no arguments still works.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return the first column of ``inputs``, keeping the column axis."""
        # NOTE(review): presumably the wrapped model emits one score per class
        # and this picks a single one; which class the first column stands for
        # depends on the label encoding done elsewhere -- TODO confirm.
        return inputs[:, :1]

In [5]:
# name of the outcome column
target_name = "Outcome"

# every column of the balanced frame except the outcome column
feature_names = balanced_data.columns.tolist()
del feature_names[feature_names.index(target_name)]

In [6]:
# DiCE data interface over the balanced frame; every feature is declared continuous
d = dice_ml.Data(
    dataframe=balanced_data,
    continuous_features=feature_names,
    outcome_name=target_name,
)

In [7]:
# Chain the blackbox with FirstValue so the combined model
# exposes only the first column of the blackbox output
seq = tf.keras.Sequential()
seq.add(model)
seq.add(FirstValue())

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [8]:
# Wrap the chained Keras model for DiCE using its "TF2" backend
m = dice_ml.Model(
    model=seq,
    backend="TF2",
)

In [12]:
# Put the test split into a DataFrame labelled with the feature names.
# NOTE(review): X_test comes from load_training_data while the DiCE data
# interface is built from the raw balanced_data -- confirm both are on the
# same feature scale before trusting the displayed query values.
test_df = pd.DataFrame(data=X_test, columns=feature_names)

# position of the first query row -- change this to explain other instances
example_idx = 2

# six consecutive rows starting at example_idx
example_data = test_df.iloc[slice(example_idx, example_idx + 6)]

In [14]:
# Build the DiCE explainer from the data interface and the model wrapper
exp = dice_ml.Dice(d, m)

# Request 4 counterfactuals of the opposite class, weighting diversity (2)
# well above proximity (0.1)
dice_exp = exp.generate_counterfactuals(
    example_data,
    total_CFs=4,
    desired_class="opposite",
    proximity_weight=0.1,
    diversity_weight=2,
)

Only 3 (required 4)  Diverse Counterfactuals found for the given configuation, perhaps try with different values of proximity (or diversity) weights or learning rate... ; total time taken: 04 min 33 sec


In [15]:
# Display the query instances and their counterfactuals, showing only the
# feature values that changed
dice_exp.visualize_as_dataframe(
    show_only_changes=True,
)

Query instance (original outcome : 1)


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.0,1.0,1.0,0.0,0.0,0.4,0.048,0.0,0.925
1,0.0,1.0,1.0,0.0,0.0,0.5,0.272,0.0,0.925
2,0.0,1.0,0.0,0.0,0.0,0.4,0.232,0.0,0.925
3,0.0,1.0,1.0,0.0,0.0,0.4,0.238,0.0,0.925
4,1.0,1.0,1.0,0.0,0.0,0.3,0.069,1.0,0.925
5,0.0,1.0,1.0,0.0,0.0,0.6,0.185,0.0,0.925



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,-,137.0,-,-,287.0,8.6,0.22,27.0,0.0
1,-,99.0,-,20.0,758.0,2.0,0.232,42.0,0.0
2,-,199.0,-,4.0,39.0,-,0.0489999999999999,21.0,0.0
