In [93]:
import dice_ml
from dice_ml.utils import helpers # helper functions
from sklearn.model_selection import train_test_split

# Load the adult income data through DiCE's helper; "income" is the outcome column.
dataset = helpers.load_adult_income_dataset()
target = dataset["income"]

# Stratified 80/20 split with a fixed seed. The full frame (outcome column
# included) is what gets split; the two label splits are discarded.
split_options = dict(test_size=0.2, random_state=0, stratify=target)
train_dataset, test_dataset, _, _ = train_test_split(dataset, target, **split_options)

# DiCE data interface built from the training split only.
d = dice_ml.Data(
    dataframe=train_dataset,
    continuous_features=["age", "hours_per_week"],
    outcome_name="income",
)

# DiCE model interface around the pre-trained model whose path the helper returns.
pretrained_model_path = helpers.get_adult_income_modelpath()
m = dice_ml.Model(
    model_path=pretrained_model_path,
    backend="TF2",
    func="ohe-min-max",
)

# Explainer that ties the data interface and the model interface together.
exp = dice_ml.Dice(d, m)

In [94]:
# Optional check (left disabled): list the distinct values of the marital_status column.
#dataset.marital_status.unique()

In [95]:
# Query = first row of the held-out split, with the outcome column removed.
query_instance = test_dataset.drop(columns="income").iloc[:1]

# Ask for five counterfactuals that flip the predicted class, letting only
# the listed features change.
dice_exp = exp.generate_counterfactuals(
    query_instance,
    total_CFs=5,
    desired_class="opposite",
    features_to_vary=[
        "age",
        "gender",
        "race",
        "hours_per_week",
    ],
)

# Show the query row followed by the generated counterfactual rows.
dice_exp.visualize_as_dataframe()

100%|██████████| 1/1 [00:04<00:00,  4.78s/it]

Query instance (original outcome : 0)





Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29,Private,HS-grad,Married,Blue-Collar,White,Female,38,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,68.0,Private,HS-grad,Married,Blue-Collar,White,Female,72.0,1
1,29.0,Private,HS-grad,Married,Blue-Collar,Other,Female,85.0,1
2,90.0,Private,HS-grad,Married,Blue-Collar,White,Female,65.0,1
3,29.0,Private,HS-grad,Married,Blue-Collar,White,Female,86.0,1
4,54.0,Private,HS-grad,Married,Blue-Collar,White,Female,56.0,1


In [96]:
# Display the same explanation again with show_only_changes=True: in the
# counterfactual table, features left unchanged from the query are rendered as "-".
dice_exp.visualize_as_dataframe(show_only_changes=True)

Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29,Private,HS-grad,Married,Blue-Collar,White,Female,38,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,68.0,-,-,-,-,-,-,72.0,1
1,-,-,-,-,-,Other,-,85.0,1
2,90.0,-,-,-,-,-,-,65.0,1
3,-,-,-,-,-,-,-,86.0,1
4,54.0,-,-,-,-,-,-,56.0,1


In [97]:
# Distinct values present in the occupation column.
dataset["occupation"].unique()

array(['White-Collar', 'Professional', 'Service', 'Blue-Collar',
       'Other/Unknown', 'Sales'], dtype=object)

In [98]:
# Build a hand-crafted data point. The earlier query row only serves as a
# template for the column layout: every feature column is overwritten here.
# `assign` returns a modified copy, so `query_instance` itself is untouched.
mydata = query_instance.assign(
    age=25,
    gender="Male",
    race="White",
    hours_per_week=40,
    education="Masters",
    workclass="Private",
    marital_status="Married",
    occupation="White-Collar",
)

# Request ten counterfactuals of the opposite class, allowing changes to
# age, gender and race only.
dice_exp = exp.generate_counterfactuals(
    mydata,
    total_CFs=10,
    desired_class="opposite",
    features_to_vary=["age", "gender", "race"],
)

# Show the result, with unchanged features hidden.
dice_exp.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:05<00:00,  5.43s/it]

Only 6 (required 10)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 05 sec
Query instance (original outcome : 1)





Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,25,Private,Masters,Married,White-Collar,White,Male,40,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,17.0,-,-,-,-,-,-,-,0
1,18.0,-,-,-,-,-,-,-,0
2,18.0,-,-,-,-,Other,-,-,0
3,19.0,-,-,-,-,Other,-,-,0
4,20.0,-,-,-,-,Other,-,-,0
5,17.0,-,-,-,-,Other,-,-,0
