In [1]:
import json
from itertools import chain

import dice_ml
import numpy as np
import pandas as pd
import tensorflow as tf
from dice_ml.utils import helpers
from IPython.core.display import display, HTML

from Controller import TrainingController, ExplainingController
from Models import DiCEBinaryOutputModel
from Parameters import TrainingParameters, PredictingParameters
from Parameters.Enums import TracePermutationStrategies
from Utils.SaveUtils import load_parameters

In [2]:
### Select models to load
# Name of the saved-model directory under ./SavedModels (labelled "AOW").
saved_model_name = "0.7924_BPI2012_BaseLineLSTMModel_2021-06-11 17:08:27.383137"
folder_path = "./SavedModels/" + saved_model_name

In [3]:
### Setting up parameters
# Load the saved parameters from the model folder and rebuild the training parameters.
parameters_json = load_parameters(folder_path=folder_path)
parameters = TrainingParameters(**parameters_json)
parameters.load_model_folder_path = folder_path

# Seed NumPy and TensorFlow with the dataset-split seed held by the parameters.
split_seed = parameters.dataset_split_seed
np.random.seed(split_seed)
tf.random.set_seed(split_seed)

# The predicting parameters point at the same saved-model folder.
predicting_parameters = PredictingParameters()
predicting_parameters.load_model_folder_path = folder_path

In [4]:
# Build the training controller from the loaded parameters; per the log output of
# this cell it loads the preprocessed dataset and the saved model.
trainer = TrainingController(parameters = parameters)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Preprocessed data loaded successfully: ./datasets/preprocessed/BPI_Challenge_2012/AOW 

| Model loaded successfully from: ./SavedModels/0.7924_BPI2012_BaseLineLSTMModel_2021-06-11 17:08:27.383137  


In [5]:
# Inspect the vocabulary list of the loaded model (special tags and activity names).
trainer.model.vocab.vocabs

['<PAD>',
 '<EOS>',
 '<SOS>',
 'A_ACCEPTED_COMPLETE',
 'A_ACTIVATED_COMPLETE',
 'A_APPROVED_COMPLETE',
 'A_CANCELLED_COMPLETE',
 'A_DECLINED_COMPLETE',
 'A_FINALIZED_COMPLETE',
 'A_PARTLYSUBMITTED_COMPLETE',
 'A_PREACCEPTED_COMPLETE',
 'A_REGISTERED_COMPLETE',
 'A_SUBMITTED_COMPLETE',
 'O_ACCEPTED_COMPLETE',
 'O_CANCELLED_COMPLETE',
 'O_CREATED_COMPLETE',
 'O_DECLINED_COMPLETE',
 'O_SELECTED_COMPLETE',
 'O_SENT_BACK_COMPLETE',
 'O_SENT_COMPLETE',
 'W_Afhandelen leads_COMPLETE',
 'W_Beoordelen fraude_COMPLETE',
 'W_Completeren aanvraag_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'W_Valideren aanvraag_COMPLETE']

In [6]:
# Build the explaining controller from both parameter sets; per the log output of
# this cell it loads the saved model.
explainer = ExplainingController(parameters=parameters, predicting_parameters= predicting_parameters)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Model loaded successfully from: ./SavedModels/0.7924_BPI2012_BaseLineLSTMModel_2021-06-11 17:08:27.383137  


In [7]:
########### Define example ###########
# Query trace as activity names: the <SOS> tag, the application/offer prefix, and
# seven consecutive 'W_Nabellen offertes_COMPLETE' events at the end.
example_vocab_trace = [
    '<SOS>',
    'A_SUBMITTED_COMPLETE',
    'A_PARTLYSUBMITTED_COMPLETE',
    'A_PREACCEPTED_COMPLETE',
    'A_ACCEPTED_COMPLETE',
    'O_SELECTED_COMPLETE',
    'A_FINALIZED_COMPLETE',
    'O_CREATED_COMPLETE',
    'O_SENT_COMPLETE',
    'W_Completeren aanvraag_COMPLETE',
    'O_CANCELLED_COMPLETE',
    'O_SELECTED_COMPLETE',
    'O_CREATED_COMPLETE',
    'O_SENT_COMPLETE',
] + ['W_Nabellen offertes_COMPLETE'] * 7

# Translate the activity names into vocabulary indices.
model_vocab = explainer.model.vocab
example_idx_trace = np.array(model_vocab.list_of_vocab_to_index(example_vocab_trace))

########### Throw example into model ###########
# The model is called on a batch holding just this one trace.
example_input = tf.constant([example_idx_trace])
out, _ = explainer.model(example_input)

# Keep only the model output at the last position of the trace.
last_step = len(example_vocab_trace) - 1
predicted_vocab_distributions = tf.gather(out, last_step, axis=1)
predicted_vocab_distributions_df = pd.DataFrame(
    predicted_vocab_distributions.numpy().tolist(),
    columns=model_vocab.vocabs,
)

# Report the entry with the highest value for the single example in the batch.
max_arg = tf.math.argmax(predicted_vocab_distributions, axis=-1).numpy()[0]
max_prob_vocab = model_vocab.index_to_vocab(max_arg)
max_prob = predicted_vocab_distributions[0][max_arg].numpy()
print("Predicted activity with highest probability (%.2f) is \"%s\"" % (max_prob, max_prob_vocab))
display(predicted_vocab_distributions_df)
print(predicted_vocab_distributions_df.iloc[0])

Predicted activity with highest probability (0.72) is "W_Nabellen offertes_COMPLETE"


Unnamed: 0,<PAD>,<EOS>,<SOS>,A_ACCEPTED_COMPLETE,A_ACTIVATED_COMPLETE,A_APPROVED_COMPLETE,A_CANCELLED_COMPLETE,A_DECLINED_COMPLETE,A_FINALIZED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,...,O_DECLINED_COMPLETE,O_SELECTED_COMPLETE,O_SENT_BACK_COMPLETE,O_SENT_COMPLETE,W_Afhandelen leads_COMPLETE,W_Beoordelen fraude_COMPLETE,W_Completeren aanvraag_COMPLETE,W_Nabellen incomplete dossiers_COMPLETE,W_Nabellen offertes_COMPLETE,W_Valideren aanvraag_COMPLETE
0,2.7e-05,0.004371,1.8e-05,5.1e-05,4.1e-05,1e-05,0.044487,0.000771,0.000127,6.3e-05,...,0.000997,0.013255,0.124609,4.1e-05,8.8e-05,1.9e-05,0.00011,0.000407,0.718075,0.000232


<PAD>                                      0.000027
<EOS>                                      0.004371
<SOS>                                      0.000018
A_ACCEPTED_COMPLETE                        0.000051
A_ACTIVATED_COMPLETE                       0.000041
A_APPROVED_COMPLETE                        0.000010
A_CANCELLED_COMPLETE                       0.044487
A_DECLINED_COMPLETE                        0.000771
A_FINALIZED_COMPLETE                       0.000127
A_PARTLYSUBMITTED_COMPLETE                 0.000063
A_PREACCEPTED_COMPLETE                     0.000061
A_REGISTERED_COMPLETE                      0.000013
A_SUBMITTED_COMPLETE                       0.000118
O_ACCEPTED_COMPLETE                        0.000050
O_CANCELLED_COMPLETE                       0.091945
O_CREATED_COMPLETE                         0.000014
O_DECLINED_COMPLETE                        0.000997
O_SELECTED_COMPLETE                        0.013255
O_SENT_BACK_COMPLETE                       0.124609
O_SENT_COMPL

In [8]:
### Remove tags
# Fetch the tag indices once, then drop every trace entry that is a tag.
tag_indices = explainer.vocab.tags_idx()
example_idx_trace_without_tags = [idx for idx in example_idx_trace if idx not in tag_indices]
# Map the remaining indices back to activity names.
example_vocab_trace_without_tags = explainer.model.vocab.list_of_index_to_vocab(example_idx_trace_without_tags)

print("=============Example without tags=============")
print(example_vocab_trace_without_tags)

['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE']


In [9]:
### Set up arguments for CF
# One feature name per position of the tag-free trace: step_1 ... step_N.
n_steps = len(example_vocab_trace_without_tags)
feature_names = np.array(["step_%d" % step for step in range(1, n_steps + 1)])

# Candidate values for every step: all vocabulary entries that contain no "<".
without_tags_vocabs = [name for name in explainer.vocab.vocabs if "<" not in name]

# Every feature name maps to the same candidate list object.
cat_vars_without_tag = dict.fromkeys(feature_names, without_tags_vocabs)

In [10]:
#### Define the desired activity.
# Activity name that is looked up in the vocabulary when the DiCE wrapper is built.
desired_activity = 'A_APPROVED_COMPLETE'
print("Desired activity is \"%s\"" % desired_activity)

Desired activity is "A_APPROVED_COMPLETE"


In [11]:
# Wrap the loaded model for DiCE, passing the index of the desired activity, the
# length of the tag-free trace and the tag-free candidate vocabulary.
dice_binary_model = DiCEBinaryOutputModel(
    explainer.model,
    explainer.vocab,
    desired=explainer.vocab.vocab_to_index(desired_activity),
    trace_length=len(example_vocab_trace_without_tags),
    without_tags_vocabs=without_tags_vocabs,
)

In [12]:
# DiCE data interface built from feature metadata only: each step lists its
# candidate values and no feature is declared continuous.
d = dice_ml.Data(
    features=cat_vars_without_tag,
    outcome_name="predicted",
    continuous_features=[],
)
# DiCE model interface around the wrapped model, using the "TF2" backend.
m = dice_ml.Model(model=dice_binary_model, backend="TF2")
# Explainer object used below to generate counterfactuals.
exp = dice_ml.Dice(d, m)

In [13]:
# Single-row DataFrame holding the tag-free example trace, one column per feature name.
example_df = pd.DataFrame([example_vocab_trace_without_tags], columns= feature_names)

In [14]:
# Display the query instance.
# NOTE(review): the saved output of this cell shows a step_21 column, while the trace
# defined earlier in this notebook yields 20 steps — the output looks stale; re-run
# the cell to refresh it.
example_df

Unnamed: 0,step_1,step_2,step_3,step_4,step_5,step_6,step_7,step_8,step_9,step_10,...,step_12,step_13,step_14,step_15,step_16,step_17,step_18,step_19,step_20,step_21
0,A_SUBMITTED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,A_PREACCEPTED_COMPLETE,A_ACCEPTED_COMPLETE,O_SELECTED_COMPLETE,A_FINALIZED_COMPLETE,O_CREATED_COMPLETE,O_SENT_COMPLETE,W_Completeren aanvraag_COMPLETE,O_CANCELLED_COMPLETE,...,O_CREATED_COMPLETE,O_SENT_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,A_CANCELLED_COMPLETE


In [24]:
# Search for a single counterfactual of the query trace with the "opposite" outcome,
# allowing every step to vary. The verbose log of this cell shows the loss still
# decreasing when max_iter=2000 is reached.
# NOTE(review): execution counts jump from In [14] to In [24]; restart the kernel and
# run all cells to confirm the result reproduces.
# NOTE(review): with total_CFs=1 the diversity weight presumably has no effect —
# confirm against the DiCE documentation.
dice_exp = exp.generate_counterfactuals(
        example_df,
        total_CFs=1,
        desired_class="opposite",
        min_iter=100, max_iter=2000,
        features_to_vary="all",
        algorithm = "DiverseCF", # options noted by the author: DiverseCF, RandomInitCF
        proximity_weight=1000, # alternative value noted by the author: 0.5
        diversity_weight=2000,# alternative value noted by the author: 1.0
        init_near_query_instance=True,
        tie_random = True,
        categorical_penalty = 1,
        learning_rate=0.0005,
        verbose=True,
    )

step 1,  loss=660.594
step 51,  loss=628.084
step 101,  loss=596.864
step 151,  loss=566.956
step 201,  loss=538.328
step 251,  loss=510.943
step 301,  loss=484.765
step 351,  loss=459.759
step 401,  loss=435.888
step 451,  loss=413.12
step 501,  loss=391.42
step 551,  loss=370.754
step 601,  loss=351.09
step 651,  loss=332.396
step 701,  loss=314.64
step 751,  loss=297.791
step 801,  loss=281.819
step 851,  loss=266.693
step 901,  loss=252.385
step 951,  loss=238.865
step 1001,  loss=226.105
step 1051,  loss=214.076
step 1101,  loss=202.751
step 1151,  loss=192.102
step 1201,  loss=182.104
step 1251,  loss=172.729
step 1301,  loss=163.953
step 1351,  loss=155.748
step 1401,  loss=148.091
step 1451,  loss=140.957
step 1501,  loss=134.321
step 1551,  loss=128.16
step 1601,  loss=122.451
step 1651,  loss=117.171
step 1701,  loss=112.297
step 1751,  loss=107.808
step 1801,  loss=103.683
step 1851,  loss=99.8998
step 1901,  loss=96.4394
step 1951,  loss=93.2817
Diverse Counterfactuals foun

In [25]:
#### Check what permutations are fed into the model. ####
# Stack every trace recorded by the wrapper once, outside the per-step loop.
# NOTE(review): assumes the squeezed array is 2-D (recorded calls x steps) — confirm
# against how DiCEBinaryOutputModel fills all_trace.
received_traces = np.squeeze(np.array(dice_binary_model.all_trace))

# For each step, collect the distinct activities seen at that position.
all_received_activities = [
    explainer.model.vocab.list_of_index_to_vocab(list(set(received_traces[:, step].tolist())))
    for step in range(len(example_vocab_trace_without_tags))
]
print(all_received_activities)

[['A_ACCEPTED_COMPLETE', 'A_SUBMITTED_COMPLETE'], ['A_ACCEPTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_REGISTERED_COMPLETE'], ['A_PREACCEPTED_COMPLETE', 'A_REGISTERED_COMPLETE'], ['O_SENT_BACK_COMPLETE', 'A_ACCEPTED_COMPLETE'], ['O_SELECTED_COMPLETE', 'A_REGISTERED_COMPLETE'], ['A_FINALIZED_COMPLETE', 'O_ACCEPTED_COMPLETE'], ['O_CREATED_COMPLETE'], ['O_SENT_COMPLETE', 'A_DECLINED_COMPLETE'], ['W_Completeren aanvraag_COMPLETE', 'O_CREATED_COMPLETE'], ['A_ACCEPTED_COMPLETE', 'A_ACTIVATED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_CANCELLED_COMPLETE', 'A_DECLINED_COMPLETE', 'A_FINALIZED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'A_REGISTERED_COMPLETE', 'A_SUBMITTED_COMPLETE', 'O_ACCEPTED_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_CREATED_COMPLETE', 'O_DECLINED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_SENT_BACK_COMPLETE', 'O_SENT_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Beoordelen fraude_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLET

In [26]:
########## Checking if all the permutations are the same (Shouldn't be) ##########
# Compare every recorded counterfactual input against the first one. Whole arrays
# are compared. With fewer than two recorded inputs there is nothing to compare and
# the result is True.
cf_inputs = dice_binary_model.all_cf_input
all_cf_are_same = all(np.array_equal(cf_inputs[0], cf_input) for cf_input in cf_inputs[1:])
print("All inputs are the same: %s" % (all_cf_are_same))

All inputs are the same: False


In [27]:
############### All Predicted next activity ###############
# Flatten the predictions recorded by the wrapper, de-duplicate them, and map the
# remaining indices back to activity names (displayed as the cell result).
predicted_indices = set(chain.from_iterable(dice_binary_model.all_predicted))
explainer.model.vocab.list_of_index_to_vocab(list(predicted_indices))

['<EOS>',
 'A_ACCEPTED_COMPLETE',
 'A_ACTIVATED_COMPLETE',
 'A_APPROVED_COMPLETE',
 'A_CANCELLED_COMPLETE',
 'A_DECLINED_COMPLETE',
 'A_FINALIZED_COMPLETE',
 'A_PARTLYSUBMITTED_COMPLETE',
 'A_PREACCEPTED_COMPLETE',
 'A_REGISTERED_COMPLETE',
 'O_ACCEPTED_COMPLETE',
 'O_CANCELLED_COMPLETE',
 'O_CREATED_COMPLETE',
 'O_DECLINED_COMPLETE',
 'O_SELECTED_COMPLETE',
 'O_SENT_BACK_COMPLETE',
 'O_SENT_COMPLETE',
 'W_Afhandelen leads_COMPLETE',
 'W_Beoordelen fraude_COMPLETE',
 'W_Completeren aanvraag_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'W_Valideren aanvraag_COMPLETE']

In [28]:
# Show the query instance followed by the counterfactual, displaying only the steps
# that changed (unchanged steps appear as "-" in the output).
dice_exp.visualize_as_dataframe(show_only_changes=True, display_sparse_df=False)

Query instance (original outcome : 0)


Unnamed: 0,step_1,step_2,step_3,step_4,step_5,step_6,step_7,step_8,step_9,step_10,...,step_12,step_13,step_14,step_15,step_16,step_17,step_18,step_19,step_20,predicted
0,A_SUBMITTED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,A_PREACCEPTED_COMPLETE,A_ACCEPTED_COMPLETE,O_SELECTED_COMPLETE,A_FINALIZED_COMPLETE,O_CREATED_COMPLETE,O_SENT_COMPLETE,W_Completeren aanvraag_COMPLETE,O_CANCELLED_COMPLETE,...,O_CREATED_COMPLETE,O_SENT_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_COMPLETE,0.0



Diverse Counterfactual set without sparsity correction since only metadata about each  feature is available (new outcome:  1.0


Unnamed: 0,step_1,step_2,step_3,step_4,step_5,step_6,step_7,step_8,step_9,step_10,...,step_12,step_13,step_14,step_15,step_16,step_17,step_18,step_19,step_20,predicted
0,-,A_ACCEPTED_COMPLETE,-,-,-,-,-,-,-,A_ACCEPTED_COMPLETE,...,W_Nabellen offertes_COMPLETE,O_DECLINED_COMPLETE,W_Valideren aanvraag_COMPLETE,O_CREATED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,A_DECLINED_COMPLETE,O_SENT_COMPLETE,W_Afhandelen leads_COMPLETE,A_REGISTERED_COMPLETE,0.524


In [29]:
######## Print the example trace ########
# First (and only) row of the query DataFrame, printed as a plain list.
example_row = example_df.iloc[0]
print(example_row.tolist())

['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE']


In [30]:
######## Print the counterfactual trace ########
# First counterfactual row without its last column (presumably the "predicted"
# outcome — confirm against final_cfs_df.columns).
counterfactual_row = dice_exp.final_cfs_df.iloc[0]
print(list(counterfactual_row.iloc[:-1]))

['A_SUBMITTED_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_DECLINED_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'O_CREATED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_DECLINED_COMPLETE', 'O_SENT_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'A_REGISTERED_COMPLETE']
