In [1]:
from Controller import TrainingController, ExplainingController
from Utils.SaveUtils import load_parameters
from Parameters import TrainingParameters, PredictingParameters
import tensorflow as tf
from IPython.core.display import display, HTML
import json
import numpy as np
import pandas as pd
from Parameters.Enums import TracePermutationStrategies

import dice_ml
from dice_ml.utils import helpers


In [2]:
### Identified important activities
a_important_activities = ["A_ACTIVATED_COMPLETE", "A_APPROVED_COMPLETE", "A_REGISTERED_COMPLETE", "A_DECLINED_COMPLETE"]
o_important_activities = ["O_ACCEPTED_COMPLETE", "O_DECLINED_COMPLETE"]
all_important_vocabs = a_important_activities+o_important_activities

In [3]:
### Select models to load
folder_path = "./SavedModels/%s" % (
# "0.8569_BPI2012_BaseLineLSTMModel_2021-05-28 19:02:39.442554" # OW
# "0.8324_BPI2012_BaseLineLSTMModel_2021-06-05 15:19:51.992793" # W
"0.8613_BPI2012_BaseLineLSTMModel_2021-06-07 17:36:01.075556" # AOW
)

In [4]:
### Setting up parameters
parameters_json = load_parameters(folder_path=folder_path)
parameters = TrainingParameters(**parameters_json)
tf.random.set_seed(parameters.dataset_split_seed)
np.random.seed(parameters.dataset_split_seed)
parameters.load_model_folder_path = folder_path
predicting_parameters = PredictingParameters()
predicting_parameters.load_model_folder_path = folder_path

In [5]:
trainer = TrainingController(parameters = parameters)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Preprocessed data loaded successfully: ./datasets/preprocessed/BPI_Challenge_2012/AOW 

| Model loaded successfully from: ./SavedModels/0.8613_BPI2012_BaseLineLSTMModel_2021-06-07 17:36:01.075556  


In [6]:
all_last_steps =  [i[-2] for i in trainer.dataset.df["trace"]]
last_df = pd.DataFrame(all_last_steps, columns=["last_step"])
ending_vocab_counts = {}
ending_value_count_dict = dict(last_df["last_step"].value_counts())
for k in ending_value_count_dict:
    vocab_key = trainer.model.vocab.index_to_vocab(k)
    ending_vocab_counts[vocab_key] = ending_value_count_dict[k]

In [7]:
ending_vocab_counts

{'A_DECLINED_COMPLETE': 3429,
 'W_Valideren aanvraag_COMPLETE': 2745,
 'W_Afhandelen leads_COMPLETE': 2234,
 'W_Completeren aanvraag_COMPLETE': 1939,
 'W_Nabellen offertes_COMPLETE': 1289,
 'A_CANCELLED_COMPLETE': 655,
 'W_Nabellen incomplete dossiers_COMPLETE': 452,
 'O_CANCELLED_COMPLETE': 279,
 'W_Beoordelen fraude_COMPLETE': 57,
 'W_Wijzigen contractgegevens_SCHEDULE': 4,
 'W_Valideren aanvraag_START': 2,
 'W_Nabellen offertes_START': 1,
 'A_REGISTERED_COMPLETE': 1}

In [8]:
print("Most common ending activities: ")
print(trainer.model.vocab.list_of_index_to_vocab(list(last_df["last_step"].value_counts().keys())))

Most common ending activities: 
['A_DECLINED_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'A_CANCELLED_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'O_CANCELLED_COMPLETE', 'W_Beoordelen fraude_COMPLETE', 'W_Wijzigen contractgegevens_SCHEDULE', 'W_Valideren aanvraag_START', 'W_Nabellen offertes_START', 'A_REGISTERED_COMPLETE']


In [9]:
trainer.model.vocab.vocab_dict

{'<EOS>': 1,
 '<SOS>': 2,
 'A_ACCEPTED_COMPLETE': 3,
 'A_ACTIVATED_COMPLETE': 4,
 'A_APPROVED_COMPLETE': 5,
 'A_CANCELLED_COMPLETE': 6,
 'A_DECLINED_COMPLETE': 7,
 'A_FINALIZED_COMPLETE': 8,
 'A_PARTLYSUBMITTED_COMPLETE': 9,
 'A_PREACCEPTED_COMPLETE': 10,
 'A_REGISTERED_COMPLETE': 11,
 'A_SUBMITTED_COMPLETE': 12,
 'O_ACCEPTED_COMPLETE': 13,
 'O_CANCELLED_COMPLETE': 14,
 'O_CREATED_COMPLETE': 15,
 'O_DECLINED_COMPLETE': 16,
 'O_SELECTED_COMPLETE': 17,
 'O_SENT_BACK_COMPLETE': 18,
 'O_SENT_COMPLETE': 19,
 'W_Afhandelen leads_COMPLETE': 20,
 'W_Afhandelen leads_SCHEDULE': 21,
 'W_Afhandelen leads_START': 22,
 'W_Beoordelen fraude_COMPLETE': 23,
 'W_Beoordelen fraude_SCHEDULE': 24,
 'W_Beoordelen fraude_START': 25,
 'W_Completeren aanvraag_COMPLETE': 26,
 'W_Completeren aanvraag_SCHEDULE': 27,
 'W_Completeren aanvraag_START': 28,
 'W_Nabellen incomplete dossiers_COMPLETE': 29,
 'W_Nabellen incomplete dossiers_SCHEDULE': 30,
 'W_Nabellen incomplete dossiers_START': 31,
 'W_Nabellen offertes

In [10]:
explainer = ExplainingController(parameters=parameters, predicting_parameters= predicting_parameters)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Model loaded successfully from: ./SavedModels/0.8613_BPI2012_BaseLineLSTMModel_2021-06-07 17:36:01.075556  


In [11]:
tags_to_remove = ["<PAD>", "<EOS>", "<SOS>"]
index_to_remove = explainer.model.vocab.list_of_vocab_to_index(tags_to_remove)

In [12]:
########### Get example data from trainer ###########
trainer.test_dataset.unbatch()
ordered_test_idx = (list(trainer.test_dataset.unbatch().as_numpy_iterator()))
ordered_test_idx.sort()
print("Test set length: %d" %(len(ordered_test_idx)))

Test set length: 1308


In [13]:
success_activities = ['O_ACCEPTED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE']
all_success_case_in_test = []
for i in ordered_test_idx:
    idx_trace = trainer.dataset.collate_fn([i])[1][0]
    vocab_trace = explainer.model.vocab.list_of_index_to_vocab(idx_trace.tolist())
    if any([ s_a in vocab_trace  for s_a in success_activities]):
        all_success_case_in_test.append(i)

all_fail_case_in_test = [ i for i in ordered_test_idx if (i not in all_success_case_in_test) ]

In [14]:
print("Trace count (Successful): %d " % (len(all_success_case_in_test))) 
print("Trace count (Failed): %d " % (len(all_fail_case_in_test))) 

Trace count (Successful): 224 
Trace count (Failed): 1084 


In [15]:
mean_success_length = np.mean([len(trainer.dataset.collate_fn([i])[1][0]) for i in all_success_case_in_test])
mean_fail_length = np.mean([len(trainer.dataset.collate_fn([i])[1][0]) for i in all_fail_case_in_test])
print("Mean trace length (Successful): %.2f " % (mean_success_length)) 
print("Mean trace length (Failed): %.2f " % (mean_fail_length)) 

Mean trace length (Successful): 44.94 
Mean trace length (Failed): 16.00 


In [16]:
success_case_idx = 2
fail_case_idx = 7
print("=========================Accepted=========================")
accpeted_example_idx_trace = trainer.dataset.collate_fn([all_success_case_in_test[success_case_idx]])[1][0]
accpeted_example_vocab_trace = explainer.model.vocab.list_of_index_to_vocab(accpeted_example_idx_trace.tolist())
print(accpeted_example_vocab_trace)
print("=========================Failed===========================")
failed_example_idx_trace = trainer.dataset.collate_fn([all_fail_case_in_test[fail_case_idx]])[1][0]
failed_example_vocab_trace = explainer.model.vocab.list_of_index_to_vocab(failed_example_idx_trace.tolist())
print(failed_example_vocab_trace)

['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_SCHEDULE', 'W_Completeren aanvraag_START', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_SCHEDULE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'O_SENT_BACK_COMPLETE', 'W

In [17]:
# example_vocab_input = ['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_SCHEDULE', 'W_Completeren aanvraag_START', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE'] # 'A_FINALIZED_COMPLETE']

example_vocab_input = ['<SOS>','A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_SCHEDULE', 'W_Completeren aanvraag_START', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_START', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_START', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_SCHEDULE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START', 'O_SENT_BACK_COMPLETE', 'W_Valideren aanvraag_SCHEDULE', 'W_Nabellen offertes_COMPLETE', 'W_Valideren aanvraag_START', 'W_Valideren aanvraag_COMPLETE', 'W_Valideren aanvraag_START', 'W_Nabellen incomplete dossiers_SCHEDULE', 'W_Valideren aanvraag_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'O_ACCEPTED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE']
example_vocab_input = example_vocab_input[:20]
example_idx_trace = np.array(explainer.model.vocab.list_of_vocab_to_index(example_vocab_input))
#### Throw example into model ####
example_input = tf.constant([example_idx_trace])

out, _ = explainer.model(example_input)

predicted_vocab_distributions = tf.gather(out, len(example_vocab_input)-1, axis=1) 
predicted_vocab_distributions_df = pd.DataFrame(predicted_vocab_distributions.numpy().tolist(),columns= list(explainer.model.vocab.vocab_dict.keys()))
max_arg = tf.math.argmax(predicted_vocab_distributions, axis=-1).numpy()[0]
max_prob_vocab = explainer.model.vocab.index_to_vocab(max_arg)
print("Predicted activity with highest probability (%.2f) is \"%s\"" % (predicted_vocab_distributions[0][max_arg].numpy(), max_prob_vocab))

Predicted activity with highest probability (0.68) is "W_Nabellen offertes_COMPLETE"


In [18]:
## Get an completed example
# index_from_test = 10
# example_idx_trace = np.array([ 2, 12,  9, 10 ])
# example_idx_trace = trainer.dataset.collate_fn([ordered_test_idx[index_from_test]])[1][0] # Select one from test set.
# example_idx_trace = trainer.dataset.collate_fn([all_success_case_in_test[success_case_idx]])[1][0] # Select one from success set.

example_vocab_trace = explainer.model.vocab.list_of_index_to_vocab(example_idx_trace.tolist())

### Remove tags
example_idx_trace_without_tags = [ i for i in  example_idx_trace if not i in index_to_remove]
example_vocab_trace_without_tags = explainer.model.vocab.list_of_index_to_vocab(example_idx_trace_without_tags)

print(example_vocab_trace_without_tags)

['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_SCHEDULE', 'W_Completeren aanvraag_START', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_START', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_START', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_SCHEDULE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START']


In [19]:
### Set up arguments for CF
feature_names = np.array(["step_%d" % (i+1) for i in range(len(example_vocab_trace_without_tags))])
without_tags_vocabs = [ v for v in list(explainer.vocab.vocab_dict.keys()) if not "<" in v]
cat_vars_without_tag = {}
for k in feature_names:
    cat_vars_without_tag[k] = without_tags_vocabs

In [20]:
class ExtractingLastTimeStampProbDistributionModel(tf.keras.Model):
    '''
    It's a new model classifying where the destination is prefered.
    '''
    def __init__(self, explainer: ExplainingController, desired: int, trace_length: int, without_tags_vocabs):
        super(ExtractingLastTimeStampProbDistributionModel, self).__init__()
        self.explainer = explainer
        self.desired = desired
        self.trace_length = trace_length
        self.all_predicted = []
        self.all_trace = []
        self.all_model_out = []
        self.all_cf_input = []

    def call(self, input):
        '''
        Input will be one-hot encoded tensor.
        '''

        self.all_cf_input.append(input.numpy())

        ### Get real input from the one-hot encoded tensor.
        input = tf.argmax(tf.stack(tf.split(input,self.trace_length, axis=-1,), axis = 1), axis = -1)
        # self.input_before_convert = input
        # print("=========Before Converting=========")
        # print(input)
        # print("=======================")

        ### transfer to the input with tags.
        input = tf.constant(explainer.vocab.list_of_vocab_to_index_2d([[without_tags_vocabs[idx] for idx in tf.squeeze(input).numpy()]]), dtype=tf.int64)
        # self.input_after_convert = input
        # print("=========After Converting=========")
        # print(input)
        # print("=======================")

        # print("=========Input.Shape=========")
        # print(input.shape)
        # print("=======================")

        self.all_trace.append(input.numpy())

        ## Concate the <SOS> tag in the first step.
        input = tf.concat([tf.constant([[2]], dtype=tf.int64) ,  input], axis=-1)

        ## Feed to the model
        out, _ = explainer.model(input)
        self.all_model_out.append(out.numpy())


        self.all_predicted.append(tf.argmax(out[:, -1, :], axis = -1).numpy())

        return out[:, -1, self.desired: self.desired+1]

        ## Take the activty with max possibility.
        out = tf.argmax(out[:, -1, :], axis = -1)

        self.all_predicted.append(out.numpy())

        ## Determine whether the 

        return tf.expand_dims(tf.cast(out == self.desired, dtype=tf.float32), axis = 0)

In [21]:
all_important_vocabs

['A_ACTIVATED_COMPLETE',
 'A_APPROVED_COMPLETE',
 'A_REGISTERED_COMPLETE',
 'A_DECLINED_COMPLETE',
 'O_ACCEPTED_COMPLETE',
 'O_DECLINED_COMPLETE']

In [22]:
desire_vocabs = all_important_vocabs[3]
# desire_vocabs = "O_SENT_COMPLETE" # all_important_vocabs[3]

In [23]:
desire_vocabs

'A_DECLINED_COMPLETE'

In [24]:
last_dist_model = ExtractingLastTimeStampProbDistributionModel(explainer, desired=explainer.vocab.vocab_to_index(desire_vocabs), trace_length = len(example_vocab_trace_without_tags), without_tags_vocabs = without_tags_vocabs)

In [25]:
d = dice_ml.Data(features=cat_vars_without_tag, outcome_name="predicted",continuous_features = [])
m = dice_ml.Model(model= last_dist_model, backend="TF2")
exp = dice_ml.Dice(d, m)

In [26]:
example_df = pd.DataFrame([example_vocab_trace_without_tags], columns= feature_names)

In [27]:
example_df

Unnamed: 0,step_1,step_2,step_3,step_4,step_5,step_6,step_7,step_8,step_9,step_10,step_11,step_12,step_13,step_14,step_15,step_16,step_17,step_18,step_19
0,A_SUBMITTED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,A_PREACCEPTED_COMPLETE,W_Completeren aanvraag_SCHEDULE,W_Completeren aanvraag_START,W_Completeren aanvraag_COMPLETE,W_Completeren aanvraag_START,W_Completeren aanvraag_COMPLETE,W_Completeren aanvraag_START,A_ACCEPTED_COMPLETE,A_FINALIZED_COMPLETE,O_SELECTED_COMPLETE,O_CREATED_COMPLETE,O_SENT_COMPLETE,W_Nabellen offertes_SCHEDULE,W_Completeren aanvraag_COMPLETE,W_Nabellen offertes_START,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_START


In [28]:
dice_exp = exp.generate_counterfactuals(example_df,
 total_CFs=1,
  desired_class="opposite",
   min_iter=100, max_iter=500,
    # permitted_range= cat_vars_without_tag
    # proximity_weight =1000, #0.5,
    # diversity_weight =2000,#1.0,
    tie_random = True,
    categorical_penalty = 20,
    )

Diverse Counterfactuals found! total time taken: 00 min 58 sec


In [36]:
all_traces = []
#### for the first step how many activities are tried.
for i in range(len(example_vocab_trace_without_tags)):
    # list(set((np.squeeze(np.array(last_dist_model.all_trace))[:, 0]).tolist()))
    trace = explainer.model.vocab.list_of_index_to_vocab(list(set((np.squeeze(np.array(last_dist_model.all_trace))[:, i]).tolist())))
    all_traces.append(trace)

In [38]:
print(all_traces)

[['W_Nabellen offertes_START', 'A_ACCEPTED_COMPLETE', 'A_SUBMITTED_COMPLETE'], ['W_Nabellen offertes_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_CANCELLED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_REGISTERED_COMPLETE', 'O_ACCEPTED_COMPLETE', 'O_DECLINED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen incomplete dossiers_SCHEDULE'], ['W_Nabellen offertes_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_ACTIVATED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Afhandelen leads_START', 'W_Beoordelen fraude_COMPLETE', 'W_Nabellen incomplete dossiers_START'], ['W_Nabellen offertes_SCHEDULE', 'A_ACCEPTED_COMPLETE', 'W_Valideren aanvraag_SCHEDULE', 'W_Valideren aanvraag_START', 'A_DECLINED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'O_CREATED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_SCHEDULE', 'W_Nabellen incomplete dossiers_SCHEDULE'], ['A_ACCEPTED_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Wijzigen contractgegevens_SCHEDULE', 'A_PARTLYSUBMITTED_

In [39]:
all_cf_are_same = all([all((last_dist_model.all_cf_input[1] == last_dist_model.all_cf_input[i]).tolist()[0]) for i in range(len(last_dist_model.all_cf_input))][1:])
print("All inputs are the same: %s" % (all_cf_are_same))

All inputs are the same: False


In [31]:
##### All Predicted
from itertools import chain
explainer.model.vocab.list_of_index_to_vocab(list(set(list(chain.from_iterable(last_dist_model.all_predicted)))))

['<EOS>',
 'A_ACTIVATED_COMPLETE',
 'A_APPROVED_COMPLETE',
 'A_CANCELLED_COMPLETE',
 'A_DECLINED_COMPLETE',
 'A_FINALIZED_COMPLETE',
 'A_PARTLYSUBMITTED_COMPLETE',
 'A_REGISTERED_COMPLETE',
 'A_SUBMITTED_COMPLETE',
 'O_ACCEPTED_COMPLETE',
 'O_CANCELLED_COMPLETE',
 'O_CREATED_COMPLETE',
 'O_DECLINED_COMPLETE',
 'O_SELECTED_COMPLETE',
 'O_SENT_COMPLETE',
 'W_Afhandelen leads_COMPLETE',
 'W_Afhandelen leads_START',
 'W_Beoordelen fraude_COMPLETE',
 'W_Beoordelen fraude_SCHEDULE',
 'W_Beoordelen fraude_START',
 'W_Completeren aanvraag_COMPLETE',
 'W_Completeren aanvraag_SCHEDULE',
 'W_Completeren aanvraag_START',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'W_Nabellen incomplete dossiers_SCHEDULE',
 'W_Nabellen incomplete dossiers_START',
 'W_Nabellen offertes_COMPLETE',
 'W_Nabellen offertes_START',
 'W_Valideren aanvraag_COMPLETE',
 'W_Valideren aanvraag_SCHEDULE',
 'W_Valideren aanvraag_START',
 'W_Wijzigen contractgegevens_SCHEDULE']

In [32]:
dice_exp.visualize_as_dataframe(show_only_changes=True, display_sparse_df=False)

Query instance (original outcome : 0)


Unnamed: 0,step_1,step_2,step_3,step_4,step_5,step_6,step_7,step_8,step_9,step_10,step_11,step_12,step_13,step_14,step_15,step_16,step_17,step_18,step_19,predicted
0,A_SUBMITTED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,A_PREACCEPTED_COMPLETE,W_Completeren aanvraag_SCHEDULE,W_Completeren aanvraag_START,W_Completeren aanvraag_COMPLETE,W_Completeren aanvraag_START,W_Completeren aanvraag_COMPLETE,W_Completeren aanvraag_START,A_ACCEPTED_COMPLETE,A_FINALIZED_COMPLETE,O_SELECTED_COMPLETE,O_CREATED_COMPLETE,O_SENT_COMPLETE,W_Nabellen offertes_SCHEDULE,W_Completeren aanvraag_COMPLETE,W_Nabellen offertes_START,W_Nabellen offertes_COMPLETE,W_Nabellen offertes_START,0.001



Diverse Counterfactual set without sparsity correction since only metadata about each  feature is available (new outcome:  1.0


Unnamed: 0,step_1,step_2,step_3,step_4,step_5,step_6,step_7,step_8,step_9,step_10,step_11,step_12,step_13,step_14,step_15,step_16,step_17,step_18,step_19,predicted
0,A_ACCEPTED_COMPLETE,-,-,-,-,-,-,-,-,W_Nabellen incomplete dossiers_COMPLETE,W_Nabellen incomplete dossiers_START,W_Afhandelen leads_SCHEDULE,A_CANCELLED_COMPLETE,O_DECLINED_COMPLETE,W_Valideren aanvraag_START,O_SELECTED_COMPLETE,W_Valideren aanvraag_COMPLETE,W_Valideren aanvraag_SCHEDULE,W_Completeren aanvraag_START,0.504


In [33]:
print(list(example_df.iloc[0]))

['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_SCHEDULE', 'W_Completeren aanvraag_START', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_START', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_START', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_SCHEDULE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_START', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_START']


In [34]:
print(list(dice_exp.final_cfs_df.iloc[0][:-1]))

['A_ACCEPTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_SCHEDULE', 'W_Completeren aanvraag_START', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_START', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_START', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_START', 'W_Afhandelen leads_SCHEDULE', 'A_CANCELLED_COMPLETE', 'O_DECLINED_COMPLETE', 'W_Valideren aanvraag_START', 'O_SELECTED_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Valideren aanvraag_SCHEDULE', 'W_Completeren aanvraag_START']
