In [1]:
from Controller import TrainingController, ExplainingController
from Utils.SaveUtils import load_parameters
from Parameters import TrainingParameters, PredictingParameters
import tensorflow as tf
from IPython.core.display import display, HTML
import json
import numpy as np
import pandas as pd
from Parameters.Enums import TracePermutationStrategies
from Models import DiCEBinaryOutputModelWithResource
from itertools import chain

import dice_ml
from dice_ml.utils import helpers

# Fix the global TensorFlow and NumPy random seeds for this session.
seed = 123
for _set_seed in (tf.random.set_seed, np.random.seed):
    _set_seed(seed)

# Report the runtime environment (the saved output shows eager execution enabled).
print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly())

TF version:  2.4.0-rc0
Eager execution enabled:  True


In [2]:
# Name of the saved-model directory to load; the path is built under ./SavedModels.
_saved_model_name = "0.8264_BPI2012WithResource_BaselineLSTMWithResource_2021-06-18 06:11:10.009443"  # AOW
folder_path = "./SavedModels/%s" % _saved_model_name

In [3]:
# Rebuild the training configuration stored alongside the saved model.
parameters_json = load_parameters(folder_path=folder_path)
parameters = TrainingParameters(**parameters_json)

# Re-seed TensorFlow and NumPy with the split seed recorded in those parameters
# (this replaces the fixed seed set in the first cell).
split_seed = parameters.dataset_split_seed
tf.random.set_seed(split_seed)
np.random.seed(split_seed)

# Point both the training and the predicting configuration at the same checkpoint.
parameters.load_model_folder_path = folder_path
predicting_parameters = PredictingParameters()
predicting_parameters.load_model_folder_path = folder_path
 

In [4]:
# Build the training controller from the restored parameters. The saved output
# shows it loading the preprocessed dataset and the model from `folder_path`.
trainer = TrainingController(parameters = parameters)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Preprocessed data loaded successfully: ./datasets/preprocessed/BPI_Challenge_2012_with_resource/AOW 

| Model loaded successfully from: ./SavedModels/0.8264_BPI2012WithResource_BaselineLSTMWithResource_2021-06-18 06:11:10.009443  


In [5]:
# Build the explaining controller from the same training parameters plus the
# predicting parameters. The saved output shows it loading the same saved model.
explainer = ExplainingController(parameters=parameters, predicting_parameters= predicting_parameters)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Model loaded successfully from: ./SavedModels/0.8264_BPI2012WithResource_BaselineLSTMWithResource_2021-06-18 06:11:10.009443  


In [6]:
########### Get example data from trainer ###########
# Materialise the unbatched test split and order its elements so that picking
# an example by position gives the same case on every run.
ordered_test_idx = sorted(trainer.test_dataset.unbatch().as_numpy_iterator())
print("Test set length: %d" % len(ordered_test_idx))

Test set length: 1309


In [7]:
# Collate one test case (position 5 of the sorted test indices) into model
# inputs. collate_fn returns six values; the last one is not used in this notebook.
caseids, example_data, example_lengths, example_resources, example_amount, _ = trainer.dataset.collate_fn([ordered_test_idx[5]])

In [8]:
# Inspect the shape of the collated activity indices (saved output: (1, 37)).
example_data.shape

(1, 37)

In [9]:
# Decode the collated activity indices back to activity names for inspection.
explainer.vocab.list_of_index_to_vocab_2d(example_data)

[['<SOS>',
  'A_SUBMITTED_COMPLETE',
  'A_PARTLYSUBMITTED_COMPLETE',
  'A_PREACCEPTED_COMPLETE',
  'W_Afhandelen leads_COMPLETE',
  'W_Completeren aanvraag_COMPLETE',
  'A_ACCEPTED_COMPLETE',
  'A_FINALIZED_COMPLETE',
  'O_SELECTED_COMPLETE',
  'O_CREATED_COMPLETE',
  'O_SENT_COMPLETE',
  'W_Completeren aanvraag_COMPLETE',
  'W_Nabellen offertes_COMPLETE',
  'W_Nabellen offertes_COMPLETE',
  'W_Nabellen offertes_COMPLETE',
  'W_Nabellen offertes_COMPLETE',
  'W_Nabellen offertes_COMPLETE',
  'O_SENT_BACK_COMPLETE',
  'W_Nabellen offertes_COMPLETE',
  'W_Valideren aanvraag_COMPLETE',
  'W_Valideren aanvraag_COMPLETE',
  'W_Nabellen incomplete dossiers_COMPLETE',
  'O_CANCELLED_COMPLETE',
  'O_SELECTED_COMPLETE',
  'O_CREATED_COMPLETE',
  'O_SENT_COMPLETE',
  'W_Nabellen incomplete dossiers_COMPLETE',
  'W_Nabellen incomplete dossiers_COMPLETE',
  'W_Nabellen incomplete dossiers_COMPLETE',
  'W_Nabellen incomplete dossiers_COMPLETE',
  'O_SENT_BACK_COMPLETE',
  'W_Nabellen incomplete dos

In [10]:
def _drop_trailing_steps(steps, n_steps):
    """Return ``steps`` without its last ``n_steps`` items.

    A plain ``steps[:-n_steps]`` silently yields an empty sequence when
    ``n_steps`` is 0. Slicing by an explicit end position keeps every item in
    that case, and still yields an empty sequence when ``n_steps`` is at least
    ``len(steps)``.
    """
    return steps[:max(len(steps) - n_steps, 0)]


# Number of trailing events cut from the example, so the model is later asked
# to predict what follows the shortened trace.
remove_trail_steps = 2

# Keep a leading batch axis of size 1 around the truncated activity/resource indices.
example_idx_trace = np.array([_drop_trailing_steps(example_data[0], remove_trail_steps)])
example_idx_resources = np.array([_drop_trailing_steps(example_resources[0], remove_trail_steps)])

# Human-readable activity names of the truncated trace.
example_vocab_trace = trainer.model.vocab.list_of_index_to_vocab_2d(example_idx_trace)[0]

In [11]:
# Display the truncated trace as activity names.
example_vocab_trace 

['<SOS>',
 'A_SUBMITTED_COMPLETE',
 'A_PARTLYSUBMITTED_COMPLETE',
 'A_PREACCEPTED_COMPLETE',
 'W_Afhandelen leads_COMPLETE',
 'W_Completeren aanvraag_COMPLETE',
 'A_ACCEPTED_COMPLETE',
 'A_FINALIZED_COMPLETE',
 'O_SELECTED_COMPLETE',
 'O_CREATED_COMPLETE',
 'O_SENT_COMPLETE',
 'W_Completeren aanvraag_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'O_SENT_BACK_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'W_Valideren aanvraag_COMPLETE',
 'W_Valideren aanvraag_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'O_CANCELLED_COMPLETE',
 'O_SELECTED_COMPLETE',
 'O_CREATED_COMPLETE',
 'O_SENT_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'O_SENT_BACK_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'O_ACCEPTED_CO

In [12]:
# Wrap the truncated activity indices, resource indices and the amount as
# TensorFlow constants so they can be passed to the model.
example_input = tf.constant(example_idx_trace)
example_resources_input = tf.constant(example_idx_resources)
example_amount_input = tf.constant(example_amount)

In [13]:
# Forward pass through the loaded model. It returns two values; only the first
# is kept (presumably per-step distributions over the vocabulary — the next
# cell gathers along axis 1 and takes an argmax over the last axis).
out, _ = explainer.model(example_input, input_resources = example_resources_input, amount = example_amount_input)

In [14]:
# Model output at the last position of the input trace, i.e. the prediction for
# the activity that follows the truncated example.
last_step = len(example_vocab_trace) - 1
predicted_vocab_distributions = tf.gather(out, last_step, axis=1)

activity_vocab = explainer.model.vocab
predicted_vocab_distributions_df = pd.DataFrame(
    predicted_vocab_distributions.numpy().tolist(),
    columns=activity_vocab.vocabs,
)

# Highest-scoring vocabulary entry for the first (only) example in the batch.
max_arg = tf.math.argmax(predicted_vocab_distributions, axis=-1).numpy()[0]
max_prob_vocab = activity_vocab.index_to_vocab(max_arg)
max_prob = predicted_vocab_distributions[0][max_arg].numpy()
print("Predicted activity with highest probability (%.2f) is \"%s\"" % (max_prob, max_prob_vocab))

# Show the full distribution both as a rich table and as a plain-text series.
display(predicted_vocab_distributions_df)
print(predicted_vocab_distributions_df.iloc[0])

Predicted activity with highest probability (1.00) is "A_ACTIVATED_COMPLETE"


Unnamed: 0,<PAD>,<EOS>,<SOS>,A_ACCEPTED_COMPLETE,A_ACTIVATED_COMPLETE,A_APPROVED_COMPLETE,A_CANCELLED_COMPLETE,A_DECLINED_COMPLETE,A_FINALIZED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,...,O_DECLINED_COMPLETE,O_SELECTED_COMPLETE,O_SENT_BACK_COMPLETE,O_SENT_COMPLETE,W_Afhandelen leads_COMPLETE,W_Beoordelen fraude_COMPLETE,W_Completeren aanvraag_COMPLETE,W_Nabellen incomplete dossiers_COMPLETE,W_Nabellen offertes_COMPLETE,W_Valideren aanvraag_COMPLETE
0,4.614226e-12,1.789443e-10,1.439743e-10,3.851426e-09,0.99792,0.000167,2.115873e-07,3e-06,4.054506e-10,1.483202e-11,...,9.100694e-07,6.379963e-07,1.383656e-10,1.061643e-10,8.480251e-10,7.571456e-12,1.022284e-08,1e-06,3.6667e-11,3.4e-05


<PAD>                                      4.614226e-12
<EOS>                                      1.789443e-10
<SOS>                                      1.439743e-10
A_ACCEPTED_COMPLETE                        3.851426e-09
A_ACTIVATED_COMPLETE                       9.979196e-01
A_APPROVED_COMPLETE                        1.670038e-04
A_CANCELLED_COMPLETE                       2.115873e-07
A_DECLINED_COMPLETE                        2.833510e-06
A_FINALIZED_COMPLETE                       4.054506e-10
A_PARTLYSUBMITTED_COMPLETE                 1.483202e-11
A_PREACCEPTED_COMPLETE                     1.400106e-08
A_REGISTERED_COMPLETE                      1.805327e-03
A_SUBMITTED_COMPLETE                       6.897296e-08
O_ACCEPTED_COMPLETE                        6.786489e-05
O_CANCELLED_COMPLETE                       4.381851e-10
O_CREATED_COMPLETE                         1.440350e-10
O_DECLINED_COMPLETE                        9.100694e-07
O_SELECTED_COMPLETE                        6.379

In [15]:
# Positions of the special tags in the resource list; these are filtered out of
# the example's resource indices later on.
indexes_to_remove_from_resource = [
    explainer.resources.index(tag) for tag in ('<PAD>', '<SOS>', '<EOS>')
]

In [16]:
# Strip the special tags from the example so that only real events remain.
# The tag indices are looked up once instead of once per trace element.
activity_tag_indexes = explainer.vocab.tags_idx()
example_idx_trace_without_tags = [i for i in example_idx_trace[0] if i not in activity_tag_indexes]
example_vocab_trace_without_tags = explainer.model.vocab.list_of_index_to_vocab(example_idx_trace_without_tags)

example_idx_resources_without_tags = [i for i in example_idx_resources[0] if i not in indexes_to_remove_from_resource]

example_vocab_resource_without_tags = [
    explainer.resources[r] for r in example_idx_resources_without_tags
]

# Activities and resources are filtered independently, but later cells pair
# them step by step (one activity and one resource feature per step), so a
# length mismatch must be caught here rather than surface as a confusing
# column-count error further down.
assert len(example_vocab_trace_without_tags) == len(example_vocab_resource_without_tags), (
    "Activity and resource sequences differ in length after removing tags: %d vs %d"
    % (len(example_vocab_trace_without_tags), len(example_vocab_resource_without_tags))
)

print("=============Example activities without tags=============")
print(example_vocab_trace_without_tags)

print("=============Example resources without tags=============")
print(example_vocab_resource_without_tags)

['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'O_ACCEPTED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGIST

In [17]:
# One categorical feature per remaining trace step, for activities and for
# resources. Every step may take any non-tag value (entries containing "<" are
# the special tags and are excluded).
n_steps = len(example_vocab_trace_without_tags)

activity_feature_names = np.array(["step_%d_activity" % step for step in range(1, n_steps + 1)])
without_tags_vocabs = [vocab for vocab in explainer.vocab.vocabs if "<" not in vocab]
# All step features share the same list of allowed activities.
activity_cat_vars_without_tag = dict.fromkeys(activity_feature_names, without_tags_vocabs)

resource_feature_names = np.array(["step_%d_resource" % step for step in range(1, n_steps + 1)])
without_tags_resources = [resource for resource in explainer.resources if "<" not in resource]
# All step features share the same list of allowed resources.
resources_cat_vars_without_tag = dict.fromkeys(resource_feature_names, without_tags_resources)

In [18]:
# Activity the counterfactual search should steer the prediction towards.
# Options tried so far: A_DECLINED_COMPLETE, A_APPROVED_COMPLETE
desired_activity = 'A_DECLINED_COMPLETE'
print("Desired activity is \"%s\"" % (desired_activity,))

Desired activity is "A_DECLINED_COMPLETE"


In [19]:
# Full feature description for DiCE: allowed categories for every activity and
# resource step, followed by the [min, max] range of the continuous amount.
all_features_and_range = {}
all_features_and_range.update(activity_cat_vars_without_tag)
all_features_and_range.update(resources_cat_vars_without_tag)
all_features_and_range["amount"] = [0, 99999.0]

In [20]:
# Wrap the next-activity model for DiCE. Judging by its name and the `desired`
# argument, the wrapper presumably reduces the model output to a binary
# "desired activity predicted or not" signal — confirm in Models.
amount_range = all_features_and_range['amount']
dice_binary_model = DiCEBinaryOutputModelWithResource(
    explainer.model,
    explainer.vocab,
    desired=explainer.vocab.vocab_to_index(desired_activity),
    trace_length=len(example_vocab_trace_without_tags),
    without_tags_vocabs=without_tags_vocabs,
    without_tags_resources=without_tags_resources,
    resources=explainer.resources,
    sos_idx_activity=explainer.vocab.vocab_to_index("<SOS>"),
    sos_idx_resource=explainer.resources.index('<SOS>'),
    amount_min=amount_range[0],
    amount_max=amount_range[1],
)

In [21]:
# Describe the feature space to DiCE from the feature dictionary alone (no
# training dataframe): categorical steps map to their allowed values and the
# continuous `amount` maps to its [min, max] range.
d = dice_ml.Data(features=all_features_and_range, outcome_name="predicted",continuous_features = ['amount'])
# Register the wrapped binary-output model with DiCE's "TF2" backend.
m = dice_ml.Model(model=dice_binary_model, backend="TF2")
# Counterfactual explainer built from the data description and the wrapped model.
exp = dice_ml.Dice(d, m)

Origin Amount
tf.Tensor([[0.9594629]], shape=(1, 1), dtype=float32)
Amount scale back
tf.Tensor([[95945.33]], shape=(1, 1), dtype=float32)


In [22]:
# Column order of the query instance: all activity steps, then all resource
# steps, then the amount.
feature_names = list(chain(
    activity_feature_names.tolist(),
    resource_feature_names.tolist(),
    ['amount'],
))

In [23]:
# Single-row query for DiCE, laid out in the same order as `feature_names`:
# activities, then resources, then the amount value(s).
query_row = [
    *example_vocab_trace_without_tags,
    *example_vocab_resource_without_tags,
    *example_amount,
]
query_instance = [query_row]
example_df = pd.DataFrame(query_instance, columns=feature_names)

In [22]:
# Ask DiCE for one counterfactual of the example: every feature may change and
# the target is the class opposite to the current prediction. The search runs
# for between `min_iter` and `max_iter` iterations.
# NOTE: the saved output of this cell ends in a KeyboardInterrupt, i.e. the
# recorded run was stopped manually before completing.
# The commented-out keyword arguments below are tuning options that were tried
# and left disabled.
dice_exp = exp.generate_counterfactuals(
        example_df,
        total_CFs=1,
        verbose=True,
        min_iter=100,
        max_iter=3000,
        desired_class="opposite",
        features_to_vary="all",
        algorithm = "RandomInitCF", # DiverseCF, RandomInitCF
        # proximity_weight=0.1, #0.5,
        # diversity_weight=50000,#1.0,
        # init_near_query_instance=True,
        # tie_random = True,
        # categorical_penalty = 1,
        # learning_rate=0.0005,
    )

32)
Amount scale back
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Origin Amount
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Amount scale back
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Origin Amount
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Amount scale back
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Origin Amount
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Amount scale back
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Origin Amount
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Amount scale back
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Origin Amount
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Amount scale back
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Origin Amount
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Amount scale back
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Origin Amount
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Amount scale back
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
Origin Amount
tf.Tensor([[0.]], shape=(1, 1), dty

KeyboardInterrupt: 

In [27]:
#### Check what permutations are fed into the model. ####
# Stack every trace recorded by the wrapped model once, up front (previously
# this array was rebuilt on each loop iteration).
received_traces = np.squeeze(np.array(dice_binary_model.all_trace))
if received_traces.ndim == 1:
    # With a single recorded trace (or single-step traces) the squeeze also
    # removes an axis we need; restore a (records, steps) layout so the
    # per-step column indexing below keeps working.
    received_traces = received_traces.reshape(len(dice_binary_model.all_trace), -1)

# For every step, collect the distinct activities the model was queried with.
all_received_activities = []
for i in range(len(example_vocab_trace_without_tags)):
    # Sorted so the printed result is stable across runs.
    step_activity_indexes = sorted(set(received_traces[:, i].tolist()))
    all_received_activities.append(
        explainer.model.vocab.list_of_index_to_vocab(step_activity_indexes)
    )
print(all_received_activities)
 

[['A_SUBMITTED_COMPLETE'], ['A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE'], ['A_PREACCEPTED_COMPLETE'], ['O_SELECTED_COMPLETE', 'W_Afhandelen leads_COMPLETE'], ['A_PREACCEPTED_COMPLETE', 'A_ACCEPTED_COMPLETE'], ['O_SELECTED_COMPLETE', 'A_SUBMITTED_COMPLETE'], ['A_FINALIZED_COMPLETE', 'O_CANCELLED_COMPLETE'], ['A_CANCELLED_COMPLETE', 'O_CREATED_COMPLETE'], ['O_SENT_COMPLETE', 'O_CANCELLED_COMPLETE'], ['O_DECLINED_COMPLETE', 'W_Completeren aanvraag_COMPLETE'], ['W_Nabellen offertes_COMPLETE', 'A_SUBMITTED_COMPLETE']]


In [26]:
#### Check what permutations are fed into the model. ####
# Stack every resource sequence recorded by the wrapped model once, up front
# (previously this array was rebuilt on each loop iteration).
received_resources = np.squeeze(np.array(dice_binary_model.all_resource))
if received_resources.ndim == 1:
    # With a single recorded sequence (or single-step sequences) the squeeze
    # also removes an axis we need; restore a (records, steps) layout so the
    # per-step column indexing below keeps working.
    received_resources = received_resources.reshape(len(dice_binary_model.all_resource), -1)

# For every step, collect the distinct resources the model was queried with.
all_received_resources = []
for i in range(len(example_vocab_trace_without_tags)):
    # Sorted so the printed result is stable across runs.
    step_resource_indexes = sorted(set(received_resources[:, i].tolist()))
    all_received_resources.append([explainer.resources[r] for r in step_resource_indexes])
print(all_received_resources)

[['10789', '112'], ['112', '11200'], ['10779', '11001'], ['11001', '10913'], ['11120', '11121'], ['10972', '11120'], ['11120', '11289'], ['10859', '11120'], ['11120', '11201'], ['11302', 'UNKNOWN'], ['10881', 'UNKNOWN']]


In [None]:
########## Checking if all the permutation are the same (Shouldn't be) ##########
# The previous version of this cell could not run: the statement was split in
# the middle of an identifier, and it applied a 2-D slice (`[:, 1:]`) to the
# plain Python list returned by `.tolist()[0]`. This version compares the
# complete inputs, as the heading and the printed message describe.
cf_inputs = dice_binary_model.all_cf_input
# Input 1 is the reference and input 0 is left out, as in the original
# expression (presumably the first recorded call is not part of the search —
# TODO confirm). Fewer than three recorded inputs trivially count as "same".
all_cf_are_same = all(
    np.array_equal(cf_inputs[1], cf_input) for cf_input in cf_inputs[2:]
)
print("All inputs are the same: %s" % (all_cf_are_same))

In [28]:
########## Checking if all the trace & resource (except amount) are the same (Shouldn't be) ##########
per_input_matches = []
for cf_idx in range(len(dice_binary_model.all_cf_input)):
    # Column 0 is dropped on both sides before comparing (per the heading it
    # presumably holds the amount); input 1 serves as the reference.
    same_mask = dice_binary_model.all_cf_input[1][:, 1:] == dice_binary_model.all_cf_input[cf_idx][:, 1:]
    # Only the first row of the element-wise comparison is inspected.
    per_input_matches.append(all(same_mask.tolist()[0]))

# The comparison for input 0 is computed but not taken into account.
all_trace_and_resource_are_same = all(per_input_matches[1:])
print("All trace and resource are the same: %s" % (all_trace_and_resource_are_same))

All trace and resource are the same: True
