In [1]:
from Controller import TrainingController, ExplainingController
from Utils.SaveUtils import load_parameters
from Parameters import TrainingParameters, PredictingParameters
import tensorflow as tf
from IPython.core.display import display, HTML
import json
import numpy as np
import pandas as pd
from Parameters.Enums import TracePermutationStrategies
from Models import DiCEBinaryOutputModelWithResource
from itertools import chain

import dice_ml
from dice_ml.utils import helpers

# Seed both NumPy and TensorFlow with the same fixed value.
seed = 123
np.random.seed(seed)
tf.random.set_seed(seed)

# Report the runtime environment. The recorded run printed eager execution
# as True.
print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly())

TF version:  2.4.0-rc0
Eager execution enabled:  True


In [2]:
# Folder name of the saved model run to load; the author tagged this run "AOW".
saved_model_name = "0.8264_BPI2012WithResource_BaselineLSTMWithResource_2021-06-18 06:11:10.009443"  # AOW
folder_path = "./SavedModels/%s" % (saved_model_name,)

In [3]:
# Rebuild the training parameters that were stored in the saved-model folder.
parameters_json = load_parameters(folder_path=folder_path)
parameters = TrainingParameters(**parameters_json)

# Re-seed TensorFlow and NumPy with the seed recorded in those parameters.
split_seed = parameters.dataset_split_seed
tf.random.set_seed(split_seed)
np.random.seed(split_seed)

# Point both parameter objects at the same saved-model folder.
parameters.load_model_folder_path = folder_path
predicting_parameters = PredictingParameters()
predicting_parameters.load_model_folder_path = folder_path
 

In [4]:
# Build the training controller from the restored parameters. The recorded
# output shows it loading the preprocessed dataset and the saved model.
trainer = TrainingController(parameters=parameters)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Preprocessed data loaded successfully: ./datasets/preprocessed/BPI_Challenge_2012_with_resource/AOW 

| Model loaded successfully from: ./SavedModels/0.8264_BPI2012WithResource_BaselineLSTMWithResource_2021-06-18 06:11:10.009443  


In [5]:
# Build the explaining controller; the recorded output shows it loading the
# same saved model.
explainer = ExplainingController(
    parameters=parameters,
    predicting_parameters=predicting_parameters,
)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Model loaded successfully from: ./SavedModels/0.8264_BPI2012WithResource_BaselineLSTMWithResource_2021-06-18 06:11:10.009443  


In [6]:
########### Get example data from trainer ###########
# Flatten the batched test dataset into individual elements and sort them so
# that positional picks from this list are stable between runs.
ordered_test_idx = sorted(trainer.test_dataset.unbatch().as_numpy_iterator())
print("Test set length: %d" % len(ordered_test_idx))

Test set length: 1309


In [7]:
# Collate one test case (position 4 of the sorted test indexes) into the
# per-case arrays used below; the sixth returned value is not needed here.
(
    caseids,
    example_data,
    example_lengths,
    example_resources,
    example_amount,
    _,
) = trainer.dataset.collate_fn([ordered_test_idx[4]])

In [8]:
# Number of trailing steps cut from the example trace and from its resources.
# Expected to be >= 0.
remove_trail_steps = 3


def _drop_trailing(sequence, n_steps):
    """Return ``sequence`` without its last ``n_steps`` items.

    The end index is computed explicitly because ``sequence[:-n_steps]``
    yields an empty result when ``n_steps`` is 0 (``-0 == 0``), which would
    silently discard the whole trace instead of keeping it. Asking to remove
    more items than exist returns an empty slice.
    """
    keep = max(len(sequence) - n_steps, 0)
    return sequence[:keep]


# Keep a leading batch axis of size 1 around the trimmed sequences.
example_idx_trace = np.array([_drop_trailing(example_data[0], remove_trail_steps)])
example_idx_resources = np.array([_drop_trailing(example_resources[0], remove_trail_steps)])

# Translate the trimmed activity indexes back to vocabulary strings.
example_vocab_trace = trainer.model.vocab.list_of_index_to_vocab_2d(example_idx_trace)[0]

In [9]:
# Show the trimmed example trace as activity names.
# NOTE(review): the trailing comment names an activity that is not in the
# displayed trace; presumably it records how the full case continues/ends -
# confirm with the author.
example_vocab_trace # A_DECLINED_COMPLETE

['<SOS>',
 'A_SUBMITTED_COMPLETE',
 'A_PARTLYSUBMITTED_COMPLETE',
 'A_PREACCEPTED_COMPLETE',
 'W_Afhandelen leads_COMPLETE',
 'A_ACCEPTED_COMPLETE',
 'O_SELECTED_COMPLETE',
 'A_FINALIZED_COMPLETE',
 'O_CREATED_COMPLETE',
 'O_SENT_COMPLETE',
 'W_Completeren aanvraag_COMPLETE',
 'W_Nabellen offertes_COMPLETE']

In [10]:
# Wrap the trimmed activities, trimmed resources and the amount as constant
# tensors for the model call.
example_input, example_resources_input, example_amount_input = (
    tf.constant(values)
    for values in (example_idx_trace, example_idx_resources, example_amount)
)

In [11]:
# Run the loaded model on the example; only the first returned value is used.
out, _ = explainer.model(
    example_input,
    input_resources=example_resources_input,
    amount=example_amount_input,
)

In [12]:
# Select the model output at the last position of the trimmed trace (axis 1).
last_step = len(example_vocab_trace) - 1
predicted_vocab_distributions = tf.gather(out, last_step, axis=1)

# One column per vocabulary entry, so the values can be read by name.
predicted_vocab_distributions_df = pd.DataFrame(
    predicted_vocab_distributions.numpy().tolist(),
    columns=explainer.model.vocab.vocabs,
)

# Highest-scoring vocabulary entry for the (single) example in the batch.
max_arg = tf.math.argmax(predicted_vocab_distributions, axis=-1).numpy()[0]
max_prob_vocab = explainer.model.vocab.index_to_vocab(max_arg)
max_prob = predicted_vocab_distributions[0][max_arg].numpy()
print("Predicted activity with highest probability (%.2f) is \"%s\"" % (max_prob, max_prob_vocab))

# Rich table first, then the untruncated per-activity listing of row 0.
display(predicted_vocab_distributions_df)
print(predicted_vocab_distributions_df.iloc[0])

Predicted activity with highest probability (0.75) is "W_Nabellen offertes_COMPLETE"


Unnamed: 0,<PAD>,<EOS>,<SOS>,A_ACCEPTED_COMPLETE,A_ACTIVATED_COMPLETE,A_APPROVED_COMPLETE,A_CANCELLED_COMPLETE,A_DECLINED_COMPLETE,A_FINALIZED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,...,O_DECLINED_COMPLETE,O_SELECTED_COMPLETE,O_SENT_BACK_COMPLETE,O_SENT_COMPLETE,W_Afhandelen leads_COMPLETE,W_Beoordelen fraude_COMPLETE,W_Completeren aanvraag_COMPLETE,W_Nabellen incomplete dossiers_COMPLETE,W_Nabellen offertes_COMPLETE,W_Valideren aanvraag_COMPLETE
0,4e-06,0.000358,2e-06,3.1e-05,3.7e-05,0.000167,0.020353,0.002481,1e-05,1.9e-05,...,0.000748,0.015224,0.153527,5e-06,4.7e-05,1.7e-05,0.00011,1.8e-05,0.753731,7e-05


<PAD>                                      0.000004
<EOS>                                      0.000358
<SOS>                                      0.000002
A_ACCEPTED_COMPLETE                        0.000031
A_ACTIVATED_COMPLETE                       0.000037
A_APPROVED_COMPLETE                        0.000167
A_CANCELLED_COMPLETE                       0.020353
A_DECLINED_COMPLETE                        0.002481
A_FINALIZED_COMPLETE                       0.000010
A_PARTLYSUBMITTED_COMPLETE                 0.000019
A_PREACCEPTED_COMPLETE                     0.000052
A_REGISTERED_COMPLETE                      0.000045
A_SUBMITTED_COMPLETE                       0.000039
O_ACCEPTED_COMPLETE                        0.000105
O_CANCELLED_COMPLETE                       0.052788
O_CREATED_COMPLETE                         0.000012
O_DECLINED_COMPLETE                        0.000748
O_SELECTED_COMPLETE                        0.015224
O_SENT_BACK_COMPLETE                       0.153527
O_SENT_COMPL

In [13]:
# Positions of the special tag entries inside the resource list, in the order
# <PAD>, <SOS>, <EOS>.
indexes_to_remove_from_resource = [
    explainer.resources.index(tag) for tag in ('<PAD>', '<SOS>', '<EOS>')
]

In [14]:
# Strip tag entries from the example activities. The tag indexes are looked
# up once instead of on every loop iteration (presumably these are the
# <PAD>/<SOS>/<EOS> indexes - confirm against the vocabulary class).
activity_tag_indexes = list(explainer.vocab.tags_idx())
example_idx_trace_without_tags = [
    i for i in example_idx_trace[0] if i not in activity_tag_indexes
]
example_vocab_trace_without_tags = explainer.model.vocab.list_of_index_to_vocab(example_idx_trace_without_tags)

# Strip tag entries from the example resources and map indexes to names.
example_idx_resources_without_tags = [
    i for i in example_idx_resources[0] if i not in indexes_to_remove_from_resource
]
example_vocab_resource_without_tags = [
    explainer.resources[r] for r in example_idx_resources_without_tags
]

# Activities and resources are filtered independently, but the cells below
# build one feature per step from the activity count and concatenate both
# lists into a single row. Fail here with a clear message if they diverge.
assert len(example_vocab_trace_without_tags) == len(example_vocab_resource_without_tags), (
    "Activity and resource sequences have different lengths after removing tags: %d vs %d"
    % (len(example_vocab_trace_without_tags), len(example_vocab_resource_without_tags))
)

print("=============Example activities without tags=============")
print(example_vocab_trace_without_tags)

print("=============Example resources without tags=============")
print(example_vocab_resource_without_tags)

['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_COMPLETE']
['112', '112', '11001', '11001', '11120', '11120', '11120', '11120', '11120', 'UNKNOWN', 'UNKNOWN']


In [15]:
# One categorical feature per remaining step, for activities and resources.
n_steps = len(example_vocab_trace_without_tags)

# Activity features: every step may take any vocabulary entry without "<".
activity_feature_names = np.array(["step_%d_activity" % step for step in range(1, n_steps + 1)])
without_tags_vocabs = [vocab for vocab in explainer.vocab.vocabs if "<" not in vocab]
# Every key maps to the same list object.
activity_cat_vars_without_tag = dict.fromkeys(activity_feature_names, without_tags_vocabs)

# Resource features: every step may take any resource entry without "<".
resource_feature_names = np.array(["step_%d_resource" % step for step in range(1, n_steps + 1)])
without_tags_resources = [resource for resource in explainer.resources if "<" not in resource]
resources_cat_vars_without_tag = dict.fromkeys(resource_feature_names, without_tags_resources)

In [16]:
# Activity whose index is later passed as `desired` to the binary-output model.
desired_activity = 'A_APPROVED_COMPLETE'
print("Desired activity is \"%s\"" % desired_activity)

Desired activity is "A_APPROVED_COMPLETE"


In [23]:
# Merge the per-step activity and resource choices, then add the value range
# for the continuous "amount" feature.
all_features_and_range = dict(activity_cat_vars_without_tag)
all_features_and_range.update(resources_cat_vars_without_tag)
all_features_and_range["amount"] = [0, 99999.0]

In [27]:
# Wrap the loaded model in the binary-output model handed to DiCE, with
# `desired_activity` as the target and the "amount" bounds taken from
# `all_features_and_range`.
amount_lower, amount_upper = all_features_and_range['amount'][0], all_features_and_range['amount'][1]
dice_binary_model = DiCEBinaryOutputModelWithResource(
    explainer.model,
    explainer.vocab,
    desired=explainer.vocab.vocab_to_index(desired_activity),
    trace_length=len(example_vocab_trace_without_tags),
    without_tags_vocabs=without_tags_vocabs,
    without_tags_resources=without_tags_resources,
    resources=explainer.resources,
    sos_idx_activity=explainer.vocab.vocab_to_index("<SOS>"),
    sos_idx_resource=explainer.resources.index('<SOS>'),
    amount_min=amount_lower,
    amount_max=amount_upper,
)

In [25]:
# Describe the feature space to DiCE from the feature/range dictionary, with
# "amount" as the only continuous feature.
d = dice_ml.Data(features=all_features_and_range, outcome_name="predicted",continuous_features = ['amount'])
# Register the binary-output model with DiCE's TF2 backend.
m = dice_ml.Model(model= dice_binary_model, backend="TF2")
# NOTE(review): the recorded output of this cell is
# "NotImplementedError: When subclassing the `Model` class, you should
# implement a `call` method.", so `exp` was never created and the cells that
# follow were not executed. Presumably DiCEBinaryOutputModelWithResource
# subclasses the Keras Model without defining `call` - confirm and fix in the
# Models package before re-running.
exp = dice_ml.Dice(d, m)

NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.

In [None]:
# Column order for the query row: activities, then resources, then amount.
feature_names = [
    *activity_feature_names.tolist(),
    *resource_feature_names.tolist(),
    'amount',
]

In [None]:
# Single query row in the same order as `feature_names`.
example_row = example_vocab_trace_without_tags + example_vocab_resource_without_tags + example_amount
query_instance = [example_row]
example_df = pd.DataFrame(query_instance, columns=feature_names)

In [None]:
# Search settings for the counterfactual generation, collected in one place so
# they are easy to tune. Trailing comments keep the alternative values the
# author noted.
counterfactual_settings = dict(
    total_CFs=1,
    verbose=True,
    min_iter=100,
    max_iter=600,
    desired_class="opposite",
    features_to_vary="all",
    algorithm="DiverseCF",  # DiverseCF, RandomInitCF
    proximity_weight=0.1,  # 0.5
    diversity_weight=50000,  # 1.0
    init_near_query_instance=True,
    tie_random=True,
    categorical_penalty=1,
    learning_rate=0.0005,
)

# Generate counterfactual(s) for the single example row.
dice_exp = exp.generate_counterfactuals(example_df, **counterfactual_settings)