In [1]:
from Controller import TrainingController, ExplainingController
from Utils.SaveUtils import load_parameters
from Parameters import TrainingParameters, PredictingParameters
import tensorflow as tf
from IPython.core.display import display, HTML
import json
import numpy as np
import pandas as pd
from Parameters.Enums import TracePermutationStrategies

import dice_ml
from dice_ml.utils import helpers

In [2]:
### Select models to load
# Name of the saved-model directory to restore; the "AOW" tag is carried over
# from the original cell.
saved_model_name = "0.7924_BPI2012_BaseLineLSTMModel_2021-06-11 17:08:27.383137"  # AOW
folder_path = "./SavedModels/%s" % (saved_model_name,)

In [3]:
### Setting up parameters
# Rebuild the training configuration that was saved next to the model.
parameters_json = load_parameters(folder_path=folder_path)
parameters = TrainingParameters(**parameters_json)

# Seed TensorFlow and NumPy with the dataset split seed from the restored
# configuration.
split_seed = parameters.dataset_split_seed
tf.random.set_seed(split_seed)
np.random.seed(split_seed)

# Both parameter objects point at the same saved-model folder.
parameters.load_model_folder_path = folder_path
predicting_parameters = PredictingParameters()
predicting_parameters.load_model_folder_path = folder_path

In [4]:
# Build the controller from the restored parameters. Judging by this cell's log
# output, construction reports the device in use, loads the preprocessed data
# and restores the model from the configured folder.
trainer = TrainingController(parameters=parameters)


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Preprocessed data loaded successfully: ./datasets/preprocessed/BPI_Challenge_2012/AOW 

| Model loaded successfully from: ./SavedModels/0.7924_BPI2012_BaseLineLSTMModel_2021-06-11 17:08:27.383137  


In [5]:
# Print a header, then leave the vocabulary list as the cell's last expression
# so the notebook displays it as the cell result.
print("========================All Vocabs========================")
trainer.model.vocab.vocabs



['<PAD>',
 '<EOS>',
 '<SOS>',
 'A_ACCEPTED_COMPLETE',
 'A_ACTIVATED_COMPLETE',
 'A_APPROVED_COMPLETE',
 'A_CANCELLED_COMPLETE',
 'A_DECLINED_COMPLETE',
 'A_FINALIZED_COMPLETE',
 'A_PARTLYSUBMITTED_COMPLETE',
 'A_PREACCEPTED_COMPLETE',
 'A_REGISTERED_COMPLETE',
 'A_SUBMITTED_COMPLETE',
 'O_ACCEPTED_COMPLETE',
 'O_CANCELLED_COMPLETE',
 'O_CREATED_COMPLETE',
 'O_DECLINED_COMPLETE',
 'O_SELECTED_COMPLETE',
 'O_SENT_BACK_COMPLETE',
 'O_SENT_COMPLETE',
 'W_Afhandelen leads_COMPLETE',
 'W_Beoordelen fraude_COMPLETE',
 'W_Completeren aanvraag_COMPLETE',
 'W_Nabellen incomplete dossiers_COMPLETE',
 'W_Nabellen offertes_COMPLETE',
 'W_Valideren aanvraag_COMPLETE']

In [6]:
### Identified important activities
# Hand-picked activities of interest: the A_-prefixed ones first, then the
# O_-prefixed ones. The combined list keeps that order.
a_important_activities = [
    "A_ACTIVATED_COMPLETE",
    "A_APPROVED_COMPLETE",
    "A_REGISTERED_COMPLETE",
    "A_DECLINED_COMPLETE",
]
o_important_activities = [
    "O_ACCEPTED_COMPLETE",
    "O_DECLINED_COMPLETE",
]
all_important_vocabs = [*a_important_activities, *o_important_activities]
print("===============Important Activities===============")
print(all_important_vocabs)

['A_ACTIVATED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE', 'A_DECLINED_COMPLETE', 'O_ACCEPTED_COMPLETE', 'O_DECLINED_COMPLETE']


In [7]:
########### What are the ending activities and their occurrence counts ###########
# Take the second-to-last element of every trace (presumably the last element
# is the <EOS> token, which would make this the final real activity -- confirm).
all_last_steps = [trace[-2] for trace in trainer.dataset.df["trace"]]
last_df = pd.DataFrame(all_last_steps, columns=["last_step"])

# value_counts() orders by descending frequency; the dicts keep that order.
ending_value_count_dict = dict(last_df["last_step"].value_counts())

# Re-key the counts by activity name instead of vocabulary index.
ending_vocab_counts = {
    trainer.model.vocab.index_to_vocab(step_index): trace_count
    for step_index, trace_count in ending_value_count_dict.items()
}

print(ending_vocab_counts)

{'A_DECLINED_COMPLETE': 3429, 'W_Valideren aanvraag_COMPLETE': 2749, 'W_Afhandelen leads_COMPLETE': 2234, 'W_Completeren aanvraag_COMPLETE': 1939, 'W_Nabellen offertes_COMPLETE': 1291, 'A_CANCELLED_COMPLETE': 655, 'W_Nabellen incomplete dossiers_COMPLETE': 453, 'O_CANCELLED_COMPLETE': 279, 'W_Beoordelen fraude_COMPLETE': 57, 'A_REGISTERED_COMPLETE': 1}


In [8]:
# Vocabulary indices of all observed ending steps, most frequent first
# (value_counts() sorts by descending count).
ending_step_indices = list(last_df["last_step"].value_counts().index)
print("Most common ending activities: ")
print(trainer.model.vocab.list_of_index_to_vocab(ending_step_indices))

Most common ending activities: 
['A_DECLINED_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'A_CANCELLED_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'O_CANCELLED_COMPLETE', 'W_Beoordelen fraude_COMPLETE', 'A_REGISTERED_COMPLETE']


In [9]:
########### Get example data from trainer ###########
# Flatten the batched test split into individual items and sort them so the
# ordering is stable across runs. Dataset.unbatch() returns a new dataset
# rather than modifying in place, so the earlier bare call whose result was
# discarded had no effect and has been removed.
ordered_test_idx = sorted(trainer.test_dataset.unbatch().as_numpy_iterator())
print("Test set size: %d" %(len(ordered_test_idx)))

# A trace counts as "successful" when it contains at least one of these
# activities anywhere in the sequence.
success_activities = ['O_ACCEPTED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE']
all_success_case_in_test = []
for i in ordered_test_idx:
    # collate_fn is given a single-item list; element [1][0] is the index trace
    # for that item.
    idx_trace = trainer.dataset.collate_fn([i])[1][0]
    vocab_trace = trainer.model.vocab.list_of_index_to_vocab(idx_trace.tolist())
    # Generator form stops at the first match instead of building a list.
    if any(s_a in vocab_trace for s_a in success_activities):
        all_success_case_in_test.append(i)

# Use a set for membership so the split is linear rather than quadratic in the
# number of test items; the comprehension keeps the sorted order.
success_case_lookup = set(all_success_case_in_test)
all_fail_case_in_test = [i for i in ordered_test_idx if i not in success_case_lookup]
print("Trace count (Successful): %d " % (len(all_success_case_in_test))) 
print("Trace count (Failed): %d " % (len(all_fail_case_in_test))) 

Test set size: 1308
Trace count (Successful): 224 
Trace count (Failed): 1084 


In [10]:
def _mean_trace_length(case_indices):
    """Return the mean length of the index traces for the given test items."""
    trace_lengths = [
        len(trainer.dataset.collate_fn([case_index])[1][0])
        for case_index in case_indices
    ]
    return np.mean(trace_lengths)


# Compare the average trace length of the successful and failed test cases.
mean_success_length = _mean_trace_length(all_success_case_in_test)
mean_fail_length = _mean_trace_length(all_fail_case_in_test)
print("Mean trace length (Successful): %.2f " % (mean_success_length)) 
print("Mean trace length (Failed): %.2f " % (mean_fail_length)) 

Mean trace length (Successful): 27.94 
Mean trace length (Failed): 10.53 


In [11]:
def _lookup_example_trace(case_id):
    """Return (index trace, activity-name trace) for a single test item."""
    index_trace = trainer.dataset.collate_fn([case_id])[1][0]
    name_trace = trainer.model.vocab.list_of_index_to_vocab(index_trace.tolist())
    return index_trace, name_trace


# Positions of the example cases within the successful / failed case lists.
success_case_idx = 2
fail_case_idx = 7

print("=========================Accepted=========================")
# The misspelled "accpeted" names are kept as-is; other cells may refer to them.
accpeted_example_idx_trace, accpeted_example_vocab_trace = _lookup_example_trace(
    all_success_case_in_test[success_case_idx]
)
print(accpeted_example_vocab_trace)

print("=========================Failed===========================")
failed_example_idx_trace, failed_example_vocab_trace = _lookup_example_trace(
    all_fail_case_in_test[fail_case_idx]
)
print(failed_example_vocab_trace)

['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'A_APPROVED_COMPLETE', 'O_ACCEPTED_COMPLETE', 'A_REGISTERED_COMPLETE', 'A_ACTIVATED_COMPLETE', 'W_Valideren aanvraag_COMPLETE']
['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMP