In [1]:
import os

import tensorflow as tf
import numpy as np

from parameters.dataset import BPI2012Parameters
from dataset import BPI2012Dataset
from utils.print import print_block
from utils.bpi2012 import remove_trail_steps, print_model_prediction_result, remove_tags_for_seq, get_example_data_with_removed_tail
from model import LSTMPredNextModel, LSTMScenarioCfModel
from dice import EventLogDiCE
from parameters.training import TrainingParameters, LossParameters, OptimizerParameters
from utils.preprocessing import dataset_split
from cf_search import CfSearcher

In [2]:
### Load the BPI 2012 dataset
# The parameter object is created with its defaults and handed to the dataset wrapper.
dataset_params = BPI2012Parameters()
dataset = BPI2012Dataset(dataset_params)


| Preprocessed data loaded successfully: ./data/preprocessed/BPI_Challenge_2012_with_resource/AOW_CompleteOnly 


In [3]:
### Build the training frame
# Only stop_epoch is overridden; the split portion and the random seed used
# below are read from the resulting TrainingParameters object.
train_params = TrainingParameters(stop_epoch=20)

# Split the positional row indices 0..len(dataset)-1 (shuffled, seeded) into three index lists.
all_row_indices = list(range(len(dataset)))
train_dataset, test_dataset, validation_dataset = dataset_split(
    all_row_indices,
    train_params.train_test_split_portion,
    seed=train_params.random_seed,
    shuffle=True,
)

# Select the training rows by position from the underlying DataFrame.
training_df = dataset.df.iloc[train_dataset]

In [4]:
# Get example from dataset
# Pick one trace as the running example and drop its tail
# (trace index and tail length are named so they are easy to change).
EXAMPLE_TRACE_IDX = 52
EXAMPLE_TAIL_LENGTH = 4

(
    example_vocab_activities,
    example_idx_activities,
    example_vocab_resources,
    example_idx_resources,
    example_amount,
    ground_truth_vocab,
) = get_example_data_with_removed_tail(
    dataset,
    trace_idx=EXAMPLE_TRACE_IDX,
    tail_length_to_remove=EXAMPLE_TAIL_LENGTH,
)


| [['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_ACCEPTED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE', 'A_ACTIVATED_COMPLETE', 'W_Valideren aanvraag_COMPLETE']] 

| [['<SOS>', '112', '112', '112', '11180', '11201', '11201', '11201', '11201', '11201', '11201', '11201', '11049', '11049', '10629', '10629', '10629', '10629', '10629']] 

| [15500.0] 

| ['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SENT_B

In [5]:
##### Load the saved prediction model #####
# Name of the checkpoint folder inside ./SavedModels.
pred_model_dir = "0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20:45:16.353467"
pred_model = LSTMPredNextModel.load("./SavedModels/%s" % pred_model_dir)

# Call the model once on its own example input before printing the summary
# (presumably so the layers are built first -- TODO confirm).
example_model_input = pred_model.get_example_input()
_ = pred_model(**example_model_input)
pred_model.summary()


| Model parameters loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20:45:16.353467  

| Vocab loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20:45:16.353467  

| Model loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20:45:16.353467  
Model: "LSTMPredNextModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  832       
_________________________________________________________________
embedding_1 (Embedding)      multiple                  9216      
_________________________________________________________________
lstm (LSTM)                  multiple                  24832     
_________________________________________________________________
lstm_1 (LSTM)                multiple                  33024     
________

In [6]:
##### Run the prediction model on the example #####
# Wrap the three example inputs as constant tensors, in the same order as before.
example_activities_input, example_resources_input, example_amount_input = (
    tf.constant(raw_input)
    for raw_input in (example_idx_activities, example_idx_resources, example_amount)
)

# Prints the prediction result and keeps the returned frame for later inspection.
predicted_df = print_model_prediction_result(
    pred_model,
    example_activities_input,
    example_resources_input,
    example_amount_input,
)


| Predicted activity with highest probability (0.44) is "A_REGISTERED_COMPLETE" 



<PAD>                                      3.822472e-06
<EOS>                                      1.483298e-06
<SOS>                                      7.272783e-06
A_ACCEPTED_COMPLETE                        4.374774e-07
A_ACTIVATED_COMPLETE                       1.337052e-01
A_APPROVED_COMPLETE                        4.277948e-01
A_CANCELLED_COMPLETE                       2.484289e-07
A_DECLINED_COMPLETE                        9.023657e-06
A_FINALIZED_COMPLETE                       1.034867e-04
A_PARTLYSUBMITTED_COMPLETE                 1.634951e-04
A_PREACCEPTED_COMPLETE                     1.066932e-06
A_REGISTERED_COMPLETE                      4.373636e-01
A_SUBMITTED_COMPLETE                       8.535364e-06
O_ACCEPTED_COMPLETE                        2.829449e-04
O_CANCELLED_COMPLETE                       3.999644e-04
O_CREATED_COMPLETE                         7.831535e-06
O_DECLINED_COMPLETE

In [7]:
## initialise searcher
# The searcher is built from the training rows (training_df) and the loaded
# prediction model (pred_model); it is used below via searcher.search(...).
searcher = CfSearcher(training_df, pred_model)

In [20]:
## Three ways to search -- set `amount` / `replace_amount` to one of:
#
#   1. amount = example_amount[0], replace_amount = None
#      Find cases with the same milestones and the same amount.
#   2. amount = None, replace_amount = None
#      Find cases with the same milestones, and use the amount in the dataset.
#   3. amount = None, replace_amount = example_amount[0]
#      Find cases with the same milestones, and replace their amount by replace_amount.

# Option 1 is the one currently active.
amount, replace_amount = example_amount[0], None

search_options = dict(
    desired=ground_truth_vocab,
    amount=amount,
    replace_amount=replace_amount,
)
desired_df, cf = searcher.search(example_vocab_activities, **search_options)

# Naming pattern for the saved results:
# {counterfactual (cf) or all predictions (desired_df)}_{ground truth}_Amount_{amount argument value}_ReplaceAmount_{replace_amount argument value}

In [21]:
# Preview the first five rows of desired_df.
desired_df.head(n=5)

Unnamed: 0,activity,activity_vocab,caseid,amount,resource,resource_vocab,predicted_vocab,predicted_value,lengths
7294,"[2, 12, 9, 10, 3, 8, 17, 15, 19, 22, 18, 24]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",196422,15500.0,"[70, 53, 53, 53, 38, 38, 38, 38, 38, 38, 41, 41]","[<SOS>, 112, 112, 112, 11009, 11009, 11009, 11...",W_Valideren aanvraag_COMPLETE,0.652488,12
1453,"[2, 12, 9, 10, 3, 17, 8, 15, 19, 22, 18, 24]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",178257,15500.0,"[70, 53, 53, 53, 50, 50, 50, 50, 50, 50, 41, 41]","[<SOS>, 112, 112, 112, 11180, 11180, 11180, 11...",W_Valideren aanvraag_COMPLETE,0.674835,12
4909,"[2, 12, 9, 10, 3, 17, 8, 15, 19, 22, 18, 24]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",188998,15500.0,"[70, 53, 53, 53, 27, 27, 27, 27, 27, 27, 41, 41]","[<SOS>, 112, 112, 112, 10932, 10932, 10932, 10...",W_Valideren aanvraag_COMPLETE,0.636611,12
10780,"[2, 12, 9, 10, 3, 8, 17, 15, 19, 22, 18, 24]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",207278,15500.0,"[70, 53, 53, 53, 48, 48, 48, 48, 48, 48, 59, 59]","[<SOS>, 112, 112, 112, 11169, 11169, 11169, 11...",W_Valideren aanvraag_COMPLETE,0.73033,12
2906,"[2, 12, 9, 10, 3, 8, 17, 15, 19, 22, 18, 24]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",182780,15500.0,"[70, 53, 53, 53, 7, 7, 7, 7, 7, 7, 7, 7]","[<SOS>, 112, 112, 112, 10629, 10629, 10629, 10...",W_Valideren aanvraag_COMPLETE,0.606881,12


In [22]:
# Preview the first five rows of cf.
cf.head(n=5)

Unnamed: 0,activity,activity_vocab,caseid,amount,resource,resource_vocab,predicted_vocab,predicted_value,lengths
3774,"[2, 12, 9, 10, 3, 8, 17, 15, 19, 22, 18, 24, 11]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",185461,15500.0,"[70, 53, 53, 53, 3, 3, 3, 3, 3, 3, 3, 3, 3]","[<SOS>, 112, 112, 112, 10138, 10138, 10138, 10...",A_APPROVED_COMPLETE,0.605262,13
1580,"[2, 12, 9, 10, 3, 8, 17, 15, 19, 22, 18, 24, 11]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",178659,15500.0,"[70, 53, 53, 53, 49, 49, 49, 49, 49, 49, 40, 7...","[<SOS>, 112, 112, 112, 11179, 11179, 11179, 11...",A_APPROVED_COMPLETE,0.614594,13
3865,"[2, 12, 9, 10, 3, 8, 17, 15, 19, 22, 18, 24, 13]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",185740,15500.0,"[70, 53, 53, 53, 48, 48, 48, 48, 48, 48, 19, 1...","[<SOS>, 112, 112, 112, 11169, 11169, 11169, 11...",A_APPROVED_COMPLETE,0.550128,13
12426,"[2, 12, 9, 10, 3, 8, 17, 15, 19, 22, 18, 24, 4]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",212355,15500.0,"[70, 53, 53, 53, 63, 63, 63, 63, 63, 63, 19, 1...","[<SOS>, 112, 112, 112, 11300, 11300, 11300, 11...",A_APPROVED_COMPLETE,0.658343,13
7617,"[2, 12, 9, 10, 3, 17, 8, 15, 19, 22, 18, 24, 13]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",197425,15500.0,"[70, 53, 53, 53, 25, 25, 25, 25, 25, 25, 59, 5...","[<SOS>, 112, 112, 112, 10929, 10929, 10929, 10...",A_APPROVED_COMPLETE,0.625466,13


In [23]:
# Count how often each predicted activity occurs in desired_df.
desired_df["predicted_vocab"].value_counts()

A_APPROVED_COMPLETE                        1031
A_REGISTERED_COMPLETE                       312
W_Valideren aanvraag_COMPLETE               263
W_Nabellen incomplete dossiers_COMPLETE     107
O_ACCEPTED_COMPLETE                          85
A_ACTIVATED_COMPLETE                          6
Name: predicted_vocab, dtype: int64

In [24]:
# Save desired_df as CSV; the file name records the ground truth and the
# amount / replace_amount settings used for the search.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first (a no-op when it is already there).
os.makedirs("./cf_searching_result", exist_ok=True)
desired_df.to_csv(f"./cf_searching_result/desired_{ground_truth_vocab}_Amount_{amount}_ReplaceAmount_{replace_amount}_result.csv")

In [25]:
# Save cf as CSV; the file name records the ground truth and the
# amount / replace_amount settings used for the search.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first (a no-op when it is already there).
os.makedirs("./cf_searching_result", exist_ok=True)
cf.to_csv(f"./cf_searching_result/cf_{ground_truth_vocab}_Amount_{amount}_ReplaceAmount_{replace_amount}_result.csv")