In [1]:
import os

import numpy as np
import tensorflow as tf

from parameters.dataset import BPI2012Parameters
from dataset import BPI2012Dataset
from utils.print import print_block
from utils.bpi2012 import remove_trail_steps, print_model_prediction_result, remove_tags_for_seq, get_example_data_with_removed_tail
from model import LSTMPredNextModel, LSTMScenarioCfModel
from dice import EventLogDiCE
from parameters.training import TrainingParameters, LossParameters, OptimizerParameters
from utils.preprocessing import dataset_split
from cf_search import CfSearcher

In [2]:
### Load dataset
# Build the BPI 2012 dataset from its default parameter object.
dataset_params = BPI2012Parameters()
dataset = BPI2012Dataset(dataset_params)


| Preprocessed data loaded successfully: ./data/preprocessed/BPI_Challenge_2012_with_resource/AOW_CompleteOnly 


In [3]:
### Prepare the training dataset.
train_params = TrainingParameters(stop_epoch=20)

# Split the positions 0..len(dataset)-1 into three index collections, using the
# split portion and random seed carried by the training parameters.
case_indices = list(range(len(dataset)))
train_dataset, test_dataset, validation_dataset = dataset_split(
    case_indices,
    train_params.train_test_split_portion,
    seed=train_params.random_seed,
    shuffle=True,
)

# train_dataset is used as positional row indices into the dataset's frame.
training_df = dataset.df.iloc[train_dataset]

In [6]:
# Get example from dataset
test_trace_idx = 52

# Number of trailing steps to remove from the trace. Ground-truth activity
# obtained for this trace with different tail lengths:
#   4  -> A_APPROVED_COMPLETE
#   12 -> A_FINALIZED_COMPLETE
#   13 -> A_ACCEPTED_COMPLETE
tail_length_to_remove = 13

(
    example_vocab_activities,
    example_idx_activities,
    example_vocab_resources,
    example_idx_resources,
    example_amount,
    ground_truth_vocab,
) = get_example_data_with_removed_tail(
    dataset,
    trace_idx=test_trace_idx,
    tail_length_to_remove=tail_length_to_remove,
)



| [['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_ACCEPTED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE', 'A_ACTIVATED_COMPLETE', 'W_Valideren aanvraag_COMPLETE']] 

| [['<SOS>', '112', '112', '112', '11180', '11201', '11201', '11201', '11201', '11201', '11201', '11201', '11049', '11049', '10629', '10629', '10629', '10629', '10629']] 

| [15500.0] 

| ['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE'] 

| ['<SOS>', '112', '112', '112', '11180', '11201'] 

| A_ACCEPTED_COMPLETE 


In [7]:
##### Load prediction model #####
saved_model_name = "0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20:45:16.353467"
pred_model = LSTMPredNextModel.load("./SavedModels/%s" % saved_model_name)

# Call the model once on its own example input before printing the summary
# (presumably so the layers are built first - TODO confirm); the output is discarded.
example_input = pred_model.get_example_input()
_ = pred_model(**example_input)
pred_model.summary()


| Model parameters loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20:45:16.353467  

| Vocab loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20:45:16.353467  

| Model loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20:45:16.353467  
Model: "LSTMPredNextModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      multiple                  832       
_________________________________________________________________
embedding_3 (Embedding)      multiple                  9216      
_________________________________________________________________
lstm_4 (LSTM)                multiple                  24832     
_________________________________________________________________
lstm_5 (LSTM)                multiple                  33024     
________

In [8]:
##### Get model output #####
# Wrap the example's activity indices, resource indices and amount as TensorFlow constants.
example_activities_input, example_resources_input, example_amount_input = (
    tf.constant(raw_values)
    for raw_values in (example_idx_activities, example_idx_resources, example_amount)
)

# Print the model's prediction for the example and keep the returned object.
predicted_df = print_model_prediction_result(
    pred_model,
    example_activities_input,
    example_resources_input,
    example_amount_input,
)


| Predicted activity with highest probability (0.69) is "W_Completeren aanvraag_COMPLETE" 



<PAD>                                      1.174409e-07
<EOS>                                      3.132308e-05
<SOS>                                      1.881654e-07
A_ACCEPTED_COMPLETE                        2.363838e-01
A_ACTIVATED_COMPLETE                       1.805911e-06
A_APPROVED_COMPLETE                        2.348171e-06
A_CANCELLED_COMPLETE                       6.621396e-02
A_DECLINED_COMPLETE                        1.187625e-02
A_FINALIZED_COMPLETE                       9.158659e-06
A_PARTLYSUBMITTED_COMPLETE                 1.821592e-07
A_PREACCEPTED_COMPLETE                     1.974105e-07
A_REGISTERED_COMPLETE                      2.050257e-06
A_SUBMITTED_COMPLETE                       1.642594e-06
O_ACCEPTED_COMPLETE                        1.552491e-05
O_CANCELLED_COMPLETE                       5.550547e-06
O_CREATED_COMPLETE                         4.282120e-08
O_DECLINE

In [9]:
## Initialise the counterfactual searcher with the training rows and the loaded prediction model.
searcher = CfSearcher(training_df, pred_model)

In [10]:
## 3 ways to search
#
# 1. amount = example_amount[0], replace_amount = None
#    (Find cases with the same milestones and the same amount.)
#        amount = example_amount[0]
#        replace_amount = None
#
# 2. amount = None, replace_amount = None
#    (Find cases with the same milestones, and use the amount in the dataset.)
#        amount = None
#        replace_amount = None
#
# 3. amount = None, replace_amount = example_amount[0]   <- currently active
#    (Find cases with the same milestones, and replace their amount by replace_amount.)
replace_amount = example_amount[0]
amount = None

search_result = searcher.search(
    example_vocab_activities,
    desired=ground_truth_vocab,
    amount=amount,
    replace_amount=replace_amount,
)
desired_df, cf = search_result

# Naming scheme for the result files:
# {counterfactual (cf) or all predictions (desired_df)}_{ground truth}_Amount_{amount argument value}_ReplaceAmount_{replace_amount argument value}

In [11]:
# Preview the first five rows of desired_df.
desired_df.head(5)

Unnamed: 0,activity,activity_vocab,caseid,amount,resource,resource_vocab,predicted_vocab,predicted_value,lengths,activity_sparcity
5644,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",191278,15500.0,"[70, 53, 53, 53, 25, 25]","[<SOS>, 112, 112, 112, 10929, 10929]",W_Completeren aanvraag_COMPLETE,0.682024,6,0
5117,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",189625,15500.0,"[70, 53, 53, 53, 44, 71]","[<SOS>, 112, 112, 112, 11119, UNKNOWN]",W_Completeren aanvraag_COMPLETE,0.639215,6,0
4732,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",188416,15500.0,"[70, 53, 53, 53, 21, 48]","[<SOS>, 112, 112, 112, 10910, 11169]",W_Completeren aanvraag_COMPLETE,0.748217,6,0
6041,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",192512,15500.0,"[70, 53, 53, 53, 22, 22]","[<SOS>, 112, 112, 112, 10912, 10912]",W_Completeren aanvraag_COMPLETE,0.657041,6,0
6591,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",194206,15500.0,"[70, 53, 53, 53, 33, 34]","[<SOS>, 112, 112, 112, 10982, 11000]",W_Completeren aanvraag_COMPLETE,0.746058,6,0


In [14]:
# Show only the rows whose activity_sparcity equals the smallest value in the frame.
lowest_sparcity = desired_df["activity_sparcity"].min()
desired_df.loc[desired_df["activity_sparcity"] == lowest_sparcity]

Unnamed: 0,activity,activity_vocab,caseid,amount,resource,resource_vocab,predicted_vocab,predicted_value,lengths,activity_sparcity
5644,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",191278,15500.0,"[70, 53, 53, 53, 25, 25]","[<SOS>, 112, 112, 112, 10929, 10929]",W_Completeren aanvraag_COMPLETE,0.682024,6,0
5117,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",189625,15500.0,"[70, 53, 53, 53, 44, 71]","[<SOS>, 112, 112, 112, 11119, UNKNOWN]",W_Completeren aanvraag_COMPLETE,0.639215,6,0
4732,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",188416,15500.0,"[70, 53, 53, 53, 21, 48]","[<SOS>, 112, 112, 112, 10910, 11169]",W_Completeren aanvraag_COMPLETE,0.748217,6,0
6041,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",192512,15500.0,"[70, 53, 53, 53, 22, 22]","[<SOS>, 112, 112, 112, 10912, 10912]",W_Completeren aanvraag_COMPLETE,0.657041,6,0
6591,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",194206,15500.0,"[70, 53, 53, 53, 33, 34]","[<SOS>, 112, 112, 112, 10982, 11000]",W_Completeren aanvraag_COMPLETE,0.746058,6,0
...,...,...,...,...,...,...,...,...,...,...
8647,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",200623,15500.0,"[70, 53, 53, 53, 18, 13]","[<SOS>, 112, 112, 112, 10889, 10861]",W_Completeren aanvraag_COMPLETE,0.725240,6,0
1808,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",179387,15500.0,"[70, 53, 53, 53, 71, 15]","[<SOS>, 112, 112, 112, UNKNOWN, 10863]",W_Completeren aanvraag_COMPLETE,0.664084,6,0
9664,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",203814,15500.0,"[70, 53, 53, 53, 49, 49]","[<SOS>, 112, 112, 112, 11179, 11179]",W_Completeren aanvraag_COMPLETE,0.772533,6,0
10500,"[2, 12, 9, 10, 22, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",206417,15500.0,"[70, 53, 53, 53, 71, 71]","[<SOS>, 112, 112, 112, UNKNOWN, UNKNOWN]",W_Completeren aanvraag_COMPLETE,0.607998,6,0


In [22]:
# Preview the first five rows of cf, the second frame returned by searcher.search.
cf.head(5)

Unnamed: 0,activity,activity_vocab,caseid,amount,resource,resource_vocab,predicted_vocab,predicted_value,lengths,activity_sparcity
12087,"[2, 12, 9, 10, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",211314,15500.0,"[70, 53, 53, 53, 64]","[<SOS>, 112, 112, 112, 11302]",A_ACCEPTED_COMPLETE,0.469806,5,1
12106,"[2, 12, 9, 10, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",211381,15500.0,"[70, 53, 53, 53, 64]","[<SOS>, 112, 112, 112, 11302]",A_ACCEPTED_COMPLETE,0.469806,5,1
12490,"[2, 12, 9, 10, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",212548,15500.0,"[70, 53, 53, 53, 64]","[<SOS>, 112, 112, 112, 11302]",A_ACCEPTED_COMPLETE,0.469806,5,1
12494,"[2, 12, 9, 10, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",212560,15500.0,"[70, 53, 53, 53, 64]","[<SOS>, 112, 112, 112, 11302]",A_ACCEPTED_COMPLETE,0.469806,5,1
11861,"[2, 12, 9, 10, 22]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",210632,15500.0,"[70, 53, 53, 53, 64]","[<SOS>, 112, 112, 112, 11302]",A_ACCEPTED_COMPLETE,0.469806,5,1


In [15]:
# List the column names of desired_df.
desired_df.columns

Index(['activity', 'activity_vocab', 'caseid', 'amount', 'resource',
       'resource_vocab', 'predicted_vocab', 'predicted_value', 'lengths',
       'activity_sparcity'],
      dtype='object')

In [None]:
# Column names of the search result frame, spelled out as a plain list.
features = [
    'activity',
    'activity_vocab',
    'caseid',
    'amount',
    'resource',
    'resource_vocab',
    'predicted_vocab',
    'predicted_value',
    'lengths',
    'activity_sparcity',
]

In [23]:
# Count how many rows of desired_df carry each predicted activity label.
desired_df['predicted_vocab'].value_counts()

W_Completeren aanvraag_COMPLETE    4046
A_ACCEPTED_COMPLETE                  33
A_CANCELLED_COMPLETE                  1
Name: predicted_vocab, dtype: int64

In [24]:
# Save desired_df as CSV; the file name encodes the ground truth and the
# amount / replace_amount arguments used for the search.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (a no-op when it is already there).
os.makedirs("./cf_searching_result", exist_ok=True)
desired_df.to_csv(f"./cf_searching_result/desired_{ground_truth_vocab}_Amount_{amount}_ReplaceAmount_{replace_amount}_result.csv")

In [25]:
# Save cf as CSV; the file name encodes the ground truth and the
# amount / replace_amount arguments used for the search.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (a no-op when it is already there).
os.makedirs("./cf_searching_result", exist_ok=True)
cf.to_csv(f"./cf_searching_result/cf_{ground_truth_vocab}_Amount_{amount}_ReplaceAmount_{replace_amount}_result.csv")