In [1]:
import numpy as np
import tensorflow as tf

from dataset import BPI2012Dataset
from dice4el import DiCE4EL_BPI2012
from model import LSTMPredNextModel
from parameters.dataset import BPI2012Parameters
from parameters.training import TrainingParameters
from utils.bpi2012 import print_model_prediction_result, get_example_data_with_removed_tail
from utils.preprocessing import dataset_split

In [2]:
# Load the BPI Challenge 2012 dataset using the default `BPI2012Parameters`.
dataset = BPI2012Dataset(BPI2012Parameters())


| Preprocessed data loaded successfully: ./data/preprocessed/BPI_Challenge_2012_with_resource\AOW_CompleteOnly 


In [3]:
# Training configuration; only `stop_epoch` is overridden here, every other
# field keeps the default defined by `TrainingParameters`.
train_params = TrainingParameters(stop_epoch=20)

# Split the positional indices 0..len(dataset)-1 into three index collections.
# NOTE(review): the (train, test, validation) order is taken from the names on
# the left-hand side -- confirm it matches what `dataset_split` returns.
train_dataset, test_dataset, validation_dataset = dataset_split(
    [*range(len(dataset))],
    train_params.train_test_split_portion,
    seed=train_params.random_seed,
    shuffle=True,
)

# Rows of the dataset frame at the training positions.
training_df = dataset.df.iloc[train_dataset]

In [4]:
# Pick one trace from the dataset and cut events off its end, so the model can
# be asked to predict what comes next.
test_trace_idx = 52

# Number of trailing events to drop from the selected trace.
# Values noted by the author (presumably tail length -> resulting next
# activity for this trace; TODO confirm):
#    4 -> A_APPROVED_COMPLETE
#   12 -> A_FINALIZED_COMPLETE
#   13 -> A_ACCEPTED_COMPLETE
tail_length_to_remove = 4

(
    example_vocab_activities,
    example_idx_activities,
    example_vocab_resources,
    example_idx_resources,
    example_amount,
    ground_truth_vocab,
) = get_example_data_with_removed_tail(
    dataset,
    trace_idx=test_trace_idx,
    tail_length_to_remove=tail_length_to_remove,
)



| [['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_ACCEPTED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE', 'A_ACTIVATED_COMPLETE', 'W_Valideren aanvraag_COMPLETE']] 

| [['<SOS>', '112', '112', '112', '11180', '11201', '11201', '11201', '11201', '11201', '11201', '11201', '11049', '11049', '10629', '10629', '10629', '10629', '10629']] 

| [15500.0] 

| ['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SENT_B

In [5]:
# Report how many GPU devices TensorFlow can see on this machine.
print(
    "Num GPUs Available: ",
    len(tf.config.list_physical_devices("GPU")),
)

Num GPUs Available:  1


In [6]:
# Restore a previously trained next-activity prediction model from disk.
pred_model = LSTMPredNextModel.load(
    "./SavedModels/%s"
    % "0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20'45'16.353467"
)

# Run the model once on its own example input and discard the result.
# NOTE(review): presumably this forward pass is what builds the layers so that
# `summary()` below has something to report -- confirm.
_ = pred_model(**pred_model.get_example_input())

pred_model.summary()


| Model parameters loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20'45'16.353467  

| Vocab loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20'45'16.353467  

| Model loaded successfully from: ./SavedModels/0.8175_LSTMPredNextModel_AOW_CompleteOnly_2021-07-01 20'45'16.353467  
Model: "LSTMPredNextModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  832       
_________________________________________________________________
embedding_1 (Embedding)      multiple                  9216      
_________________________________________________________________
lstm (LSTM)                  multiple                  24832     
_________________________________________________________________
lstm_1 (LSTM)                multiple                  33024     
________

In [7]:
# Wrap the example trace (activity indices, resource indices, amount) as
# TensorFlow constants so they can be passed to the model.
example_activities_input = tf.constant(example_idx_activities)
example_resources_input = tf.constant(example_idx_resources)
example_amount_input = tf.constant(example_amount)

# Query the model with the example and keep whatever the helper returns.
# NOTE(review): judging by the name and the cell output, the helper prints the
# most probable next activity and returns per-activity probabilities -- confirm.
predicted_df = print_model_prediction_result(
    pred_model,
    example_activities_input,
    example_resources_input,
    example_amount_input,
)


| Predicted activity with highest probability (0.44) is "A_REGISTERED_COMPLETE" 



<PAD>                                      3.830043e-06
<EOS>                                      1.486540e-06
<SOS>                                      7.286466e-06
A_ACCEPTED_COMPLETE                        4.388551e-07
A_ACTIVATED_COMPLETE                       1.337329e-01
A_APPROVED_COMPLETE                        4.276519e-01
A_CANCELLED_COMPLETE                       2.492681e-07
A_DECLINED_COMPLETE                        9.044126e-06
A_FINALIZED_COMPLETE                       1.036809e-04
A_PARTLYSUBMITTED_COMPLETE                 1.632818e-04
A_PREACCEPTED_COMPLETE                     1.068880e-06
A_REGISTERED_COMPLETE                      4.374775e-01
A_SUBMITTED_COMPLETE                       8.551146e-06
O_ACCEPTED_COMPLETE                        2.834157e-04
O_CANCELLED_COMPLETE                       4.004732e-04
O_CREATED_COMPLETE                         7.850543e-06
O_DECLINED_COMPLETE

In [8]:
# Special vocabulary tokens that are excluded from the candidate value lists below.
no_need_tags = ['<EOS>', '<SOS>', '<PAD>']

# Every entry of the model's activity / resource vocabulary except the special tokens.
possible_activities = [a for a in pred_model.activity_vocab.vocabs if a not in no_need_tags]
possible_resources = [r for r in pred_model.resource_vocab.vocabs if r not in no_need_tags]

# [lowest, highest] value of the "amount" column over the whole dataset.
# `Series.min()` / `Series.max()` are vectorised and skip NaN by default, whereas
# the builtin min()/max() loop in Python and give order-dependent results if a
# NaN is present.
# NOTE(review): the name `possbile_amount` is misspelled, but a later cell refers
# to it by exactly this name, so it is kept unchanged here.
possbile_amount = [dataset.df["amount"].min(), dataset.df["amount"].max()]


In [9]:
# Build the DiCE4EL counterfactual generator for BPI2012 from:
#   - the dataset's activity and resource vocabularies,
#   - the amount bounds and the candidate activity / resource lists computed in
#     the previous cell,
#   - the loaded prediction model,
#   - the training rows of the dataset frame.
# NOTE(review): the candidate lists are derived from `pred_model`'s vocabularies,
# while the vocabularies passed here come from `dataset` -- confirm they agree.
dice4el_bpi2012 = DiCE4EL_BPI2012(
    dataset.activity_vocab,
    dataset.resource_vocab,
    possbile_amount,
    possible_activities,
    possible_resources,
    pred_model,
    training_df,
)

In [10]:
# Generate counterfactuals for the example trace, asking for
# "A_FINALIZED_COMPLETE" as the desired vocab. The call is left as the
# cell's last expression so its return value is displayed.
dice4el_bpi2012.generate_counterfactual(
    # Query instance: the same example that was fed to the prediction model.
    amount_input=example_amount_input.numpy(),
    idx_activities=example_idx_activities,
    idx_resources=example_idx_resources,
    # Target label.
    desired_vocab="A_FINALIZED_COMPLETE",
    # Loss weights.
    class_loss_weight=1,
    distance_loss_weight=1e-8,
    # Optimisation settings.
    # NOTE(review): lr=5000 with only 5 iterations is unusually aggressive --
    # confirm these values are intentional.
    max_iter=5,
    lr=5000,
    # Switches.
    class_using_hinge_loss=False,
    use_clipping=True,
)


| A_REGISTERED_COMPLETE ====> A_FINALIZED_COMPLETE 

| Found 109 potentail counterfactuals in training set. 

| Found 109 cases. 


Unnamed: 0,activity,activity_vocab,resource,resource_vocab,amount,predicted_vocab,activity_sparcity
3711,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 27, 51, 51, 51]","[<SOS>, 112, 112, 112, 10932, 11181, 11181, 11...",[15500.0],A_FINALIZED_COMPLETE,7
12849,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 38, 17, 38, 38]","[<SOS>, 112, 112, 112, 11009, 10881, 11009, 11...",[15500.0],A_FINALIZED_COMPLETE,7
8904,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 55, 47, 47, 47]","[<SOS>, 112, 112, 112, 11201, 11122, 11122, 11...",[15500.0],A_FINALIZED_COMPLETE,7
851,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 52, 33, 52, 52]","[<SOS>, 112, 112, 112, 11189, 10982, 11189, 11...",[15500.0],A_FINALIZED_COMPLETE,7
3514,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 47, 23, 15, 15]","[<SOS>, 112, 112, 112, 11122, 10913, 10863, 10...",[15500.0],A_FINALIZED_COMPLETE,7
...,...,...,...,...,...,...,...
8261,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 25, 71, 30, 30]","[<SOS>, 112, 112, 112, 10929, UNKNOWN, 10939, ...",[15500.0],A_FINALIZED_COMPLETE,7
12445,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 46, 63, 28, 28]","[<SOS>, 112, 112, 112, 11121, 11300, 10933, 10...",[15500.0],A_FINALIZED_COMPLETE,7
754,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 71, 71, 52, 52]","[<SOS>, 112, 112, 112, UNKNOWN, UNKNOWN, 11189...",[15500.0],A_FINALIZED_COMPLETE,7
11085,"[2, 12, 9, 10, 22, 22, 3, 17]","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...","[70, 53, 53, 53, 47, 47, 47, 47]","[<SOS>, 112, 112, 112, 11122, 11122, 11122, 11...",[15500.0],A_FINALIZED_COMPLETE,7
