In [1]:
from parameters.dataset import BPI2012Parameters
from dataset import BPI2012Dataset
from utils.print import print_block
from utils.bpi2012 import remove_trail_steps, print_model_prediction_result, remove_tags_for_seq
import tensorflow as tf
from model import LSTMPredNextAmountSpecificModel, LSTMScenarioCfWithResourceModel
import numpy as np
from model import DiCEDefferientiableWrapper 
from dice import EventLogDiCE

In [2]:
# Build the dataset wrapper from a default-constructed parameter object.
dataset_params = BPI2012Parameters()
dataset = BPI2012Dataset(dataset_params)


| Preprocessed data loaded successfully: ./data/preprocessed/BPI_Challenge_2012_with_resource/AOW_CompleteOnly 


In [3]:
# Index of the single trace inspected throughout this notebook.
trace_idx = 52

# collate_fn is given a one-element batch; only the activity, resource and
# amount entries of its six-element result are kept.
(
    _,
    example_activities,
    _,
    example_resources,
    example_amount,
    _,
) = dataset.collate_fn([trace_idx])

# Decode each index sequence back to vocabulary strings and print it.
decoded_activities = dataset.activity_vocab.list_of_index_to_vocab_2d(example_activities)
print_block(decoded_activities, "Activity")
decoded_resources = dataset.resource_vocab.list_of_index_to_vocab_2d(example_resources)
print_block(decoded_resources, "Resource")


| [['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_ACCEPTED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE', 'A_ACTIVATED_COMPLETE', 'W_Valideren aanvraag_COMPLETE']] 

| [['<SOS>', '112', '112', '112', '11180', '11201', '11201', '11201', '11201', '11201', '11201', '11201', '11049', '11049', '10629', '10629', '10629', '10629', '10629']] 


In [4]:
## Remove tails
# Number of trailing steps dropped from the example trace.
tail_length_to_remove = 11
truncated_sequences = remove_trail_steps(
    example_activities,
    example_resources,
    tail_length_to_remove,
)
example_idx_activities, example_idx_resources = truncated_sequences

# Decode the truncated sequences and keep the first row of each 2-D result
# (the batch holds a single trace).
activity_vocab_rows = dataset.activity_vocab.list_of_index_to_vocab_2d(example_idx_activities)
example_vocab_activities = activity_vocab_rows[0]
resource_vocab_rows = dataset.resource_vocab.list_of_index_to_vocab_2d(example_idx_resources)
example_vocab_resources = resource_vocab_rows[0]

print_block(example_vocab_activities, "Activity (Tail removed)")
print_block(example_vocab_resources, "Resource (Tail removed)")


| ['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE'] 

| ['<SOS>', '112', '112', '112', '11180', '11201', '11201', '11201'] 


In [5]:
# Name of the saved prediction-model checkpoint under ./SavedModels.
pred_model_name = "0.8159_LSTMPredNextAmountSpecificModel_AOW_CompleteOnly_2021-06-29 18:25:07.216254"
pred_model_path = "./SavedModels/%s" % pred_model_name

# Restore the model from that location.
pred_model = LSTMPredNextAmountSpecificModel.load(pred_model_path)


| Model parameters loaded successfully from: ./SavedModels/0.8159_LSTMPredNextAmountSpecificModel_AOW_CompleteOnly_2021-06-29 18:25:07.216254  

| Vocab loaded successfully from: ./SavedModels/0.8159_LSTMPredNextAmountSpecificModel_AOW_CompleteOnly_2021-06-29 18:25:07.216254  

| Model loaded successfully from: ./SavedModels/0.8159_LSTMPredNextAmountSpecificModel_AOW_CompleteOnly_2021-06-29 18:25:07.216254  


In [6]:
# Run one forward pass on the model's own example input, then print the
# layer summary.
# NOTE(review): presumably the call is what builds the model so summary()
# can report parameter counts — confirm.
pred_example_input = pred_model.get_example_input()
_ = pred_model(**pred_example_input)
pred_model.summary()

Model: "LSTMPredNextAmountSpecificModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  832       
_________________________________________________________________
embedding_1 (Embedding)      multiple                  9216      
_________________________________________________________________
lstm (LSTM)                  multiple                  24832     
_________________________________________________________________
lstm_1 (LSTM)                multiple                  49408     
_________________________________________________________________
sequential (Sequential)      (1, 64)                   4544      
_________________________________________________________________
sequential_1 (Sequential)    (1, 1, 26)                15066     
Total params: 103,898
Trainable params: 103,258
Non-trainable params: 640
___________________________

In [7]:
##### Get model output
# Wrap the tail-removed index sequences and the amount as constant tensors.
example_activities_input, example_resources_input, example_amount_input = (
    tf.constant(values)
    for values in (example_idx_activities, example_idx_resources, example_amount)
)

# Print the prediction for the example and keep the returned table.
predicted_df = print_model_prediction_result(
    pred_model,
    example_activities_input,
    example_resources_input,
    example_amount_input,
)


| Predicted activity with highest probability (1.00) is "O_SELECTED_COMPLETE" 



<PAD>                                      9.458650e-11
<EOS>                                      1.162047e-11
<SOS>                                      1.214198e-09
A_ACCEPTED_COMPLETE                        1.298703e-08
A_ACTIVATED_COMPLETE                       3.833323e-09
A_APPROVED_COMPLETE                        7.352894e-09
A_CANCELLED_COMPLETE                       1.139191e-04
A_DECLINED_COMPLETE                        1.029128e-06
A_FINALIZED_COMPLETE                       2.934446e-05
A_PARTLYSUBMITTED_COMPLETE                 2.463539e-10
A_PREACCEPTED_COMPLETE                     9.000219e-11
A_REGISTERED_COMPLETE                      1.219479e-10
A_SUBMITTED_COMPLETE                       2.929607e-10
O_ACCEPTED_COMPLETE                        1.501033e-10
O_CANCELLED_COMPLETE                       4.711716e-09
O_CREATED_COMPLETE                         5.534542e-05
O_DECLINED_COMPLETE  

In [8]:
# Show the table returned by print_model_prediction_result as the cell output.
predicted_df

Unnamed: 0,<PAD>,<EOS>,<SOS>,A_ACCEPTED_COMPLETE,A_ACTIVATED_COMPLETE,A_APPROVED_COMPLETE,A_CANCELLED_COMPLETE,A_DECLINED_COMPLETE,A_FINALIZED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,...,O_DECLINED_COMPLETE,O_SELECTED_COMPLETE,O_SENT_BACK_COMPLETE,O_SENT_COMPLETE,W_Afhandelen leads_COMPLETE,W_Beoordelen fraude_COMPLETE,W_Completeren aanvraag_COMPLETE,W_Nabellen incomplete dossiers_COMPLETE,W_Nabellen offertes_COMPLETE,W_Valideren aanvraag_COMPLETE
0,9.45865e-11,1.162047e-11,1.214198e-09,1.298703e-08,3.833323e-09,7.352894e-09,0.000114,1e-06,2.9e-05,2.463539e-10,...,4.675891e-10,0.999794,5.258974e-10,1.164806e-07,1.094385e-08,1.853643e-09,6e-06,9.320448e-11,8.144634e-11,5.907909e-10


In [9]:
# Special vocabulary tokens that are excluded from the search space.
no_need_tags = ['<EOS>', '<SOS>', '<PAD>']

# Candidate values, taken from the prediction model's vocabularies with the
# special tokens filtered out.
possible_activities = [
    vocab for vocab in pred_model.activity_vocab.vocabs
    if vocab not in no_need_tags
]
possible_resources = [
    vocab for vocab in pred_model.resource_vocab.vocabs
    if vocab not in no_need_tags
]

# Strip the special tokens from the example trace as well.
example_vocab_activities_no_tag = remove_tags_for_seq(example_vocab_activities, no_need_tags)
example_vocab_resources_no_tag = remove_tags_for_seq(example_vocab_resources, no_need_tags)

# transform to possible dim: re-index every step by its position in the
# tag-free candidate lists.
example_idx_activities_no_tag = list(
    map(possible_activities.index, example_vocab_activities_no_tag)
)
example_idx_resources_no_tag = list(
    map(possible_resources.index, example_vocab_resources_no_tag)
)

example_trace_len_no_tag = len(example_vocab_activities_no_tag)

#### Determine feature names for DiCE ####
# One 1-based name per step of the tag-free trace.
activity_feature_names = np.array(
    ["activity_step_%d" % step for step in range(1, example_trace_len_no_tag + 1)]
)
resource_feature_names = np.array(
    ["resource_step_%d" % step for step in range(1, example_trace_len_no_tag + 1)]
)

# [min, max] of the dataset's "amount" column. The misspelled name is kept
# because later cells refer to it.
amount_column = dataset.df["amount"]
possbile_amount = [min(amount_column), max(amount_column)]

print_block(example_vocab_activities_no_tag, title="Example activities without tags", num_marks=30)
print_block(example_vocab_resources_no_tag, title="Example resources without tags", num_marks=30)
print_block(example_amount, title="=================Amount=================", num_marks=16)


| ['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE'] 

| ['112', '112', '112', '11180', '11201', '11201', '11201'] 

| [15500.0] 


In [10]:
# Look up the <SOS> index in each of the prediction model's vocabularies.
sos_idx_activity = pred_model.activity_vocab.vocab_to_index("<SOS>")
sos_idx_resource = pred_model.resource_vocab.vocab_to_index('<SOS>')

# Wrap the prediction model for the counterfactual search. The trace length,
# candidate lists and amount bounds come from the tag-free example set up
# earlier in the notebook.
dice_wrapper = DiCEDefferientiableWrapper(
    model=pred_model,
    activity_vocab=pred_model.activity_vocab,
    resource_vocab=pred_model.resource_vocab,
    sos_idx_activity=sos_idx_activity,
    sos_idx_resource=sos_idx_resource,
    trace_length=example_trace_len_no_tag,
    possible_activities=possible_activities,
    possible_resources=possible_resources,
    amount_min=possbile_amount[0],
    amount_max=possbile_amount[1],
)

In [11]:
# Name of the saved scenario-model checkpoint under ./SavedModels.
scenario_model_name = "0.9855_LSTMScenarioCfWithResourceModel_AOW_CompleteOnly_20_times_2021-06-29 10:12:02.918758"
scenario_model_path = "./SavedModels/%s" % scenario_model_name

# Restore the model from that location.
scenario_model = LSTMScenarioCfWithResourceModel.load(scenario_model_path)


| Model parameters loaded successfully from: ./SavedModels/0.9855_LSTMScenarioCfWithResourceModel_AOW_CompleteOnly_20_times_2021-06-29 10:12:02.918758  

| Vocab loaded successfully from: ./SavedModels/0.9855_LSTMScenarioCfWithResourceModel_AOW_CompleteOnly_20_times_2021-06-29 10:12:02.918758  

| Model loaded successfully from: ./SavedModels/0.9855_LSTMScenarioCfWithResourceModel_AOW_CompleteOnly_20_times_2021-06-29 10:12:02.918758  


In [12]:
# Run one forward pass on the model's own example input, then print the
# layer summary.
# NOTE(review): presumably the call is what builds the model so summary()
# can report parameter counts — confirm.
scenario_example_input = scenario_model.get_example_input()
_ = scenario_model(**scenario_example_input)
scenario_model.summary()

Model: "LSTMScenarioCfWithResourceModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      multiple                  832       
_________________________________________________________________
embedding_3 (Embedding)      multiple                  9216      
_________________________________________________________________
lstm_2 (LSTM)                multiple                  24832     
_________________________________________________________________
lstm_3 (LSTM)                multiple                  49408     
_________________________________________________________________
sequential_2 (Sequential)    (1, 1, 1)                 9157      
Total params: 93,445
Trainable params: 93,059
Non-trainable params: 386
_________________________________________________________________


In [13]:
# Assemble the counterfactual generator from the vocabularies, the amount
# bounds, the candidate activity/resource lists, the wrapped prediction model
# and the scenario model.
# All arguments are positional; the parameter names are not visible in this
# notebook, so the order must match EventLogDiCE's signature.
# NOTE(review): the vocabularies passed here come from `dataset`, while
# dice_wrapper was built with `pred_model`'s vocabularies — confirm both
# sources map tokens to the same indices.
dice = EventLogDiCE(
    dataset.activity_vocab,
    dataset.resource_vocab,
    possbile_amount,
    possible_activities,
    possible_resources, 
    dice_wrapper,
    scenario_model
    )

In [34]:
## Weight
loss_weights = dict(
    class_loss_weight=2,
    scenario_weight=1e6,
    distance_loss_weight=1e-8,
    cat_loss_weight=1e-2,
)

## Training parameters
training_params = dict(
    scenario_threshold=0.1,
    max_iter=200,
    lr=0.5,
)

## Options
# NOTE(review): use_sampling=True and no random seed is set in the visible
# cells — results may differ between runs; confirm whether seeding is needed.
run_options = dict(
    use_valid_cf_only=False,
    use_sampling=True,
    class_using_hinge_loss=False,
    scenario_using_hinge_loss=False,
    use_clipping=True,
)

# Search for a counterfactual of the tag-free example trace whose prediction
# is the desired activity.
cf_out = dice.run_pls(
    ## Input
    example_amount_input.numpy(),
    example_idx_activities_no_tag,
    example_idx_resources_no_tag,
    desired_vocab="A_DECLINED_COMPLETE",
    **loss_weights,
    **training_params,
    **run_options,
)


| Prediction: [O_SELECTED_COMPLETE(17)] | Desired: [A_DECLINED_COMPLETE(7)] 


| Total [54340528.00] | Scenario [54.34] | Class [2.07] | Category [9879.00] | Distance [407.50] 

| Invalid: A_DECLINED_COMPLETE (7) | Valid: O_SENT_BACK_COMPLETE (18) 

| Total [61656628.00] | Scenario [61.66] | Class [1.95] | Category [12599.00] | Distance [714.00] 

| Invalid: A_DECLINED_COMPLETE (7) | Valid: <EOS> (1) 

| Total [62551080.00] | Scenario [62.55] | Class [1.98] | Category [10581.50] | Distance [1204.00] 

| Invalid: A_DECLINED_COMPLETE (7) | Valid: W_Valideren aanvraag_COMPLETE (25) 

| Total [61295296.00] | Scenario [61.30] | Class [2.08] | Category [10201.24] | Distance [1896.25] 

| Invalid: A_DECLINED_COMPLETE (7) | Valid: O_SELECTED_COMPLETE (17) 

| Running time: 17.93 

| [15500.] 

| ['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE'] 

