In [1]:
from Controller import TrainingController, ScenarioCfController
from Parameters import TrainingParameters
from Utils.SaveUtils import load_parameters
from Utils.PrintUtils import print_big
import tensorflow as tf
import json
from dice import EventLogDiCE
import numpy as np
import pandas as pd
import time
from Models import DiCEBinaryDefferentiable
from itertools import chain
from IPython.core.display import display, HTML
from Utils.DiCEHelpers import generate_fake_df, get_trace_with_id, get_longest_trace_row, remove_trail_steps, print_model_prediction_result, remove_tags_for_seq

In [2]:
# Checkpoint folder of the baseline LSTM (with resource) model; "AOW" subset.
folder_path = "./SavedModels/%s" % "0.8264_BPI2012WithResource_BaselineLSTMWithResource_2021-06-18 06:11:10.009443"  # AOW

In [3]:
### Initialise controllers
# Rebuild the training parameters that were saved next to the checkpoint.
parameters_json = load_parameters(folder_path=folder_path)
parameters = TrainingParameters(**parameters_json)
# Seed TensorFlow and NumPy with the stored dataset split seed before the
# controller is created, so the run is repeatable.
tf.random.set_seed(parameters.dataset_split_seed)
np.random.seed(parameters.dataset_split_seed)
# Point the parameters at the checkpoint folder; the controller output below
# reports the model as loaded from this path.
parameters.load_model_folder_path = folder_path
trainer = TrainingController(parameters = parameters)
trainer.show_model_info()


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Preprocessed data loaded successfully: ./datasets/preprocessed/BPI_Challenge_2012_with_resource/AOW 

| Model loaded successfully from: ./SavedModels/0.8264_BPI2012WithResource_BaselineLSTMWithResource_2021-06-18 06:11:10.009443  
Model: "baseline_lstm_with_resource"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  832       
_________________________________________________________________
embedding_1 (Embedding)      multiple                  2304      
_________________________________________________________________
lstm (LSTM)                  multiple                  24832     
_________________________________________________________________
lstm_1 (LSTM)                multiple                  33024     
_________________________________________________________________
lstm_2 (

In [4]:
# Number of rows in the full dataset frame held by the trainer.
len(trainer.dataset.df)

13087

In [5]:
########### Get example data from trainer ###########
# Flatten the batched test dataset into its individual elements and keep
# them in ascending order.
ordered_test_idx = sorted(trainer.test_dataset.unbatch().as_numpy_iterator())
print_big(len(ordered_test_idx), "Test set length")


| 1309 


In [6]:
########## Get longest declined trace for testing ##########
# Look up test-set rows by the vocabulary index of 'A_DECLINED_COMPLETE'
# (presumably get_trace_with_id keeps traces containing that activity — confirm),
# then pick the longest one.
declined_df = get_trace_with_id(
    trainer.dataset.df.iloc[ordered_test_idx],
    trainer.model.vocab.vocab_to_index('A_DECLINED_COMPLETE'),
)
longest_declined_trace_row = get_longest_trace_row(declined_df)
longest_declined_trace_row

Unnamed: 0,trace,trace_vocab,caseid,amount,resource,resource_orig
8464,"[2, 12, 9, 10, 20, 22, 3, 17, 8, 15, 19, 22, 1...","[<SOS>, A_SUBMITTED_COMPLETE, A_PARTLYSUBMITTE...",200028,5800.0,"[70, 53, 53, 15, 15, 48, 37, 37, 37, 37, 37, 3...","[<SOS>, 112, 112, 10863, 10863, 11169, 11003, ..."


In [7]:
# Collate the single selected trace into model inputs; only the activities,
# resources and amount entries of the returned 6-tuple are kept.
# NOTE(review): the row is selected with positional `.iloc` above but passed here
# by its index *label* — assumes the frame has a default positional index; confirm.
_, example_activities, _, example_resources, example_amount, _ = trainer.dataset.collate_fn([longest_declined_trace_row.index[0]])

In [8]:
 # Show the full example trace decoded from indices back to activity names.
 # NOTE(review): this cell's source has a stray one-space indent; IPython tolerates
 # it, but it breaks if the notebook is exported to a plain .py script.
 print("========================================Trace========================================")
 print(trainer.model.vocab.list_of_index_to_vocab_2d(example_activities))

[['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete do

In [9]:
# Number of trailing steps handed to remove_trail_steps (presumably cut from the
# end of the trace so the model has something left to predict — confirm).
tail_length_to_remove = 20
example_idx_activities, example_idx_resources = remove_trail_steps(
    example_activities,
    example_resources,
    tail_length_to_remove,
)

# Decode the first (only) trace of the batch back to readable names.
example_vocab_activities = trainer.model.vocab.list_of_index_to_vocab_2d(example_idx_activities)[0]
example_vocab_resources = [
    trainer.model.resources[resource_idx]
    for resource_idx in example_idx_resources[0]
]

print("========================================Trace after substraction========================================")
print(example_vocab_activities)

['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE']


In [10]:
##### Get model output
# Wrap the truncated example (index sequences and amount) as TensorFlow constants.
example_activities_input = tf.constant(example_idx_activities)
example_resources_input = tf.constant(example_idx_resources)
example_amount_input = tf.constant(example_amount)

# Run the baseline model on the example; the helper prints the prediction and
# its return value is kept for display in the next cell.
predicted_df = print_model_prediction_result(trainer.model, example_activities_input, example_resources_input, example_amount_input)


| Predicted activity with highest probability (0.47) is "W_Nabellen incomplete dossiers_COMPLETE" 



<PAD>                                      0.000265
<EOS>                                      0.042177
<SOS>                                      0.000228
A_ACCEPTED_COMPLETE                        0.003835
A_ACTIVATED_COMPLETE                       0.015623
A_APPROVED_COMPLETE                        0.023551
A_CANCELLED_COMPLETE                       0.092615
A_DECLINED_COMPLETE                        0.051151
A_FINALIZED_COMPLETE                       0.001158
A_PARTLYSUBMITTED_COMPLETE                 0.000178
A_PREACCEPTED_COMPLETE                     0.000699
A_REGISTERED_COMPLETE                      0.012112
A_SUBMITTED_COMPLETE                       0.001095
O_ACCEPTED_COMPLETE                        0.036355
O_CANCELLED_COMPLETE                       0.025155
O_CREATED_COMPLETE                         0.000067
O_DECLINED_COMPLETE                        0.015146
O_SELECTED_CO

In [11]:
# Display the value returned by print_model_prediction_result for the example.
predicted_df

Unnamed: 0,<PAD>,<EOS>,<SOS>,A_ACCEPTED_COMPLETE,A_ACTIVATED_COMPLETE,A_APPROVED_COMPLETE,A_CANCELLED_COMPLETE,A_DECLINED_COMPLETE,A_FINALIZED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,...,O_DECLINED_COMPLETE,O_SELECTED_COMPLETE,O_SENT_BACK_COMPLETE,O_SENT_COMPLETE,W_Afhandelen leads_COMPLETE,W_Beoordelen fraude_COMPLETE,W_Completeren aanvraag_COMPLETE,W_Nabellen incomplete dossiers_COMPLETE,W_Nabellen offertes_COMPLETE,W_Valideren aanvraag_COMPLETE
0,0.000265,0.042177,0.000228,0.003835,0.015623,0.023551,0.092615,0.051151,0.001158,0.000178,...,0.015146,0.046052,0.010418,0.000426,2.1e-05,0.00046,0.017568,0.471511,0.005987,0.126147


In [12]:
# Special tokens that are excluded from the candidate activities/resources.
no_need_tags = ['<EOS>', '<SOS>', '<PAD>']

# Candidate values: every known activity / resource except the special tokens.
possible_activities = [
    activity for activity in trainer.model.vocab.vocabs
    if activity not in no_need_tags
]
possible_resources = [
    resource for resource in trainer.model.resources
    if resource not in no_need_tags
]

# The example trace with the special tokens stripped.
example_vocab_activities_no_tag = remove_tags_for_seq(example_vocab_activities, no_need_tags)
example_vocab_resources_no_tag = remove_tags_for_seq(example_vocab_resources, no_need_tags)

# Re-index the example into the tag-free candidate lists
# (position inside possible_activities / possible_resources).
example_idx_activities_no_tag = [
    possible_activities.index(activity)
    for activity in example_vocab_activities_no_tag
]
example_idx_resources_no_tag = [
    possible_resources.index(resource)
    for resource in example_vocab_resources_no_tag
]

example_trace_len_no_tag = len(example_vocab_activities_no_tag)

#### Determine feature names for DiCE ####
# One 1-based feature name per step of the tag-free trace.
activity_feature_names = np.array(
    ["activity_step_%d" % step for step in range(1, example_trace_len_no_tag + 1)]
)
resource_feature_names = np.array(
    ["resource_step_%d" % step for step in range(1, example_trace_len_no_tag + 1)]
)

# [lowest, highest] amount observed in the dataset.
possbile_amount = [
    min(trainer.dataset.df["amount"]),
    max(trainer.dataset.df["amount"]),
]

print_big(example_vocab_activities_no_tag, title="Example activities without tags", num_marks=30)
print_big(example_vocab_resources_no_tag, title="Example resources without tags", num_marks=30)
print_big(example_amount, title="=================Amount=================", num_marks=16)


| ['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE'] 

| ['112', '112', '10863', '108

In [13]:
############ Setting up desired activity ############
# Target activity of the counterfactual search.
# Candidates used so far: A_DECLINED_COMPLETE, A_APPROVED_COMPLETE
desired_activity = 'A_DECLINED_COMPLETE'
print_big(desired_activity, "Desired activity")

# Wrap the baseline model for the counterfactual search: `desired` is the
# vocabulary index of the target activity, and the amount bounds / candidate
# lists come from the feature-preparation cell.
dice_binary_model = DiCEBinaryDefferentiable(
    model=trainer.model,
    vocab=trainer.model.vocab,
    resources=trainer.model.resources,
    desired=trainer.model.vocab.vocab_to_index(desired_activity),
    trace_length=example_trace_len_no_tag,
    sos_idx_activity=trainer.model.vocab.vocab_to_index("<SOS>"),
    sos_idx_resource=trainer.model.resources.index('<SOS>'),
    amount_min=possbile_amount[0],
    amount_max=possbile_amount[1],
    possible_resources=possible_resources,
    possible_activities=possible_activities,
)


| A_DECLINED_COMPLETE 


In [14]:
# fake_df = generate_fake_df(5000, activity_feature_names, resource_feature_names, possible_activities, possible_resources, possbile_amount, example_trace_len_no_tag)

In [15]:
# Checkpoint folder of the scenario ("valid path") model.
# Previously used checkpoint:
#     "0.9860_BPI2012WithResource_BaselineLSTMWithResource_2021-06-25 21:03:20.326179"
scenario_folder_path = "./SavedModels/%s" % "BPI2012WithResource_ValidPath_2021-06-27 05:38:17.057241"

# Rebuild the saved parameters, point them at the checkpoint folder and let
# the scenario controller load the model from there.
scenario_parameter_json = load_parameters(folder_path=scenario_folder_path)
scenario_parameter = TrainingParameters(**scenario_parameter_json)
scenario_parameter.load_model_folder_path = scenario_folder_path

scenarioController = ScenarioCfController(parameters=scenario_parameter)
scenarioController.show_model_info()


| Running on /job:localhost/replica:0/task:0/device:CPU:0  

| Preprocessed data loaded successfully: ./datasets/preprocessed/BPI_Challenge_2012_valid_trace/All 

| Model loaded successfully from: ./SavedModels/BPI2012WithResource_ValidPath_2021-06-27 05:38:17.057241  
Model: "scenario_cf_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      multiple                  832       
_________________________________________________________________
embedding_3 (Embedding)      multiple                  9216      
_________________________________________________________________
lstm_4 (LSTM)                multiple                  8320      
_________________________________________________________________
lstm_5 (LSTM)                multiple                  8320      
_________________________________________________________________
lstm_6 (LSTM)                multiple     

In [16]:
# Assemble the counterfactual generator from the scenario model's vocabularies,
# the amount bounds, the tag-free candidate lists, the wrapped baseline model
# and the scenario model itself.
dice = EventLogDiCE(
    scenarioController.model.activity_vocab,
    scenarioController.model.resource_vocab,
    possbile_amount,
    possible_activities,
    possible_resources,
    dice_binary_model,
    scenarioController.model,
)

In [17]:
# Run the counterfactual search, starting from the tag-free example trace.
cf_out = dice.run_pls(
    example_amount_input.numpy(),
    example_idx_activities_no_tag,
    example_idx_resources_no_tag,

    ## Weight
    class_loss_weight=1,
    scenario_weight=0.8,
    distance_loss_weight=0,
    cat_loss_weight=0.5,

    ## Training parameters
    max_iter=800,
    lr=0.05,

    ## Options
    use_valid_cf_only=False,
    use_sampling=True,
    scenario_using_hinge_loss=False,
    use_clipping=False,
)


| Prediction: [0.0] | Desired: [1.0] 

| [11.144199] 

| [11.147441] 

| [11.150659] 

| [11.153875] 

| [11.15705] 

| [11.160206] 

| [11.163323] 

| [11.166396] 

| [11.16942] 

| [11.172444] 

| [11.175476] 

| [11.178469] 

| [11.181472] 

| [11.184498] 

| [11.187498] 

| [11.190484] 

| [11.193458] 

| [11.196441] 

| [11.199439] 

| [11.202443] 

| [11.205518] 

| [11.20866] 

| [11.211815] 

| [11.215] 

| [11.218171] 

| [11.221272] 

| [11.224371] 

| [11.227473] 

| [11.230567] 

| [11.2337] 

| [11.236793] 

| [11.239852] 

| [11.2428665] 

| [11.245898] 

| [11.248958] 

| [11.252081] 

| [11.255192] 

| [11.25833] 

| [11.261539] 

| [5800.] 

| ['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CANCELLED_C

In [18]:
# Inspect the wrapper's `desired` attribute (it was constructed with the
# vocabulary index of the desired activity).
dice_binary_model.desired

7

In [19]:
# Inspect the wrapper's `all_predicted` attribute after the search
# (presumably the predicted class index recorded per model call — confirm).
dice_binary_model.all_predicted

ListWrapper([23, 23, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7, 23, 7,

In [20]:
def get_trace_and_prediction_from_model_input(scenarioController, model_input):
    """Print the first trace in `model_input` and the scenario model's output for it.

    Args:
        scenarioController: controller whose `model` exposes `activity_vocab`
            and `resource_vocab` and is callable as
            `model(activity, resource, amount)`.
        model_input: sequence of exactly three items `(activity, resource, amount)`.
            An activity/resource tensor of rank 2 is read as index sequences;
            any other rank is reduced with `argmax` over the last axis.

    Returns:
        None. The decoded activities, decoded resources and the sigmoid of the
        model's first output are reported through `print_big`.
    """
    # Unpacking also enforces that model_input has exactly three components.
    activity, resource, _ = model_input
    scenario_model = scenarioController.model

    def _first_trace_indices(tensor):
        # Rank-2 input already holds indices, so it is used as is rather than
        # round-tripped through one_hot + argmax. The rank is checked per tensor
        # instead of assuming the resource tensor matches the activity tensor.
        if len(tensor.shape) == 2:
            indices = tf.convert_to_tensor(tensor)
        else:
            indices = tf.argmax(tensor, axis=-1)
        # Only the first trace of the batch is reported.
        return indices.numpy().tolist()[0]

    vocab_activity = scenario_model.activity_vocab.list_of_index_to_vocab(
        _first_trace_indices(activity)
    )
    print_big(vocab_activity, "Activity")

    vocab_resource = scenario_model.resource_vocab.list_of_index_to_vocab(
        _first_trace_indices(resource)
    )
    print_big(vocab_resource, "Resource")

    # The model is fed the untouched inputs; its first output is squashed with
    # a sigmoid before printing.
    sigmoid_output = tf.nn.sigmoid(scenario_model(*model_input)[0]).numpy()
    print_big(np.around(sigmoid_output.flatten(), decimals=3), "Prediction from scenario")
    
## ! The cf_out should be the same as temp_model_input

In [21]:
# Report trace + scenario prediction for three inputs: the original example,
# the counterfactual's input (`dice.model_input`) and `dice.temp_model_input`.
# NOTE(review): the original tensors were indexed with trainer.model's vocab and
# resource list but are decoded/scored here with scenarioController.model's
# vocabularies — presumably both share the same index mapping; confirm.
print_big("Original input")
get_trace_and_prediction_from_model_input(scenarioController, [example_activities_input, example_resources_input, example_amount_input])
print_big("Input CF")
get_trace_and_prediction_from_model_input(scenarioController, dice.model_input)
print_big("Updated CF")
get_trace_and_prediction_from_model_input(scenarioController, dice.temp_model_input)


| Original input 

| ['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'W_Afhandelen leads_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'A_ACCEPTED_COMPLETE', 'O_SELECTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Completeren aanvraag_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_CANCELLED_COMPLETE', 'O_SELECTED_COMPLETE', 'O_CREATED_COMPLETE', 'O_SENT_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'O_SENT_BACK_COMPLETE', 'W_Nabellen offertes_COMPLETE', 'W_Valideren aanvraag_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE', 'W_Nabellen incomplete dossiers_COMPLETE'] 

| 

In [22]:
def get_scenario_prediction_from_vocabs(scenarioController, cf_out):
    """Score a counterfactual given as vocabulary tokens with the scenario model.

    Args:
        scenarioController: controller whose `model` exposes `activity_vocab`
            and `resource_vocab` and is callable as
            `model(activity, resource, amount)`.
        cf_out: triple `(amount, activities, resources)` where activities and
            resources are sequences of vocabulary tokens without a leading
            start-of-sequence token.

    Returns:
        None. The model's first output is flattened and printed as is
        (no sigmoid is applied here).
    """
    cf_amount, cf_activity, cf_resource = cf_out
    scenario_model = scenarioController.model

    def _as_single_batch(vocab, tokens):
        # Prepend the start-of-sequence index and wrap as a batch of one.
        return tf.constant([[vocab.sos_idx()] + vocab.list_of_vocab_to_index(tokens)])

    input_activity = _as_single_batch(scenario_model.activity_vocab, cf_activity)
    input_resource = _as_single_batch(scenario_model.resource_vocab, cf_resource)
    input_amount = tf.constant([cf_amount])

    scenario_output = scenario_model(input_activity, input_resource, input_amount)[0]
    print_big(scenario_output.numpy().flatten().tolist(), "Scenario CF Output")

In [23]:
# Print the scenario model's raw output for the counterfactual returned by dice.run_pls.
get_scenario_prediction_from_vocabs(scenarioController, cf_out)


| [1.3800677061080933, 8.66239070892334, 11.983354568481445, 12.370619773864746, 12.4572114944458, 12.677468299865723, 12.755965232849121, 12.901230812072754, 12.954282760620117, 12.770425796508789, 12.821743965148926, 12.888558387756348, 13.096856117248535, 12.702580451965332, 12.517668724060059, 12.717545509338379, 12.899633407592773, 12.619112014770508, 12.823912620544434, 12.928019523620605, 13.320109367370605, 13.620327949523926, 13.90634822845459, 14.1511869430542, 14.370399475097656, 14.561600685119629, 14.450642585754395, 14.419472694396973, 14.577902793884277, 14.563631057739258, 14.48963451385498, 14.493477821350098, 14.54056167602539, 14.61838436126709] 
