In [1]:
from parameters.dataset import BPI2012Parameters
from dataset import BPI2012Dataset
from utils.print import print_block
from utils.bpi2012 import remove_trail_steps, print_model_prediction_result, remove_tags_for_seq, generate_fake_df
import tensorflow as tf
from model import LSTMPredNextModel, LSTMScenarioCfModel
import numpy as np
from utils.print import print_block
from model import OriginalDiCEWrapper
import dice_ml
import pandas as pd

In [2]:
# Build the BPI 2012 dataset from a default-constructed parameter object.
dataset_params = BPI2012Parameters()
dataset = BPI2012Dataset(dataset_params)


| Preprocessed data loaded successfully: ./data/preprocessed/BPI_Challenge_2012_with_resource/A_CompleteOnly 


In [3]:
# Collate a single trace (selected by index) from the dataset.
trace_idx = 52
collated_example = dataset.collate_fn([trace_idx])
# Only the activity, resource and amount entries of the collated tuple are kept.
_, example_activities, _, example_resources, example_amount, _ = collated_example

# Decode the index sequences back into vocabulary tokens and print them.
example_activity_tokens = dataset.activity_vocab.list_of_index_to_vocab_2d(example_activities)
print_block(example_activity_tokens, "Activity")
example_resource_tokens = dataset.resource_vocab.list_of_index_to_vocab_2d(example_resources)
print_block(example_resource_tokens, "Resource")


| [['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE', 'A_APPROVED_COMPLETE', 'A_REGISTERED_COMPLETE', 'A_ACTIVATED_COMPLETE']] 

| [['<SOS>', '112', '112', '112', '11201', '11201', '10629', '10629', '10629']] 


In [4]:
## Remove the tail of the example trace
tail_length_to_remove = 3
trimmed_sequences = remove_trail_steps(example_activities, example_resources, tail_length_to_remove)
example_idx_activities, example_idx_resources = trimmed_sequences

# Decode the trimmed index sequences; row 0 is the (only) collated trace.
decoded_activities = dataset.activity_vocab.list_of_index_to_vocab_2d(example_idx_activities)
example_vocab_activities = decoded_activities[0]
decoded_resources = dataset.resource_vocab.list_of_index_to_vocab_2d(example_idx_resources)
example_vocab_resources = decoded_resources[0]

print_block(example_vocab_activities, "Activity (Tail removed)")
print_block(example_vocab_resources, "Resource (Tail removed)")


| ['<SOS>', 'A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE'] 

| ['<SOS>', '112', '112', '112', '11201', '11201'] 


In [5]:
# Name of the saved next-activity prediction model under ./SavedModels.
pred_model_name = "0.8441_LSTMPredNextModel_A_CompleteOnly_2021-07-06 16:46:36.809124"
pred_model = LSTMPredNextModel.load("./SavedModels/%s" % pred_model_name)


| Model parameters loaded successfully from: ./SavedModels/0.8441_LSTMPredNextModel_A_CompleteOnly_2021-07-06 16:46:36.809124  

| Vocab loaded successfully from: ./SavedModels/0.8441_LSTMPredNextModel_A_CompleteOnly_2021-07-06 16:46:36.809124  

| Model loaded successfully from: ./SavedModels/0.8441_LSTMPredNextModel_A_CompleteOnly_2021-07-06 16:46:36.809124  


In [6]:
##### Run the prediction model on the truncated example
# Wrap the activity indices, resource indices and amount as constant tensors.
example_activities_input, example_resources_input, example_amount_input = (
    tf.constant(example_idx_activities),
    tf.constant(example_idx_resources),
    tf.constant(example_amount),
)

# Prints the prediction summary and returns the result that is kept as `predicted_df`.
predicted_df = print_model_prediction_result(
    pred_model,
    example_activities_input,
    example_resources_input,
    example_amount_input,
)


| Predicted activity with highest probability (0.29) is "A_CANCELLED_COMPLETE" 



<PAD>                         0.000014
<EOS>                         0.129297
<SOS>                         0.000019
A_ACCEPTED_COMPLETE           0.000103
A_ACTIVATED_COMPLETE          0.196369
A_APPROVED_COMPLETE           0.170411
A_CANCELLED_COMPLETE          0.294768
A_DECLINED_COMPLETE           0.068447
A_FINALIZED_COMPLETE          0.000009
A_PARTLYSUBMITTED_COMPLETE    0.000004
A_PREACCEPTED_COMPLETE        0.000042
A_REGISTERED_COMPLETE         0.140504
A_SUBMITTED_COMPLETE          0.000013
Name: 0, dtype: float64


In [7]:
# Display the object returned by print_model_prediction_result for inspection.
predicted_df

Unnamed: 0,<PAD>,<EOS>,<SOS>,A_ACCEPTED_COMPLETE,A_ACTIVATED_COMPLETE,A_APPROVED_COMPLETE,A_CANCELLED_COMPLETE,A_DECLINED_COMPLETE,A_FINALIZED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,A_PREACCEPTED_COMPLETE,A_REGISTERED_COMPLETE,A_SUBMITTED_COMPLETE
0,1.4e-05,0.129297,1.9e-05,0.000103,0.196369,0.170411,0.294768,0.068447,9e-06,4e-06,4.2e-05,0.140504,1.3e-05


In [8]:
# Special tokens that are excluded from the feature space handed to DiCE.
no_need_tags = ['<EOS>', '<SOS>', '<PAD>']

# Vocabulary entries that remain once the special tokens are filtered out.
possible_activities = [
    activity for activity in pred_model.activity_vocab.vocabs
    if activity not in no_need_tags
]
possible_resources = [
    resource for resource in pred_model.resource_vocab.vocabs
    if resource not in no_need_tags
]

# Strip the special tokens from the example trace itself.
example_vocab_activities_no_tag = remove_tags_for_seq(example_vocab_activities, no_need_tags)
example_vocab_resources_no_tag = remove_tags_for_seq(example_vocab_resources, no_need_tags)

# Re-index every step by its position in the filtered "possible" lists.
example_idx_activities_no_tag = list(map(possible_activities.index, example_vocab_activities_no_tag))
example_idx_resources_no_tag = list(map(possible_resources.index, example_vocab_resources_no_tag))

example_trace_len_no_tag = len(example_vocab_activities_no_tag)

#### Feature names for DiCE: one activity and one resource column per step (1-based) ####
step_numbers = range(1, example_trace_len_no_tag + 1)
activity_feature_names = np.array(["activity_step_%d" % step for step in step_numbers])
resource_feature_names = np.array(["resource_step_%d" % step for step in step_numbers])

# [min, max] of the "amount" column of the dataset.
amount_column = dataset.df["amount"]
possbile_amount = [min(amount_column), max(amount_column)]

print_block(example_vocab_activities_no_tag, title="Example activities without tags", num_marks=30)
print_block(example_vocab_resources_no_tag, title="Example resources without tags", num_marks=30)
print_block(example_amount, title="=================Amount=================", num_marks=16)


| ['A_SUBMITTED_COMPLETE', 'A_PARTLYSUBMITTED_COMPLETE', 'A_PREACCEPTED_COMPLETE', 'A_ACCEPTED_COMPLETE', 'A_FINALIZED_COMPLETE'] 

| ['112', '112', '112', '11201', '11201'] 

| [15500.0] 


In [9]:
############ Choose the desired next activity ############
# Candidate values noted for this setting: A_DECLINED_COMPLETE, A_APPROVED_COMPLETE
desired_activity = 'A_APPROVED_COMPLETE'
print_block(desired_activity, "Desired activity")

# The wrapper receives the desired activity as a vocabulary index.
desired_activity_idx = pred_model.activity_vocab.vocab_to_index(desired_activity)

dice_binary_model = OriginalDiCEWrapper(
    model=pred_model,
    activity_vocab=pred_model.activity_vocab,
    resource_vocab=pred_model.resource_vocab,
    desired=desired_activity_idx,
    trace_length=example_trace_len_no_tag,
    possible_amount=possbile_amount,
    possible_resources=possible_resources,
    possible_activities=possible_activities,
)


| A_APPROVED_COMPLETE 


In [10]:
# Generated frame used as the dataframe behind the DiCE data interface.
# NOTE(review): presumably 5000 is the number of generated rows — confirm in utils.bpi2012.
fake_df = generate_fake_df(
    5000,
    activity_feature_names,
    resource_feature_names,
    possible_activities,
    possible_resources,
    possbile_amount,
    example_trace_len_no_tag,
)

# DiCE data interface: "predicted" is the outcome column, "amount" the only continuous feature.
d = dice_ml.Data(
    dataframe=fake_df,
    outcome_name="predicted",
    continuous_features=['amount'],
)
# DiCE model interface around the wrapped prediction model, using the TF2 backend.
m = dice_ml.Model(model=dice_binary_model, backend="TF2")
exp = dice_ml.Dice(d, m)

In [11]:
### Build the single-row query DataFrame for DiCE
# Column order: activity steps, then resource steps, then the amount.
feature_names = [
    *activity_feature_names.tolist(),
    *resource_feature_names.tolist(),
    'amount',
]
query_row = example_vocab_activities_no_tag + example_vocab_resources_no_tag + example_amount
query_instance = [query_row]
example_df = pd.DataFrame(query_instance, columns=feature_names)

In [15]:
# Display the single-row query DataFrame that is passed to DiCE.
example_df

Unnamed: 0,activity_step_1,activity_step_2,activity_step_3,activity_step_4,activity_step_5,resource_step_1,resource_step_2,resource_step_3,resource_step_4,resource_step_5,amount
0,A_SUBMITTED_COMPLETE,A_PARTLYSUBMITTED_COMPLETE,A_PREACCEPTED_COMPLETE,A_ACCEPTED_COMPLETE,A_FINALIZED_COMPLETE,112,112,112,11201,11201,15500.0


In [20]:
# Display the amount value(s) of the example trace.
example_amount

[15500.0]

In [30]:
# Features DiCE is allowed to change. Extend this list with
# *activity_feature_names.tolist() and/or *resource_feature_names.tolist()
# to let the step features vary as well.
features_to_vary = ['amount']

# Optional generate_counterfactuals arguments left unset here (values previously noted):
#   desired_class="opposite"
#   yloss_type="log_loss"        # log_loss, hinge_loss, l2_loss
#   algorithm="DiverseCF"        # DiverseCF, RandomInitCF
#   proximity_weight=0.5
#   diversity_weight=1
#   init_near_query_instance=True
#   tie_random=True
#   categorical_penalty=1
#   learning_rate=0.0005
dice_exp = exp.generate_counterfactuals(
    example_df,
    total_CFs=1,
    verbose=True,
    min_iter=100,
    max_iter=2000,
    features_to_vary=features_to_vary,
)

step 1,  loss=0


KeyboardInterrupt: 

In [None]:
# Show the counterfactual result as a (non-sparse) DataFrame, highlighting only changes.
dice_exp.visualize_as_dataframe(
    show_only_changes=True,
    display_sparse_df=False,
)

In [None]:
######## Print the example trace ########
# Use print_block (imported from utils.print at the top of the notebook); `print_big`
# is not imported anywhere in this notebook and would raise NameError on a fresh kernel.
print_block(list(example_df.iloc[0]), "Original", num_marks=50)

In [None]:
######## Print the counterfactual trace ########
# Use print_block (imported from utils.print at the top of the notebook); `print_big`
# is not imported anywhere in this notebook and would raise NameError on a fresh kernel.
counterfactuals_df = dice_exp.final_cfs_df
# Guard against a missing result as well as an empty one, so len() is never called on None.
if counterfactuals_df is not None and len(counterfactuals_df) > 0:
    # The last column is dropped before printing
    # (presumably the "predicted" outcome column — TODO confirm).
    print_block(list(counterfactuals_df.iloc[0][:-1]), "Counterfactual", num_marks=50)
else:
    print_block("Not found!", "Counterfactual")