In [1]:
import importlib
import sys
import os
import torch
import numpy as np
from tqdm.notebook import tqdm
import torch

sys.path.insert(0, '..')
sys.path.insert(1, '../../..')
sys.path.insert(0, "../../src")  # src package


## Generate Train, Val, Test

In [2]:
from perturbation_logic.activity_pertubator import (
    split_prefix_suffix_readable,
    redo_last_activity_of_prefix)
from event_log_loader_service.event_log_loader import (get_train_test_val_datasets,
                                                       extract_feature_info)
np.random.seed(17)

csv_path="../../../data/BPI Challenge 2017.csv"

properties = {
    'case_name' : 'case:concept:name',
    'concept_name' : 'concept:name',
    'timestamp_name' : 'time:timestamp',
    'time_since_case_start_column' : 'case_elapsed_time',
    'time_since_last_event_column' : 'event_elapsed_time',
    'day_in_week_column' : 'day_in_week',
    'seconds_in_day_column' : 'seconds_in_day',
    'min_suffix_size' : 5,
    'train_validation_size' : 0.15,
    'test_validation_size' : 0.2,
    'window_size' : 'auto',
    'categorical_columns' : ['concept:name', 'Action', 'org:resource', 'EventOrigin', 'lifecycle:transition', 'case:LoanGoal', 'case:ApplicationType', 'Accepted', 'Selected', ],
    'continuous_columns' : ['case_elapsed_time', 'event_elapsed_time', 'day_in_week', 'seconds_in_day', 'case:RequestedAmount', 'FirstWithdrawalAmount', 'NumberOfTerms', 'MonthlyCost', 'CreditScore'],
    'continuous_positive_columns' : []
}


train_df, val_df, test_df,  = get_train_test_val_datasets(csv_path, properties)


print(len(train_df))

# data_train = split_prefix_suffix_readable(
#     train_df,
#     case_column=properties["case_name"],
#     activity_column=properties["concept_name"],
#     min_suffix_size=2,
# )

data_val = split_prefix_suffix_readable(
    val_df,
    case_column=properties["case_name"],
    activity_column=properties["concept_name"],
    min_suffix_size=2,
)

# data_test = split_prefix_suffix_readable(
#     test_df,
#     case_column=properties["case_name"],
#     activity_column=properties["concept_name"],
#     min_suffix_size=2,
# )

#torch.save(data_train, '../../../perturbed_data/helpdesk/train.pkl')
torch.save(data_val, '../../../perturbed_data/BPIC17/val.pkl')
#torch.save(data_test, '../../../perturbed_data/helpdesk/test.pkl')


#display(train_df)

#extract feature info
feature_info = extract_feature_info(val_df, properties)


881621


# Create perturbed Datasets

In [3]:
# Last Event Attack
from perturbation_logic.feature_attacks import last_event_attack

# Reset data_val_copy for feature attacks
data_val_copy = data_val.copy()

# Attacks the last event of each prefix
data_pert_last = last_event_attack(
    data=data_val_copy,
    properties=properties,
    feature_info=feature_info,
    attackable_features=['concept:name', 'Action', 'org:resource', 'EventOrigin', 'lifecycle:transition', 'case:LoanGoal', 'case:ApplicationType', 'Accepted', 'Selected', 'case_elapsed_time', 'event_elapsed_time', 'day_in_week', 'seconds_in_day', 'case:RequestedAmount', 'FirstWithdrawalAmount', 'NumberOfTerms', 'MonthlyCost', 'CreditScore'],
    num_of_features_to_attack=6,
    magnitude=0.5,
    feature_range_scope='lokal',
    random_seed=17
)

torch.save(data_pert_last, '../../../perturbed_data/BPIC17/last_event_attack.pkl')

In [None]:
# Random Event Attack
from perturbation_logic.feature_attacks import random_event_attack

# Attacks random events in each prefix with probability p
data_val_copy = data_val.copy()  
data_pert_random = random_event_attack(
    data=data_val_copy,
    properties=properties,
    feature_info=feature_info,
    attackable_features=['concept:name', 'Action', 'org:resource', 'EventOrigin', 'lifecycle:transition', 'case:LoanGoal', 'case:ApplicationType', 'Accepted', 'Selected', 'case_elapsed_time', 'event_elapsed_time', 'day_in_week', 'seconds_in_day', 'case:RequestedAmount', 'FirstWithdrawalAmount', 'NumberOfTerms', 'MonthlyCost', 'CreditScore'],
    num_of_features_to_attack=5,
    event_attack_probability=0.3,
    magnitude=0.5,
    feature_range_scope='local',
    random_seed=17
)

torch.save(data_pert_random, '../../../perturbed_data/BPIC17/random_event_attack.pkl')

In [None]:
# Apply "redo last activity" augmentation to each prefix/suffix pair

data_val_copy = data_val.copy()  # Reset again
data_pert = {}
for key, (prefix_df, suffix_df) in data_val_copy.items():
    new_prefix, new_suffix = redo_last_activity_of_prefix(
        prefix_df,
        suffix_df,
        properties=properties,
    )
    data_pert[key] = (new_prefix, new_suffix)


torch.save(data_pert, '../../../perturbed_data/BPIC17/redo_pert.pkl')


# Compare changes

In [None]:
# Compare clean and perturbed datasets
from perturbation_logic.attack_impact_analyzer import highlight_feature_attack_impact

# Compare clean dataset with random event attack
highlight_feature_attack_impact(
    clean_data_path='../../../perturbed_data/BPIC17/val.pkl',
    perturbed_data_path='../../../perturbed_data/BPIC17/last_event_attack.pkl',
    properties=properties
)

Loading clean dataset from: ../../../perturbed_data/helpdesk/val.pkl
Loading perturbed dataset from: ../../../perturbed_data/helpdesk/last_event_attack.pkl

Clean dataset has 1898 cases
Perturbed dataset has 1898 cases

COMPARISON RESULTS

Case: Case 1, Prefix Length: 1
  [CHANGED] Found 1 event(s) with differences

  Event 0:
    Variant index:
      Clean:    12.0
      Perturbed: 18.0 [CHANGED]


Case: Case 1, Prefix Length: 2
  [CHANGED] Found 1 event(s) with differences

  Event 1:
    Resource:
      Clean:    Value 1
      Perturbed: Value 12 [CHANGED]


Case: Case 1, Prefix Length: 3
  [CHANGED] Found 1 event(s) with differences

  Event 2:
    Variant index:
      Clean:    12.0
      Perturbed: 34.0 [CHANGED]
    Resource:
      Clean:    Value 2
      Perturbed: Value 20 [CHANGED]


Case: Case 10, Prefix Length: 1
  [CHANGED] Found 1 event(s) with differences

  Event 0:
    Variant index:
      Clean:    1.0
      Perturbed: 128.0 [CHANGED]


Case: Case 10, Prefix Length: 2

In [None]:
# Compare clean and perturbed datasets
from perturbation_logic.attack_impact_analyzer import highlight_structural_attack_impact

# Compare clean dataset with random event attack
highlight_structural_attack_impact(
    clean_data_path='../../../perturbed_data/BPIC17/test_clean.pkl',
    perturbed_data_path='../../../perturbed_data/BPIC17/test_redo_pert.pkl',
    properties=properties
)

Loading clean dataset from: ../../../perturbed_data/helpdesk/test_clean.pkl
Loading perturbed dataset from: ../../../perturbed_data/helpdesk/test_redo_pert.pkl

Clean dataset has 2444 cases
Perturbed dataset has 2444 cases

STRUCTURAL ATTACK COMPARISON RESULTS

Case: Case 1617, Prefix Length: 1
Prefix
Activity_seq_clean = ['Assign seriousness']
Activity_seq_pert = ['Assign seriousness', 'Assign seriousness']
Case_elapsed_time_clean = [0.0]
Case_elapsed_time_pert = [0.0, 0.0]
Event_elapsed_time_clean = [nan]
Event_elapsed_time_pert = [nan, nan]
Day_in_week_clean = [3.0]
Day_in_week_pert = [3.0, 3.0]
Seconds_in_day_clean = [25523.0]
Seconds_in_day_pert = [25523.0, 25523.0]

===
suffix
Activity_seq_clean = ['Wait', 'Take in charge ticket', 'Resolve ticket', 'Closed', 'EOS', 'EOS', 'EOS', 'EOS', 'EOS']
Activity_seq_pert = ['Wait', 'Take in charge ticket', 'Resolve ticket', 'Closed', 'EOS', 'EOS', 'EOS', 'EOS', 'EOS']
Case_elapsed_time_clean = [6477.0, 2967139.0, 2967147.0, 4263168.0, nan, 