In [1]:
"""
Use of same experimental setup as for our Probabilistic Suffix Prediction.

Reimplementation for comparison: 
- Paper: Camargo, Manuel, Marlon Dumas, and Oscar González-Rojas. "Learning accurate LSTM models of business processes." International Conference on Business Process Management. Cham: Springer International Publishing, 2019.
- Github (code) from: https://github.com/AdaptiveBProcess/GenerativeLSTM/tree/master/
"""

'\nUse of same experimental setup as for our Probabilistic Suffix Prediction.\n\nReimplementation for comparison: \n- Paper: Camargo, Manuel, Marlon Dumas, and Oscar González-Rojas. "Learning accurate LSTM models of business processes." International Conference on Business Process Management. Cham: Springer International Publishing, 2019.\n- Github (code) from: https://github.com/AdaptiveBProcess/GenerativeLSTM/tree/master/\n'

# Imports

In [1]:
import importlib
import sys
import torch

# Make the parent directories (one to five levels up) importable so the
# project-local packages used further down can be resolved. Each path is
# pushed to the front of sys.path, so the deepest ancestor ends up first.
for depth in range(1, 6):
    sys.path.insert(0, '/'.join(['..'] * depth))

# Data

### Load Data Files

In [2]:
def _load_event_log(path):
    """Load a dataset object stored with torch.save and print its Python type."""
    # weights_only=False requests full unpickling of the stored object;
    # only use this on files from a trusted source.
    dataset = torch.load(path, weights_only=False)
    print(type(dataset))
    return dataset


# Locations of the encoded Helpdesk splits (pickle files written with torch.save).
# Alternative location kept for reference:
#   '../../../../../../encoded_data/compare_camargo/helpdesk_all_5_train.pkl'
#   '../../../../../../encoded_data/compare_camargo/helpdesk_all_5_val.pkl'
file_path_train = '../../../../../../encoded_data/helpdesk_all_5_train.pkl'
file_path_val = '../../../../../../encoded_data/helpdesk_all_5_val.pkl'

# Training split first, then validation split (each load prints the object type).
helpdesk_train_dataset = _load_event_log(file_path_train)
helpdesk_val_dataset = _load_event_log(file_path_val)


<class 'event_log_loader.new_event_log_loader.EventLogDataset'>
<class 'event_log_loader.new_event_log_loader.EventLogDataset'>


### Train Data Insights

In [3]:
# Helpdesk dataset metadata: all_categories is indexed as
# [0] -> categorical feature descriptions, [1] -> numerical feature descriptions.
helpdesk_all_categories = helpdesk_train_dataset.all_categories

helpdesk_all_categories_cat = helpdesk_all_categories[0]
print(helpdesk_all_categories_cat)

helpdesk_all_categories_num = helpdesk_all_categories[1]
print(helpdesk_all_categories_num)

# Each categorical entry is a tuple (feature name, number of labels, {label: id}).
for i, cat in enumerate(helpdesk_all_categories_cat):
    print(f"Helpdesk (5) Categorical feature: {cat[0]}, Index position in categorical data list: {i}")
    print(f"Helpdesk (5) Total Amount of Category labels: {cat[1]}")

print('\n')

# Numerical entries: element 0 is the feature name, element 1 is printed as its "amount".
for i, num in enumerate(helpdesk_all_categories_num):
    print(f"Helpdesk (5) Numerical feature: {num[0]}, Index position in numerical data list: {i}")
    print(f"Helpdesk (5) Amount Numerical: {num[1]}")

# Locate the activity ("concept name") feature once and read both its position
# in the categorical list and its label count (used later as the model output size).
concept_name = 'Activity'
concept_name_matches = [
    (idx, feature[1])
    for idx, feature in enumerate(helpdesk_all_categories_cat)
    if feature[0] == concept_name
]
if not concept_name_matches:
    raise ValueError(f"Categorical feature '{concept_name}' not found in dataset categories")
concept_name_id, concept_name_size = concept_name_matches[0]
print("ID concept name in cat list: ", concept_name_id)
print("Size of concept name (output size): ", concept_name_size)

# Id of the EOS token inside the activity label dictionary.
eos_value = 'EOS'
eos_id = helpdesk_all_categories_cat[concept_name_id][2][eos_value]
print("ID EOS in concept name tensor: ", eos_id)


[('Activity', 16, {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'EOS': 5, 'INVALID': 6, 'Insert ticket': 7, 'RESOLVED': 8, 'Require upgrade': 9, 'Resolve SW anomaly': 10, 'Resolve ticket': 11, 'Schedule intervention': 12, 'Take in charge ticket': 13, 'VERIFIED': 14, 'Wait': 15}), ('Resource', 24, {'EOS': 1, 'Value 1': 2, 'Value 10': 3, 'Value 11': 4, 'Value 12': 5, 'Value 13': 6, 'Value 14': 7, 'Value 15': 8, 'Value 16': 9, 'Value 17': 10, 'Value 18': 11, 'Value 19': 12, 'Value 2': 13, 'Value 20': 14, 'Value 21': 15, 'Value 22': 16, 'Value 3': 17, 'Value 4': 18, 'Value 5': 19, 'Value 6': 20, 'Value 7': 21, 'Value 8': 22, 'Value 9': 23}), ('Variant index', 166, {'1.0': 1, '10.0': 2, '100.0': 3, '101.0': 4, '102.0': 5, '103.0': 6, '104.0': 7, '105.0': 8, '106.0': 9, '107.0': 10, '108.0': 11, '109.0': 12, '11.0': 13, '110.0': 14, '111.0': 15, '112.0': 16, '113.0': 17, '114.0': 18, '12.0': 19, '13.0': 20, '14.0': 21, '15.0': 22, '16.0': 23, '168.0': 24, '16

### Input Features for Encoder and Decoder

In [4]:
# Names of the model input features, taken from the first element of every
# category description: categorical names first, numerical names second.
model_feat_cat = [feature[0] for feature in helpdesk_all_categories_cat]
model_feat_num = [feature[0] for feature in helpdesk_all_categories_num]
model_feat = [model_feat_cat, model_feat_num]
print("Input features encoder: ", model_feat)


Input features encoder:  [['Activity', 'Resource', 'Variant index', 'seriousness', 'customer', 'product', 'responsible_section', 'seriousness_2', 'service_level', 'service_type', 'support_section', 'workgroup'], ['case_elapsed_time', 'event_elapsed_time', 'day_in_week', 'seconds_in_day']]


# Model

In [5]:
# Reload the project module so that edits to joinLSTM/model.py are picked up
# without restarting the kernel.
import joinLSTM.model
importlib.reload(joinLSTM.model)
from joinLSTM.model import FullShared_Join_LSTM

# Specific model parameters from paper:
hidden_size = 50   # size of the hidden layer
num_layers = 1     # number of LSTM cells
input_size = 1     # original note: "STANDARD: one numerical output to predict"
                   # NOTE(review): the name says "input" - confirm meaning in FullShared_Join_LSTM

# NOTE(review): the original comment called this the "Hans Weytjens LSTM model",
# while the notebook header cites Camargo et al. - confirm the attribution.
model_kwargs = dict(
    data_set_categories=helpdesk_all_categories,
    hidden_size=hidden_size,
    num_layers=num_layers,
    model_feat=model_feat,
    input_size=input_size,
    output_size_act=concept_name_size,
)
model = FullShared_Join_LSTM(**model_kwargs)

Data set categories:  ([('Activity', 16, {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'EOS': 5, 'INVALID': 6, 'Insert ticket': 7, 'RESOLVED': 8, 'Require upgrade': 9, 'Resolve SW anomaly': 10, 'Resolve ticket': 11, 'Schedule intervention': 12, 'Take in charge ticket': 13, 'VERIFIED': 14, 'Wait': 15}), ('Resource', 24, {'EOS': 1, 'Value 1': 2, 'Value 10': 3, 'Value 11': 4, 'Value 12': 5, 'Value 13': 6, 'Value 14': 7, 'Value 15': 8, 'Value 16': 9, 'Value 17': 10, 'Value 18': 11, 'Value 19': 12, 'Value 2': 13, 'Value 20': 14, 'Value 21': 15, 'Value 22': 16, 'Value 3': 17, 'Value 4': 18, 'Value 5': 19, 'Value 6': 20, 'Value 7': 21, 'Value 8': 22, 'Value 9': 23}), ('Variant index', 166, {'1.0': 1, '10.0': 2, '100.0': 3, '101.0': 4, '102.0': 5, '103.0': 6, '104.0': 7, '105.0': 8, '106.0': 9, '107.0': 10, '108.0': 11, '109.0': 12, '11.0': 13, '110.0': 14, '111.0': 15, '112.0': 16, '113.0': 17, '114.0': 18, '12.0': 19, '13.0': 20, '14.0': 21, '15.0': 22, '16.0



# Training Configuration

In [6]:
# Reload the project training module so local edits are picked up.
import training.train
importlib.reload(training.train)
from training.train import Training

from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter

# Parameters of the Probabilistic Suffix Prediction experimental design,
# reused here to ensure a fair comparison.
learning_rate = 1e-5   # start learning rate
num_epochs = 100       # epochs
batch_size = 128       # batch of model input
shuffle = True         # shuffle data

# TensorBoard writer for this run.
writer = SummaryWriter(comment="Full_helpdesk_camargo_act")

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Adam without weight decay; the plateau scheduler multiplies the learning rate
# by 0.1 after 4 epochs without improvement of the monitored (minimised) value,
# never going below 1e-10.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4, min_lr=1e-10)

# Settings bundle handed to the Training class.
optimize_values = dict(
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=num_epochs,
    mini_batches=batch_size,
    shuffle=shuffle,
)

# Number of feature groups in model_feat (categorical and numerical lists).
number_tasks = len(model_feat)

trainer_kwargs = dict(
    model=model,
    device=device,
    data_train=helpdesk_train_dataset,
    data_val=helpdesk_val_dataset,
    optimize_values=optimize_values,
    concept_name_id=concept_name_id,
    eos_id=eos_id,
    writer=writer,
    save_model_n_th_epoch=1,
    saving_path="Helpdesk_camargo_leon.pkl",
)
trainer = Training(**trainer_kwargs)

# Train the model:
trainer.train()

Device:  cpu
Optimizer:  Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 1e-05
    maximize: False
    weight_decay: 0
)
Scheduler:  <torch.optim.lr_scheduler.ReduceLROnPlateau object at 0x142b4e270>
Epochs:  100
Mini baches:  128
Shuffle batched dataset:  True


  0%|          | 0/100 [00:00<?, ?it/s]



Epoch [1/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 2.7749
Validation: Avg Validation Loss: 2.7705
Validation Loss for Scheduler: 2.7705
saving model
Epoch [2/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 2.7635
Validation: Avg Validation Loss: 2.7576
Validation Loss for Scheduler: 2.7576
saving model
Epoch [3/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 2.7451
Validation: Avg Validation Loss: 2.7344
Validation Loss for Scheduler: 2.7344
saving model
Epoch [4/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 2.7099
Validation: Avg Validation Loss: 2.6882
Validation Loss for Scheduler: 2.6882
saving model
Epoch [5/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 2.6481
Validation: Avg Validation Loss: 2.6203
Validation Loss for Scheduler: 2.6203
saving model
Epoch [6/100], Learning Rate: 1e-05
Training: Avg Attenuated Training Loss: 2.5626
Validation: Avg Validation Loss: 2.5361
Validat