In [1]:
import os
import pickle
import sys
from pathlib import Path
import pickle
from joblib import load

# Put the five nearest ancestor directories ('..' up to '../../../../..') at
# the front of the module search path. Each one is inserted at position 0 in
# turn, so the most distant ancestor ends up first in sys.path.
# NOTE(review): presumably this makes the `reimplemented_approaches` package
# importable; the paths are relative to the current working directory.
for levels_up in range(1, 6):
    sys.path.insert(0, '/'.join(['..'] * levels_up))

import torch
from torch.utils.data import DataLoader
from torch import nn, optim

from reimplemented_approaches.proactive_conformance_checking.models import LSTMCollectiveIDP
from reimplemented_approaches.proactive_conformance_checking.training import Training
from reimplemented_approaches.proactive_conformance_checking.data_prep_split_encode import PrefixDataset


In [2]:
# Directory holding the prepared Helpdesk artefacts (datasets and encoders).
# Kept as a string with a trailing slash because it is passed on unchanged as
# `save_path` and used as a prefix for the encoder file below.
DATA_DIR = "../../data_preparation/Helpdesk/"

# Load the prepared and encoded datasets; only the first returned dataset
# (the training set) is used in this notebook.
train_set, _ = PrefixDataset.load_datasets(save_path=DATA_DIR)

# Show the shape of each of the first five tensors of the training set.
for tensor_idx in range(5):
    print(train_set.tensors[tensor_idx].size())

# Load the encoders that were stored next to the datasets.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load encoder files that come from a trusted source.
encoders = load(DATA_DIR + "encoders.pkl")
print(encoders)

# The encoder entries are indexed directly (instead of .get) so that a missing
# entry fails right here with a KeyError naming the key, rather than later
# with an error on None.

# Vocabulary sizes are the number of known ids plus one extra slot.
# NOTE(review): the extra slot is presumably for a default/padding id 0, since
# the stored ids start at 1; confirm against the data preparation code.
activity_ids = encoders['activity_ids']
activity_ids_vocab_size_with_default = len(activity_ids) + 1
print("Activities: ", activity_ids_vocab_size_with_default)

resource_ids = encoders['resource_ids']
resource_ids_vocab_size_with_default = len(resource_ids) + 1
print("Resources: ", resource_ids_vocab_size_with_default)

# Twelve calendar months plus the same extra slot.
month_ids_vocab_size_with_default = 12 + 1
print("Months: ", month_ids_vocab_size_with_default)

# Width (second dimension) of the fourth tensor of the training set.
number_trace_attr = train_set.tensors[3].size(1)
print("Number trace attributes: ", number_trace_attr)

# Number of deviation labels listed in the encoders.
number_deviations_y = len(encoders['deviations'])
print("Number deviatons y: ", number_deviations_y)


torch.Size([12195, 15])
torch.Size([12195, 15])
torch.Size([12195, 15])
torch.Size([12195, 3])
torch.Size([12195, 6])
{'activity_ids': {'Assign seriousness': 1, 'Closed': 2, 'Create SW anomaly': 3, 'DUPLICATE': 4, 'INVALID': 5, 'Insert ticket': 6, 'RESOLVED': 7, 'Require upgrade': 8, 'Resolve SW anomaly': 9, 'Resolve ticket': 10, 'Schedule intervention': 11, 'Take in charge ticket': 12, 'VERIFIED': 13, 'Wait': 14}, 'resource_ids': {'Value 1': 1, 'Value 10': 2, 'Value 11': 3, 'Value 12': 4, 'Value 13': 5, 'Value 14': 6, 'Value 15': 7, 'Value 16': 8, 'Value 17': 9, 'Value 18': 10, 'Value 19': 11, 'Value 2': 12, 'Value 20': 13, 'Value 21': 14, 'Value 22': 15, 'Value 3': 16, 'Value 4': 17, 'Value 5': 18, 'Value 6': 19, 'Value 7': 20, 'Value 8': 21, 'Value 9': 22}, 'trace_attr_encoders': {'seriousness': LabelEncoder(), 'customer': LabelEncoder(), 'seriousness_2': LabelEncoder()}, 'deviations': ["y_('>>', 'Assign seriousness')", "y_('Create SW anomaly', '>>')", "y_('Require upgrade', '>>')",

In [3]:
# Select the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
# (To force CPU execution, assign torch.device("cpu") unconditionally instead.)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [4]:
# Seed PyTorch's random number generators before the model is created, so that
# weight initialisation and any later random draws that use the global
# generator are repeatable after Restart Kernel -> Run All.
# NOTE(review): some CUDA kernels are non-deterministic, so GPU runs may still
# differ slightly between executions.
seed = 42
torch.manual_seed(seed)

# Model hyperparameters
# embedding dimension
embedding_dim = 16
# LSTM hidden size
lstm_hidden = 128
# fully connected hidden size
fc_hidden = 128
# dropout probability
p_dropout = 0.1

# Build the model from the vocabulary sizes, trace attribute count and number
# of deviation labels computed earlier, plus the hyperparameters above.
model = LSTMCollectiveIDP(activity_vocab_size=activity_ids_vocab_size_with_default,
                          resource_vocab_size=resource_ids_vocab_size_with_default,
                          month_vocab_size=month_ids_vocab_size_with_default,
                          num_trace_features=number_trace_attr,
                          num_output_labels=number_deviations_y,
                          embedding_dim=embedding_dim,
                          lstm_hidden=lstm_hidden,
                          fc_hidden=fc_hidden,
                          dropout=p_dropout,
                          device=device)


In [None]:
# Training hyperparameters. They are not mentioned in the paper; the values
# marked below were taken from the code accompanying the paper.
learning_rate = 0.0001  # from code in paper
batch_size = 128        # from code in paper
shuffle = True          # from code in paper
# NOTE(review): the saved output of this cell reports ".../20" epochs, so it
# appears to stem from a run with a different `epochs` value; re-run to refresh.
epochs = 30             # 300 if early stopping (20% val from all train)

# Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Settings bundle handed to Training; note that the batch size is passed under
# the key "mini_batches".
optimizer_values = dict(
    optimizer=optimizer,
    epochs=epochs,
    mini_batches=batch_size,
    shuffle=shuffle,
)

# NOTE(review): the file name below is spelled "collecctive"; it is left
# unchanged because other code may load the file by this exact name.
training = Training(
    model=model,
    train_set=train_set,
    optimizer_values=optimizer_values,
    device=device,
    saving_path='./LSTM_collecctive_IDP.pkl',
)

# Run the training and keep what train() returns.
history = training.train()


Epoch 1/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 1/20 — train loss: 0.0032


Epoch 2/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 2/20 — train loss: 0.0022


Epoch 3/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 3/20 — train loss: 0.0022


Epoch 4/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 4/20 — train loss: 0.0022


Epoch 5/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 5/20 — train loss: 0.0022


Epoch 6/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 6/20 — train loss: 0.0022


Epoch 7/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 7/20 — train loss: 0.0022


Epoch 8/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 8/20 — train loss: 0.0022


Epoch 9/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 9/20 — train loss: 0.0022


Epoch 10/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 10/20 — train loss: 0.0021


Epoch 11/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 11/20 — train loss: 0.0021


Epoch 12/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 12/20 — train loss: 0.0020


Epoch 13/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 13/20 — train loss: 0.0020


Epoch 14/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 14/20 — train loss: 0.0019


Epoch 15/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 15/20 — train loss: 0.0019


Epoch 16/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 16/20 — train loss: 0.0019


Epoch 17/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 17/20 — train loss: 0.0019


Epoch 18/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 18/20 — train loss: 0.0019


Epoch 19/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 19/20 — train loss: 0.0019


Epoch 20/20:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 20/20 — train loss: 0.0019


[{'epoch': 1, 'train_loss': 0.0032320205366423},
 {'epoch': 2, 'train_loss': 0.00222105408364593},
 {'epoch': 3, 'train_loss': 0.002214982678049159},
 {'epoch': 4, 'train_loss': 0.002210204968135002},
 {'epoch': 5, 'train_loss': 0.0022062766389763224},
 {'epoch': 6, 'train_loss': 0.002204506743532064},
 {'epoch': 7, 'train_loss': 0.002204291386412958},
 {'epoch': 8, 'train_loss': 0.002203498861354185},
 {'epoch': 9, 'train_loss': 0.002200118170932963},
 {'epoch': 10, 'train_loss': 0.0021226112013503257},
 {'epoch': 11, 'train_loss': 0.0020834040530209245},
 {'epoch': 12, 'train_loss': 0.0020254103497320624},
 {'epoch': 13, 'train_loss': 0.001986355858441792},
 {'epoch': 14, 'train_loss': 0.0019346077267199681},
 {'epoch': 15, 'train_loss': 0.0019175448125860622},
 {'epoch': 16, 'train_loss': 0.0019063317644600376},
 {'epoch': 17, 'train_loss': 0.0018987938373564587},
 {'epoch': 18, 'train_loss': 0.0018751640350765206},
 {'epoch': 19, 'train_loss': 0.0018692415525320368},
 {'epoch': 20,