In [1]:
# SELF REMINDER. Copy the 'ocpa' directory to the forked one from github, so that I can push updates to github.
# Python native
import ast
import pickle
from statistics import median, mean
import pandas as pd
import numpy as np
# Data handling
# Object centric process mining
# import ocpa.algo.evaluation.precision_and_fitness.utils as evaluation_utils # COMMENTED OUT BY TIM
# import ocpa.algo.evaluation.precision_and_fitness.evaluator as precision_fitness_evaluator # COMMENTED OUT BY TIM
import ocpa.objects.log.importer.csv.factory as csv_import_factory
import ocpa.algo.predictive_monitoring.factory as feature_factory
from ocpa.objects.log.ocel import OCEL
from ocpa.algo.predictive_monitoring.obj import Feature_Storage as FeatureStorage

# # Simple machine learning models, procedure tools, and evaluation metrics
# from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# # Custom GNN tools
# from gnn_utils import (
#     generate_graph_dataset,
#     # get_ordered_event_list,
#     # visualize_graph,
#     # show_remaining_times,
#     # visualize_instance,
#     # GraphDataLoader,
#     # GCN,
#     # evaluate_gnn,
# )

# PyG
import torch
from torch_geometric.loader import DataLoader
# from replicating.ocpa_PyG_integration.EventGraphDataset import EventGraphDataset
from replicating.ocpa_PyG_integration.EventSubGraphDataset import EventSubGraphDataset
from replicating.model import GCN, GAT

# PyTorch TensorBoard support
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

# Global variables
from replicating.experiment_config import STORAGE_PATH, FEATURE_STORAGE_FILE, RANDOM_SEED, TARGET_LABEL


filename = "data/adams/example_logs/mdl/BPI2017-Final.csv"
object_types = ["application", "offer"]
parameters = {
    "obj_names": object_types,
    "val_names": [],
    "act_name": "event_activity",
    "time_name": "event_timestamp",
    "sep": ",",
}
file_path_object_attribute_table = None

Torch version: 1.13.1+cu117
Cuda available: True
Torch geometric version: 2.2.0


In [2]:
with open(
        f"{STORAGE_PATH}/raw/BPI17-feature_storage-[C2,D1,P2,P3,O3].fs", "rb"
    ) as file:
        fs: FeatureStorage = pickle.load(file)

# Adams didn't give this split a random seed, 
# thus we can split the validation set in this arbitrary manner
fs.extract_normalized_train_test_split(
    test_size=0.3,
    validation_size=0.7*0.2, 
    scaler=StandardScaler,
    scaling_exempt_features=[],
    state=RANDOM_SEED,
)

In [3]:
test_mae_normed = 0.4386
fs.scaler.inverse_transform([test_mae_normed]*25)[-2]

1706496.518711365

In [33]:
normalized_data = {'prev_act':[-1,-2],TARGET_LABEL:[test_mae_normed,test_mae_normed+1],'horse':[1,1],'WRONG':[5,5]}
normalized_data_keys = [(i, str(key)) for i,key in enumerate(normalized_data.keys()) if type(key)==tuple]
original_key_idxs = np.argsort(normalized_data_keys)

# [ast.literal_eval(key) for key in normalized_data_keys]
normalized_data_keys

[(1, "('event_remaining_time', ())")]

In [5]:
event_features = ['prev_act',TARGET_LABEL,'paart','horse','rem_time']
invalid_keys = set(normalized_data.keys()) - set(event_features)
valid_keys = set(normalized_data.keys()) & set(event_features)
valid_keys

{('event_remaining_time', ()), 'horse', 'prev_act'}

In [6]:
df = pd.DataFrame(normalized_data)
valid_key_idxs_in_event_feats = {event_features.index(key) for key in valid_keys}
event_feats_idxs = set(range(len(event_features)))
absent_feat_idxs_in_normed_data_keys = event_feats_idxs - valid_key_idxs_in_event_feats
absent_normed_data = {k:v for (k,v) in zip(absent_feat_idxs_in_normed_data_keys,[[0]*len(df)]*len(absent_feat_idxs_in_normed_data_keys))}
valid_key_idxs_in_event_feats

{0, 1, 3}

In [7]:
absent_feat_idxs_in_normed_data_keys

{2, 4}

In [20]:
import ast
tup_to_str = str(TARGET_LABEL)
ast.literal_eval(tup_to_str)

('event_remaining_time', ())

In [9]:
fs.ugly_boy.head()

Unnamed: 0,event_id,"(event_preceding_activities, (Create application,))","(event_preceding_activities, (Submit,))","(event_preceding_activities, (Complete,))","(event_preceding_activities, (Accept,))","(event_preceding_activities, (Create offer,))","(event_preceding_activities, (Send (mail and online),))","(event_preceding_activities, (Call,))","(event_preceding_activities, (Validate,))","(event_preceding_activities, (Return,))",...,"(event_preceding_activities, (Handle leads,))","(event_preceding_activities, (Cancel application,))","(event_preceding_activities, (Send (online),))","(event_preceding_activities, (Assess potential fraud,))","(event_preceding_activities, (Personal loan collection,))","(event_preceding_activities, (Shorten completion,))","(event_aggregate_previous_char, (event_RequestedAmount, <built-in function max>))","(event_elapsed_time, ())","(event_remaining_time, ())","(event_previous_type_count, (offer,))"
0,236558,-0.294847,-0.233496,-0.286946,-0.294847,-0.523023,-0.328672,-0.400249,-0.33458,-0.250309,...,-0.097205,-0.183044,-0.070832,-0.031809,-0.003688,-0.013466,-1.069468,-0.710438,-0.266181,-1.175837
1,236559,3.391587,-0.233496,-0.286946,-0.294847,-0.523023,-0.328672,-0.400249,-0.33458,-0.250309,...,-0.097205,-0.183044,-0.070832,-0.031809,-0.003688,-0.013466,-1.069468,-0.710438,-0.266181,-1.175837
2,236560,-0.294847,-0.233496,3.484972,-0.294847,-0.523023,-0.328672,-0.400249,-0.33458,-0.250309,...,-0.097205,-0.183044,-0.070832,-0.031809,-0.003688,-0.013466,-1.069468,-0.710314,-0.266287,-1.175837
3,236561,-0.294847,-0.233496,-0.286946,3.391587,-0.523023,-0.328672,-0.400249,-0.33458,-0.250309,...,-0.097205,-0.183044,-0.070832,-0.031809,-0.003688,-0.013466,-1.069468,-0.626416,-0.338582,-0.048627
4,236562,-0.294847,-0.233496,-0.286946,-0.294847,1.897842,-0.328672,-0.400249,-0.33458,-0.250309,...,-0.097205,-0.183044,-0.070832,-0.031809,-0.003688,-0.013466,-1.069468,-0.626407,-0.33859,-0.048627
