In [1]:
# DEPENDENCIES
# Python native
import functools
import json
import os

os.chdir("/home/tim/Development/OCPPM/")
import pickle
import logging
import random
from copy import copy
from datetime import datetime
from statistics import median as median
from sys import platform
from typing import Any, Callable

# Data handling
import numpy as np
import ocpa.algo.predictive_monitoring.factory as feature_factory

# PyG
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as O

# PyTorch TensorBoard support
import torch.utils.tensorboard
import torch_geometric.nn as pygnn
import torch_geometric.transforms as T

# Object centric process mining
from ocpa.algo.predictive_monitoring.obj import Feature_Storage as FeatureStorage

# # Simple machine learning models, procedure tools, and evaluation metrics
# from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import tensor
from torch.utils.tensorboard.writer import SummaryWriter
from torch_geometric.loader import DataLoader
from tqdm import tqdm

import utilities.evaluation_utils as evaluation_utils
import utilities.hetero_data_utils as hetero_data_utils
import utilities.hetero_evaluation_utils as hetero_evaluation_utils
import utilities.hetero_training_utils as hetero_training_utils
import utilities.torch_utils

# Custom imports
# from loan_application_experiment.feature_encodings.efg.efg import EFG
from experiments.hoeg import HOEG

# from importing_ocel import build_feature_storage, load_ocel, pickle_feature_storage
from models.definitions.geometric_models import GraphModel, HigherOrderGNN

# Print system info
utilities.torch_utils.print_system_info()
utilities.torch_utils.print_torch_info()

# INITIAL CONFIGURATION
# Single settings dict for the CS / HOEG experiment. Cells further down read
# the dataset location, label keys and graph metadata from it.
cs_hoeg_config = {
    # Dataset root and the two input files inside it (passed to HOEG as
    # `root`, `events_filename` and `objects_filename`).
    "STORAGE_PATH": "data/CS/feature_encodings/HOEG/hoeg",
    "SPLIT_FEATURE_STORAGE_FILE": "CS_split_[C2_P2_P3_O3_eas].fs",
    # Label keys for event nodes and object nodes (passed to HOEG as
    # `event_node_label_key` / `object_nodes_label_key`).
    "events_target_label": (feature_factory.EVENT_REMAINING_TIME, ()),
    "objects_target_label": "@@object_lifecycle_duration",
    "OBJECTS_DATA_DICT": "cs_ofg+oi_graph+krs_node_map+krv_node_map+cv_node_map.pkl",
    # Training hyperparameters; not consumed by any cell visible in this part
    # of the notebook.
    "BATCH_SIZE": 16,
    "RANDOM_SEED": 42,
    "EPOCHS": 32,
    "target_node_type": "event",
    "object_types": ["krs", "krv", "cv"],
    # (node types, edge types); the edge-type list (index 1) is what gets
    # passed to HOEG as `edge_types`.
    "meta_data": (
        ["event", "krs", "krv", "cv"],
        [
            ("event", "follows", "event"),
            ("event", "interacts", "krs"),
            ("event", "interacts", "krv"),
            ("event", "interacts", "cv"),
        ],
    ),
    # NOTE(review): presumably the early-stopping patience in epochs - confirm
    # against the training utilities.
    "early_stopping": 8,
    # NOTE(review): key names match torch.optim.Adam's keyword arguments;
    # confirm which optimizer actually receives them.
    "optimizer_settings": {
        "lr": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-08,
        "weight_decay": 0,
        "amsgrad": False,
    },
    "loss_fn": torch.nn.L1Loss(),
    "verbose": False,
    "skip_cache": False,
    # Use the GPU when one is available, otherwise fall back to the CPU.
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
}

# CONFIGURATION ADAPTATIONS may be set here
# cs_hoeg_config["early_stopping"] = 4
# cs_hoeg_config['skip_cache'] = True

# The log file is opened by basicConfig; a missing parent directory would
# raise FileNotFoundError, so create it up front.
os.makedirs("logging", exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    filename="logging/debug.log",
    # basicConfig does nothing when the root logger already has handlers
    # (e.g. installed by an imported library or an earlier run of this cell).
    # force=True removes them first so the file/format settings above apply.
    force=True,
)
logging.critical("-" * 32 + " TEST CS HOEG " + "-" * 32)

CRITICAL:root:-------------------------------- TEST CS HOEG --------------------------------


CPU: Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz (4x)
Total CPU memory: 46.93GB
Available CPU memory: 31.92GB
GPU: NVIDIA GeForce GTX 960
Total GPU memory: 4096.0MB
Available GPU memory: 4029.0MB
Platform: Linux-6.2.0-26-generic-x86_64-with-glibc2.35
Torch version: 1.13.1+cu117
Cuda available: True
Torch geometric version: 2.3.1


In [35]:
# Scratch check: torch.arange(n) yields the integers 0 .. n-1.
torch.arange(3)

tensor([0, 1, 2])

In [58]:
import time

# Toy 2 x 15 int64 edge index used to exercise the correction functions below.
# In the calls below (object_node_type_position=0) row 0 is treated as the
# object-id row and row 1 as the event-id row; the values 6 and 3 in row 0 lie
# outside the allowed object indices {0, 1} and are the entries that get fixed.
edge_index = torch.tensor(
    [
        [0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3],
        [0, 1, 3, 4, 0, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23],
    ],
    dtype=torch.int64,
)


# Original fix_edge_index function
def correct_object_event_edge_index(
    edge_index: torch.Tensor,
    allowed_object_indices: torch.Tensor,
    object_node_type_position: int,
) -> torch.Tensor:
    """Repair edges whose object id is not one of the allowed object indices.

    For every column whose object-row entry is missing from
    ``allowed_object_indices``, that entry is copied into the event row
    (overwriting the value stored there) and the object-row entry is replaced
    by the smallest allowed index.

    Args:
        edge_index: Tensor indexed as ``[row, column]`` with rows 0 and 1.
            It is modified in place.
        allowed_object_indices: 1-D tensor of valid object indices; it must
            not be empty, because its smallest element is used as replacement.
        object_node_type_position: Falsy -> row 0 holds the object ids,
            truthy -> row 1 holds the object ids. The other row holds the
            event ids.

    Returns:
        The same ``edge_index`` tensor object that was passed in.
    """
    obj_row = 1 if object_node_type_position else 0
    evt_row = 1 - obj_row
    ascending_allowed = torch.sort(allowed_object_indices).values
    # True for every column whose object id is not an allowed index.
    invalid = torch.isin(edge_index[obj_row], ascending_allowed).logical_not()
    # Move the offending id to the event row first, then overwrite the object
    # row; the other order would lose the id.
    edge_index[evt_row, invalid] = edge_index[obj_row, invalid]
    edge_index[obj_row, invalid] = ascending_allowed[0]  # smallest allowed index
    return edge_index


# Variant of fix_edge_index that takes the number of object nodes instead of an explicit index tensor (both versions are vectorized)
def _correct_object_event_edge_index(
    edge_index: torch.Tensor,
    num_object_nodes: int,
    object_node_type_position: int,
) -> torch.Tensor:
    object_id_row = int(bool(object_node_type_position))
    event_id_row = int(not (bool(object_node_type_position)))
    allowed_object_indices = torch.arange(num_object_nodes)
    mask = ~torch.isin(edge_index[object_id_row], allowed_object_indices)
    edge_index[event_id_row, mask] = edge_index[object_id_row, mask]
    edge_index[object_id_row, mask] = allowed_object_indices[-1]  # Note the difference
    return edge_index


# Generate a large edge_index for testing
num_edges = 100000
# edge_index = torch.randint(0, 100, (2, num_edges))
# NOTE: while the line above stays commented out, num_edges is unused and the
# timings below are taken on the small hand-written edge_index only.

# Time the original function. perf_counter() is a monotonic high-resolution
# clock; time.time() is a wall clock and too coarse for sub-millisecond spans.
start_time = time.perf_counter()
fixed_edge_index_original = correct_object_event_edge_index(
    edge_index.clone(), torch.tensor([0, 1]), 0
)
original_duration = time.perf_counter() - start_time

# Time the variant that takes the node count.
start_time = time.perf_counter()
fixed_edge_index_vectorized = _correct_object_event_edge_index(edge_index.clone(), 2, 0)
vectorized_duration = time.perf_counter() - start_time

# The two results are expected to differ wherever an entry was fixed: the
# original falls back to the first allowed index, the variant to the last one.
print(
    "Functions yield equal result: ",
    bool((fixed_edge_index_original == fixed_edge_index_vectorized).all()),
)
print("Final class method duration:", original_duration)
print("Vectorized function duration:", vectorized_duration)
# Guard against a measured duration of exactly zero.
print(
    "Speedup factor:",
    original_duration / vectorized_duration if vectorized_duration > 0 else float("inf"),
)

Functions yield equal result:  False
Final class method duration: 0.000293731689453125
Vectorized function duration: 0.00013327598571777344
Speedup factor: 2.2039355992844363


In [64]:
# Run both variants on fresh clones of edge_index. The first call's return
# value is discarded; the notebook only displays the value of the last
# expression, i.e. the result of _correct_object_event_edge_index.
correct_object_event_edge_index(edge_index.clone(), torch.tensor([0, 1]), 0)
_correct_object_event_edge_index(edge_index.clone(), 2, 0)

tensor([[ 0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1],
        [ 0,  1,  3,  4,  6, 12, 13, 16, 17, 18, 19, 20, 21, 22,  3]])

In [3]:
# Load a pickled FeatureStorage from disk (despite the "out_file" name, the
# file is only read here).
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load feature-storage files that come from a trusted source.
feature_storage_out_file = (
    "data/CS/feature_encodings/EFG/efg/raw/CS_split_[C2_P2_P3_O3_eas].fs"
)
with open(feature_storage_out_file, "rb") as binary:
    fs: FeatureStorage = pickle.load(binary)

In [25]:
# Pick a single feature graph to inspect; 83116 is a hard-coded key/index into
# fs.feature_graphs.
fg = fs.feature_graphs[83116]

In [26]:
# Flatten every collection stored in fg.objects into one set of distinct items.
unique_objects = {
    entry for object_group in fg.objects.values() for entry in object_group
}

In [27]:
from collections import defaultdict


def __set_to_split_dict(unique_objects: set[tuple[str, str]]) -> dict[str, list[str]]:
    # Function that splits a set of [object type, object id]
    # into a dict with object types as keys and object ids as values
    result = defaultdict(list)
    for item in unique_objects:
        result[item[0]].append(item[1])
    return dict(result)


# Split the objects by type, then give every object id a 0-based position
# within the list of ids of its own type.
unique_objects_dict = __set_to_split_dict(unique_objects)
object_node_map = {
    object_type: dict(zip(object_ids, range(len(object_ids))))
    for object_type, object_ids in unique_objects_dict.items()
}
object_node_map

{'krs': {'KRS-6197856': 0}}

In [3]:
# Assemble the HOEG constructor arguments from the experiment configuration
# and build the dataset with them.
kwargs = dict(
    root=cs_hoeg_config["STORAGE_PATH"],
    events_filename=cs_hoeg_config["SPLIT_FEATURE_STORAGE_FILE"],
    objects_filename=cs_hoeg_config["OBJECTS_DATA_DICT"],
    event_node_label_key=cs_hoeg_config["events_target_label"],
    object_nodes_label_key=cs_hoeg_config["objects_target_label"],
    # meta_data is (node types, edge types); only the edge types are passed.
    edge_types=cs_hoeg_config["meta_data"][1],
    object_node_types=cs_hoeg_config["object_types"],
    graph_level_target=False,
    skip_cache=cs_hoeg_config["skip_cache"],
)
HOEG(**kwargs)

Processing...
181it [00:01, 94.84it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 163
(1696790, 'KRV-5060456')
{'KRV-6657256': 0}


972it [00:09, 101.24it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 958
(360386, 'KRS-7579265')
{'KRS-8339882': 0}


1466it [00:14, 107.22it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 1446
(1395165, 'KRV-5033027')
{'KRV-1525213': 0}


1552it [00:15, 92.74it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 1534
(596916, 'KRV-8581988')
{'KRV-9862277': 0}


1608it [00:15, 106.25it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 1595
(62955, 'KRS-1227798')
{'KRS-7803609': 0}


1819it [00:18, 104.64it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 1797
(863527, 'KRV-149336')
{'KRV-4482379': 0}


1944it [00:19, 93.68it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 1929
(1609588, 'KRS-8978524')
{'KRS-9341017': 0}


2215it [00:22, 103.69it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 2198
(487787, 'KRV-6042680')
{'KRV-3197183': 0}


2269it [00:22, 102.44it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 2256
(1349820, 'KRV-5131099')
{'KRV-9704393': 0}
in self.__get_edge_index_for_edge_type()
fg.pexec_id: 2261
(726301, 'KRV-838923')
{'KRV-1668011': 0}
in self.__get_edge_index_for_edge_type()
fg.pexec_id: 2269
(360695, 'KRS-4194471')
{'KRS-7671141': 0}


2444it [00:24, 95.53it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 2425
(595516, 'KRS-3482974')
{'KRS-5001136': 0}


2574it [00:25, 122.49it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 2547
(1584691, 'KRV-9563103')
{'KRV-9003632': 0}


2622it [00:26, 113.18it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 2609
(494528, 'KRS-7140697')
{'KRS-5201924': 0}


3600it [00:36, 78.67it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 3590
(751076, 'KRS-1726037')
{'KRS-8910044': 0}


3951it [00:39, 79.28it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 3938
(1209092, 'KRV-5283537')
{'KRV-6204763': 0}


4185it [00:42, 109.77it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 4166
(589660, 'KRV-8101442')
{'KRV-1304186': 0}


4263it [00:43, 94.66it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 4245
(1054451, 'KRV-2448420')
{'KRV-6605662': 0}


4777it [00:48, 73.25it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 4767
(1106228, 'KRV-3360714')
{'KRV-6599082': 0}


4822it [00:49, 79.26it/s]

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 4809
(1585391, 'KRV-4399117')
{'KRV-5055432': 0}


4940it [00:50, 85.36it/s] 

in self.__get_edge_index_for_edge_type()
fg.pexec_id: 4925
(677820, 'KRV-2608872')
{'KRV-689761': 0}


5003it [00:51, 97.08it/s]


KeyboardInterrupt: 