In [1]:
# %%
# DEPENDENCIES
# Python native
import functools
import json
import os

# Run from the project root so that relative data paths (e.g. "data/BPI17/...")
# resolve; presumably this is also what lets the project-local imports below
# (utilities, experiments, models) be found — confirm.
# Set the OCPPM_ROOT environment variable to point at a checkout elsewhere;
# when it is unset, the original hard-coded location is used unchanged.
os.chdir(os.environ.get("OCPPM_ROOT", "/home/tim/Development/OCPPM/"))

import pickle
import random
from copy import copy
from datetime import datetime
from statistics import median as median
from sys import platform
from typing import Any, Callable

# Data handling
import numpy as np
import ocpa.algo.predictive_monitoring.factory as feature_factory

# PyG
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as O

# PyTorch TensorBoard support
import torch.utils.tensorboard
import torch_geometric.nn as pygnn
import torch_geometric.transforms as T

# Object centric process mining
from ocpa.algo.predictive_monitoring.obj import Feature_Storage as FeatureStorage

# # Simple machine learning models, procedure tools, and evaluation metrics
# from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import tensor
from torch.utils.tensorboard.writer import SummaryWriter
from torch_geometric.loader import DataLoader
from tqdm import tqdm

import utilities.evaluation_utils as evaluation_utils
import utilities.hetero_data_utils as hetero_data_utils
import utilities.hetero_evaluation_utils as hetero_evaluation_utils
import utilities.hetero_training_utils as hetero_training_utils
import utilities.torch_utils

# Custom imports
# from experiments.loan_application.feature_encodings.hoeg.hoeg import HOEG
from experiments.hoeg import HOEG

# from importing_ocel import build_feature_storage, load_ocel, pickle_feature_storage
from models.definitions.geometric_models import GraphModel, HeteroHigherOrderGNN

# Report the machine and library versions this run executes on
utilities.torch_utils.print_system_info()
utilities.torch_utils.print_torch_info()

# INITIAL CONFIGURATION
# Single source of settings for this experiment; the data-loading cell reads
# the storage, label, edge-type, object-type and cache entries from here.
bpi17_hoeg_config = {
    # --- dataset location and source files ---
    "STORAGE_PATH": "data/BPI17/feature_encodings/HOEG/hoeg",
    "SPLIT_FEATURE_STORAGE_FILE": "BPI_split_[C2_P2_P3_P5_O3_Action_EventOrigin_OrgResource].fs",
    "OBJECTS_DATA_DICT": "bpi17_ofg+oi_graph+app_node_map+off_node_map.pkl",
    # --- target labels and graph schema ---
    "events_target_label": (feature_factory.EVENT_REMAINING_TIME, ()),
    "objects_target_label": "@@object_lifecycle_duration",
    "target_node_type": "event",
    "object_types": ["application", "offer"],
    # (node types, edge types) of the heterogeneous graph
    "meta_data": (
        ["event", "application", "offer"],
        [
            ("event", "follows", "event"),
            ("event", "interacts", "application"),
            ("event", "interacts", "offer"),
        ],
    ),
    # --- training settings (not read in this cell) ---
    "BATCH_SIZE": 16,
    "RANDOM_SEED": 42,
    "EPOCHS": 32,
    "early_stopping": 8,
    "optimizer": O.Adam,
    "optimizer_settings": dict(
        lr=0.001,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    ),
    "loss_fn": nn.L1Loss(),
    # --- runtime behaviour ---
    "verbose": True,
    "skip_cache": False,
    # Use the GPU when one is available, otherwise fall back to the CPU
    "device": torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
}

# CONFIGURATION ADAPTATIONS may be set here, overriding the defaults above
# e.g. bpi17_hoeg_config["early_stopping"] = 4
bpi17_hoeg_config.update(skip_cache=True)

CPU: Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz (4x)
Total CPU memory: 46.93GB
Available CPU memory: 36.67GB
GPU: NVIDIA GeForce GTX 960
Total GPU memory: 4096.0MB
Available GPU memory: 4029.0MB
Platform: Linux-5.19.0-46-generic-x86_64-with-glibc2.35
Torch version: 1.13.1+cu117
Cuda available: True
Torch geometric version: 2.3.1


In [2]:
# DATA PREPARATION
# Graph transforms, composed in this order and handed to the dataset loader:
# make the graph undirected, add self-loops, then normalize node features.
transformations = [T.ToUndirected(), T.AddSelfLoops(), T.NormalizeFeatures()]

# Load the train / validation / test splits using the settings in the config.
ds_train, ds_val, ds_test = hetero_data_utils.load_hetero_datasets(
    bpi17_hoeg_config["STORAGE_PATH"],
    bpi17_hoeg_config["SPLIT_FEATURE_STORAGE_FILE"],
    bpi17_hoeg_config["OBJECTS_DATA_DICT"],
    # which splits to return
    train=True,
    val=True,
    test=True,
    # labels and graph schema
    event_node_label_key=bpi17_hoeg_config["events_target_label"],
    object_nodes_label_key=bpi17_hoeg_config["objects_target_label"],
    edge_types=bpi17_hoeg_config["meta_data"][1],
    object_node_types=bpi17_hoeg_config["object_types"],
    graph_level_target=False,
    # preprocessing and caching
    transform=T.Compose(transformations),
    skip_cache=bpi17_hoeg_config["skip_cache"],
)

Processing...
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
17645it [00:58, 303.69it/s]
Done!
Processing...
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
4411it [00:20, 218.54it/s]
Done!
Processing...
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
9453it [00:39, 238.79it/s]
Done!


Sanity checks: inspect a sample graph and the dataset summary statistics.

In [5]:
# Display the first training graph to check its node feature/label shapes and per-type edge counts
ds_train[0]

HeteroData(
  [1mevent[0m={
    x=[17, 27],
    y=[17]
  },
  [1mapplication[0m={
    x=[1, 3],
    y=[1]
  },
  [1moffer[0m={
    x=[2, 7],
    y=[2]
  },
  [1m(event, follows, event)[0m={ edge_index=[2, 55] },
  [1m(event, interacts, application)[0m={ edge_index=[2, 14] },
  [1m(event, interacts, offer)[0m={ edge_index=[2, 8] },
  [1m(application, rev_interacts, event)[0m={ edge_index=[2, 14] },
  [1m(offer, rev_interacts, event)[0m={ edge_index=[2, 8] }
)

In [None]:
# NOTE(review): `dataset` is not assigned in any cell shown in this notebook
# (only ds_train / ds_val / ds_test are) — confirm where it comes from;
# otherwise this raises NameError on a fresh kernel.
dataset.get_summary()

100%|██████████| 31509/31509 [00:35<00:00, 897.87it/s]


HOEG (#graphs=31509):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |     14.9 |     86.1 |
| std        |      4.1 |     25.3 |
| min        |      8   |     40   |
| quantile25 |     12   |     68   |
| median     |     14   |     80   |
| quantile75 |     17   |     98   |
| max        |     52   |    310   |
+------------+----------+----------+

In [None]:
# NOTE(review): `dataset` is not assigned in the cells shown in this notebook — verify it exists before running.
dataset[0]

HeteroData(
  [1mevent[0m={
    x=[41, 27],
    y=[41]
  },
  [1mapplication[0m={
    x=[1, 3],
    y=[1]
  },
  [1moffer[0m={
    x=[9, 7],
    y=[9]
  },
  [1m(event, follows, event)[0m={ edge_index=[2, 180] },
  [1m(event, interacts, application)[0m={ edge_index=[2, 32] },
  [1m(event, interacts, offer)[0m={ edge_index=[2, 28] },
  [1m(application, rev_interacts, event)[0m={ edge_index=[2, 32] },
  [1m(offer, rev_interacts, event)[0m={ edge_index=[2, 28] }
)

In [4]:
# NOTE(review): same expression as the preceding cell, and `dataset` is not assigned
# in the cells shown in this notebook — candidate for removal; confirm.
dataset[0]

HeteroData(
  [1mevent[0m={
    x=[41, 27],
    y=[41]
  },
  [1mapplication[0m={
    x=[1, 3],
    y=[1]
  },
  [1moffer[0m={
    x=[9, 7],
    y=[9]
  },
  [1m(event, follows, event)[0m={ edge_index=[2, 180] },
  [1m(event, interacts, application)[0m={ edge_index=[2, 32] },
  [1m(event, interacts, offer)[0m={ edge_index=[2, 28] },
  [1m(application, rev_interacts, event)[0m={ edge_index=[2, 32] },
  [1m(offer, rev_interacts, event)[0m={ edge_index=[2, 28] }
)