In [1]:
import os

# Switch the working directory to the project root before anything else runs:
# the relative paths used further down ("logging/debug.log", "data/BPI17/...")
# are resolved against it, and presumably so are the project-local imports
# below (TODO confirm).
# The root can be overridden through the OCPPM_ROOT environment variable so the
# notebook is not tied to one machine; the original location stays the default.
os.chdir(os.environ.get("OCPPM_ROOT", "/home/tim/Development/OCPPM/"))
import logging
from loan_application_experiment.feature_encodings.hoeg.hoeg import HOEG

# from experiment.feature_encodings.efg.efg import EFG
import ocpa.algo.predictive_monitoring.factory as feature_factory
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as O
import torch_geometric.nn as pygnn
import torch_geometric.transforms as T
from loan_application_experiment.models.geometric_models import (
    AGNN_EFG,
    AdamsGCN,
    GraphModel,
    HigherOrderGNN_EFG,
)

# Route all log records (DEBUG and above) to a file relative to the working
# directory.
# basicConfig opens the log file right away and fails with FileNotFoundError
# when its parent directory is missing, so make sure the directory exists.
os.makedirs("logging", exist_ok=True)
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    filename="logging/debug.log",
)
# Separator line: the file is opened in append mode by default, so this marks
# where the records of the current run begin.
logging.critical(f"{'-' * 32} NEW RUN {'-' * 32}")

# Settings for the BPI17 / HOEG experiment, collected in one place.
# Key names (including their mixed casing) and their order are kept unchanged,
# since the entries are looked up by key in later cells.
bpi17_hoeg_config = dict(
    # -- dataset files and prediction target --
    STORAGE_PATH="data/BPI17/feature_encodings/HOEG/hoeg",
    SPLIT_FEATURE_STORAGE_FILE="BPI_split_[C2_P2_P3_P5_O3_Action_EventOrigin_OrgResource].fs",
    TARGET_LABEL=(feature_factory.EVENT_REMAINING_TIME, ()),
    OBJECTS_DATA_DICT="bpi17_ofg+oi_graph+app_node_map+off_node_map.pkl",
    # -- sampling / training hyperparameters --
    SUBGRAPH_SIZE=4,
    BATCH_SIZE=64,
    RANDOM_SEED=42,
    EPOCHS=30,
    early_stopping=5,
    # Keyword names match those of torch.optim.Adam; presumably forwarded to
    # that optimizer -- confirm against the training code.
    optimizer_settings=dict(
        lr=0.001,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    ),
    # L1 loss, i.e. mean absolute error.
    loss_fn=nn.L1Loss(),
    # -- runtime behaviour --
    verbose=True,
    skip_cache=True,
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

  from .autonotebook import tqdm as notebook_tqdm


Torch version: 1.13.1+cu117
Cuda available: True
Torch geometric version: 2.3.1



In [2]:
# Instantiate the HOEG dataset. Each constructor argument that comes from the
# experiment configuration is listed as (argument name, config key); the
# comprehension keeps its loop variables out of the notebook namespace.
# With skip_cache=True in the config, the recorded run re-processed all 31509
# graphs here (about two minutes).
dataset = HOEG(
    **{
        argument: bpi17_hoeg_config[config_key]
        for argument, config_key in (
            ("root", "STORAGE_PATH"),
            ("events_filename", "SPLIT_FEATURE_STORAGE_FILE"),
            ("objects_filename", "OBJECTS_DATA_DICT"),
            ("label_key", "TARGET_LABEL"),
            ("skip_cache", "skip_cache"),
        )
    },
    # Adds the reverse direction of every edge (see the rev_* edge types in the
    # dataset[0] output further down).
    transform=T.ToUndirected(),
)

Processing...
31509it [01:55, 273.17it/s]
Done!


Just some sanity checks: summary statistics over all graphs, followed by a look at the first graph.

In [3]:
# Sanity check: node and edge count statistics aggregated over all graphs.
dataset.get_summary()

100%|██████████| 31509/31509 [00:35<00:00, 891.53it/s] 


HOEG (#graphs=31509):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |     14.9 |     74.6 |
| std        |      4.1 |     21.8 |
| min        |      8   |     35   |
| quantile25 |     12   |     59   |
| median     |     14   |     69   |
| quantile75 |     17   |     84   |
| max        |     52   |    276   |
+------------+----------+----------+

In [4]:
# Inspect the first graph: node types with their feature/label shapes and the
# edge types with their edge counts.
dataset[0]

HeteroData(
  [1mevent[0m={
    x=[41, 27],
    y=[41]
  },
  [1mapplication[0m={
    x=[1, 3],
    y=[1]
  },
  [1moffer[0m={
    x=[9, 7],
    y=[9]
  },
  [1m(event, follows, event)[0m={ edge_index=[2, 139] },
  [1m(event, interacts, application)[0m={ edge_index=[2, 32] },
  [1m(event, interacts, offer)[0m={ edge_index=[2, 28] },
  [1m(application, interacts, application)[0m={ edge_index=[2, 1] },
  [1m(application, rev_interacts, event)[0m={ edge_index=[2, 32] },
  [1m(offer, rev_interacts, event)[0m={ edge_index=[2, 28] }
)