In [1]:
# DEPENDENCIES
# Python native
import os

# Switch to the project root before anything else runs. Presumably this is what
# lets the project-local packages imported further down and the relative paths
# in the config resolve — confirm against the repository layout.
# The root can be overridden with the OCPPM_ROOT environment variable so the
# notebook is not tied to one machine; the fallback is the original location.
os.chdir(os.environ.get("OCPPM_ROOT", "/home/tim/Development/OCPPM/"))
import pprint
import functools
from typing import Any, Callable

# Data handling
import numpy as np
import ocpa.algo.predictive_monitoring.factory as feature_factory

# PyG
import torch

# # Simple machine learning models, procedure tools, and evaluation metrics
from torch_geometric.loader import DataLoader
from tqdm import tqdm

# Custom imports
from experiments.efg import EFG
from experiments.efg_sg import EFG_SG
from utilities import torch_utils
from utilities import data_utils
from utilities import training_utils
from utilities import evaluation_utils

# Print CPU / GPU / memory / platform details of the executing machine.
torch_utils.print_system_info()


# Setup
# Base configuration for the CS EFG experiment. It is declared as a
# classification setup first; the regression switch further down overrides the
# task-specific entries.
cs_efg_config = {
    # --- storage locations (relative to the working directory) ---
    "model_output_path": "models/CS/efg",
    "STORAGE_PATH": "data/CS/feature_encodings/EFG/efg",
    "SPLIT_FEATURE_STORAGE_FILE": "CS_split_[C2_P2_P3_O3_eas].fs",
    # --- prediction task ---
    "graph_level_prediction": True,
    "classification_task": True,
    "TARGET_LABEL": "event_ea4",
    "features_dtype": torch.float32,
    "target_dtype": torch.int64,
    # Presumably the relative frequency of each target class (the values sum
    # to 1.0) — confirm against the dataset.
    "class_distribution": {
        0.0: 0.705315,
        1.0: 0.015818,
        2.0: 0.010882,
        3.0: 0.016800,
        4.0: 0.069764,
        5.0: 0.081383,
        6.0: 0.100038,
    },
    # --- data / training hyperparameters ---
    "SUBGRAPH_SIZE": 4,
    "BATCH_SIZE": 64,
    "RANDOM_SEED": 42,
    "EPOCHS": 30,
    "early_stopping": 5,
    # Keyword arguments for the optimizer (presumably torch.optim.Adam, given
    # the betas/amsgrad keys — confirm where the optimizer is constructed).
    "optimizer_settings": {
        "lr": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-08,
        "weight_decay": 0,
        "amsgrad": False,
    },
    "loss_fn": torch.nn.CrossEntropyLoss(),
    # --- runtime ---
    # Use the first CUDA device when one is available, otherwise the CPU.
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "verbose": True,
    "skip_cache": False,
}

# Regression switch: replace the classification-specific target, dtype and loss
# with their regression counterparts (remaining time, float32, L1).
# NOTE(review): "classification_task" is left True while "regression_task" is
# also True — confirm how downstream code resolves the two flags.
cs_efg_config["regression_task"] = True
if cs_efg_config["regression_task"]:
    cs_efg_config.update(
        target_dtype=torch.float32,
        TARGET_LABEL=(feature_factory.EVENT_REMAINING_TIME, ()),
        loss_fn=torch.nn.L1Loss(),
    )
    # The class distribution is dropped for the regression setup.
    cs_efg_config.pop("class_distribution", None)

# ADAPTATIONS
# cs_efg_config["BATCH_SIZE"] = 64
# cs_efg_config["skip_cache"] = False

CPU: Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz (4x)
Total CPU memory: 46.93GB
Available CPU memory: 34.35GB
GPU: NVIDIA GeForce GTX 960
Total GPU memory: 4096.0MB
Available GPU memory: 2872.0MB
Platform: Linux-5.19.0-46-generic-x86_64-with-glibc2.35


In [2]:
# Get data and dataloaders
# Only the validation split is requested (`val=True`). The one-element
# unpacking raises if `load_datasets` returns anything other than exactly one
# dataset.
# Disabled toggles kept for reference: `train=True` / `test=True` as keyword
# arguments, with `ds_train` / `ds_test` added around `ds_val` in the target.
(ds_val,) = data_utils.load_datasets(
    dataset_class=EFG_SG,
    storage_path=cs_efg_config["STORAGE_PATH"],
    split_feature_storage_file=cs_efg_config["SPLIT_FEATURE_STORAGE_FILE"],
    target_label=cs_efg_config["TARGET_LABEL"],
    graph_level_target=cs_efg_config["graph_level_prediction"],
    features_dtype=cs_efg_config["features_dtype"],
    target_dtype=cs_efg_config["target_dtype"],
    subgraph_size=cs_efg_config["SUBGRAPH_SIZE"],
    val=True,
    skip_cache=cs_efg_config["skip_cache"],
)

In [4]:
# Build the dataloader for the validation split only. The one-element unpacking
# raises if `prepare_dataloaders` returns anything other than exactly one
# loader.
# Both the worker seeding hook and the generator are derived from RANDOM_SEED
# (presumably to make batching reproducible — confirm in `data_utils`).
# Disabled toggles kept for reference: `ds_train=ds_train`, `ds_test=ds_test`,
# `num_workers=0`, with `train_loader` / `test_loader` around `val_loader`.
(val_loader,) = data_utils.prepare_dataloaders(
    batch_size=cs_efg_config["BATCH_SIZE"],
    ds_val=ds_val,
    seed_worker=functools.partial(
        torch_utils.seed_worker, state=cs_efg_config["RANDOM_SEED"]
    ),
    generator=torch.Generator().manual_seed(cs_efg_config["RANDOM_SEED"]),
)

In [5]:
# Print per-graph node/edge count statistics for the validation dataset.
data_utils.print_dataset_summaries(ds_val=ds_val)

Validation set


100%|██████████| 58666/58666 [00:37<00:00, 1580.64it/s]


EFG (#graphs=58666):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |      8.5 |      7.5 |
| std        |     11.3 |     11.3 |
| min        |      1   |      0   |
| quantile25 |      1   |      0   |
| median     |      3   |      2   |
| quantile75 |     12   |     11   |
| max        |    143   |    142   |
+------------+----------+----------+ 

