In [1]:
import os

def go_up_n_directories(path: str, n: int) -> str:
    """Return the absolute path ``n`` levels above the parent of ``path``.

    ``os.path.dirname(path)`` is applied first and ``n`` ``".."`` components are
    appended afterwards, so for a path without a trailing separator the result
    lies ``n + 1`` levels above ``path`` itself.

    Args:
        path: Starting path (file or directory).
        n: Number of ``".."`` components appended after taking the parent.

    Returns:
        The normalized absolute path.
    """
    return os.path.abspath(os.path.join(os.path.dirname(path), *([".."] * n)))


# Remember the directory the kernel started in, so that re-running this cell
# always resolves the same target instead of climbing further up each time.
if "_KERNEL_START_DIR" not in globals():
    _KERNEL_START_DIR = os.getcwd()
os.chdir(go_up_n_directories(_KERNEL_START_DIR, 3))  # safe to re-run
os.getcwd()

'/home/tim/Development/OCPPM'

In [2]:
# DEPENDENCIES
# Python native
import os
import pickle
import pprint
import random
import functools
import json
import time
from copy import copy
from datetime import datetime
from statistics import median as median
from sys import platform
from typing import Any, Callable

# Data handling
import numpy as np
import ocpa.algo.predictive_monitoring.factory as feature_factory

# PyG
import torch

# PyTorch TensorBoard support
import torch.utils.tensorboard

# Object centric process mining
from ocpa.algo.predictive_monitoring.obj import Feature_Storage as FeatureStorage

# # Simple machine learning models, procedure tools, and evaluation metrics
# from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard.writer import SummaryWriter
from torch_geometric.loader import DataLoader
import torch_geometric.transforms as T
from tqdm import tqdm
from torch import tensor

# Custom imports
from experiments.efg import EFG
from experiments.efg_sg import EFG_SG
from utilities import torch_utils
from utilities import data_utils
from utilities import training_utils
from utilities import evaluation_utils

# from importing_ocel import build_feature_storage, load_ocel, pickle_feature_storage
from models.definitions.geometric_models import (
    AGNN_EFG,
    AdamsGCN,
    GraphModel,
    HigherOrderGNN_EFG,
    SimpleGNN_EFG,
)
import torch_geometric.nn as pygnn
import torch.nn.functional as F
import torch.optim as O
import torch.nn as nn

# Report the hardware/platform this notebook runs on
torch_utils.print_system_info()

# Experiment configuration shared by all cells below
bpi17_efg_config = dict(
    # --- locations on disk ---
    model_output_path="models/BPI17/efg",
    STORAGE_PATH="data/BPI17/feature_encodings/EFG/efg",
    SPLIT_FEATURE_STORAGE_FILE="BPI_split_[C2_P2_P3_P5_O3_Action_EventOrigin_OrgResource].fs",
    # --- prediction task ---
    TARGET_LABEL=(feature_factory.EVENT_REMAINING_TIME, ()),
    regression_task=True,
    graph_level_prediction=True,
    features_dtype=torch.float32,
    target_dtype=torch.float32,
    # --- data loading ---
    SUBGRAPH_SIZE=4,
    BATCH_SIZE=64,
    RANDOM_SEED=42,
    # --- training ---
    EPOCHS=30,
    early_stopping=4,
    hidden_dim=16,
    optimizer=O.Adam,
    # Unpacked as keyword arguments into the optimizer constructor
    optimizer_settings=dict(
        lr=0.001,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    ),
    loss_fn=torch.nn.L1Loss(),
    # --- runtime behaviour ---
    verbose=True,
    track_time=True,
    skip_cache=False,
    # Use the first CUDA device when one is available, otherwise the CPU
    device=(
        torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    ),
)

# ADAPTATIONS
# bpi17_efg_config["optimizer_settings"] = {
#     "lr": 5e-4,
#     "betas": (0.9, 0.999),
#     "eps": 1e-08,
#     "weight_decay": 0,
#     "amsgrad": False,
# }
# bpi17_efg_config["loss_fn"] = torch.nn.L1Loss()
# bpi17_efg_config["BATCH_SIZE"] = 64
# bpi17_efg_config["EPOCHS"] = 30
# bpi17_efg_config["early_stopping"] = 5
# bpi17_efg_config['skip_cache']=True

CPU: Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz (4x)
Total CPU memory: 46.93GB
Available CPU memory: 35.92GB
GPU: NVIDIA GeForce GTX 960
Total GPU memory: 4096.0MB
Available GPU memory: 4029.0MB
Platform: Linux-5.19.0-46-generic-x86_64-with-glibc2.35


In [3]:
# Build the train/validation/test datasets from the configured feature storage
dataset_options = dict(
    dataset_class=EFG_SG,
    storage_path=bpi17_efg_config["STORAGE_PATH"],
    split_feature_storage_file=bpi17_efg_config["SPLIT_FEATURE_STORAGE_FILE"],
    target_label=bpi17_efg_config["TARGET_LABEL"],
    graph_level_target=bpi17_efg_config["graph_level_prediction"],
    features_dtype=bpi17_efg_config["features_dtype"],
    target_dtype=bpi17_efg_config["target_dtype"],
    subgraph_size=bpi17_efg_config["SUBGRAPH_SIZE"],
    train=True,
    val=True,
    test=True,
    skip_cache=bpi17_efg_config["skip_cache"],
)
ds_train, ds_val, ds_test = data_utils.load_datasets(**dataset_options)

# Wrap the datasets in dataloaders; the worker seeding hook and the generator
# are both derived from the configured random seed
loader_seed = bpi17_efg_config["RANDOM_SEED"]
worker_seeder = functools.partial(torch_utils.seed_worker, state=loader_seed)
loader_generator = torch.Generator().manual_seed(loader_seed)
train_loader, val_loader, test_loader = data_utils.prepare_dataloaders(
    batch_size=bpi17_efg_config["BATCH_SIZE"],
    ds_train=ds_train,
    ds_val=ds_val,
    ds_test=ds_test,
    seed_worker=worker_seeder,
    generator=loader_generator,
)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [4]:
# # MODEL INITIALIZATION
# model = HigherOrderGNN_EFG(bpi17_efg_config['hidden_dim'], 1)
# # pretrained_state_dict = torch.load("models/runs/GraphConvNet_20230718_13h59m/state_dict_epoch6.pt")
# # model.load_state_dict(pretrained_state_dict)
# model.to(bpi17_efg_config["device"])

# # Print summary of data and model
# if bpi17_efg_config["verbose"]:
#     print(model)
#     with torch.no_grad():  # Initialize lazy modules, s.t. we can count its parameters.
#         batch = next(iter(train_loader))
#         batch.to(bpi17_efg_config["device"])
#         out = model(batch.x.float(), batch.edge_index, batch.batch)
#         print(f"Number of parameters: {torch_utils.count_parameters(model)}")

HigherOrderGNN_EFG(
  (conv1): GraphConv(-1, 16)
  (conv2): GraphConv(-1, 16)
  (act1): PReLU(num_parameters=1)
  (act2): PReLU(num_parameters=1)
  (lin_out): Linear(-1, 1, bias=True)
)
Number of parameters: 1427


In [None]:
# # TRAINING
# print("Training started, progress available in Tensorboard")
# torch.cuda.empty_cache()

# timestamp = datetime.now().strftime("%Y%m%d_%Hh%Mm")
# model_path_base = f"models/BPI17/efg/lr={bpi17_efg_config['optimizer_settings']['lr']}_hidden_dim={bpi17_efg_config['hidden_dim']}/{str(model).split('(')[0]}_{timestamp}"

# best_state_dict_path = training_utils.run_training(
#     num_epochs=bpi17_efg_config["EPOCHS"],
#     model=model,
#     train_loader=train_loader,
#     validation_loader=val_loader,
#     optimizer=bpi17_efg_config["optimizer"](
#         model.parameters(), **bpi17_efg_config["optimizer_settings"]
#     ),
#     loss_fn=bpi17_efg_config["loss_fn"],
#     early_stopping_criterion=bpi17_efg_config["early_stopping"],
#     model_path_base=model_path_base,
#     x_dtype=bpi17_efg_config["features_dtype"],
#     y_dtype=bpi17_efg_config["target_dtype"],
#     device=bpi17_efg_config["device"],
#     verbose=True,
# )
# # Write experiment settings as JSON into model path (of the model we've just trained)
# with open(os.path.join(model_path_base, "experiment_settings.json"), "w") as file_path:
#     json.dump(evaluation_utils.get_json_serializable_dict(bpi17_efg_config), file_path)

In [7]:
# # Evaluation
# state_dict_path = "models/BPI17/efg/AGNN_20230714_12h19m"  # 0.59 test mae
# state_dict_path = "models/BPI17/efg/AGNN_20230714_14h26m"  # 0.54 test mae
# state_dict_path = "models/BPI17/efg/AGNN_20230717_15h16m"  # 0.48 test mae ()
# state_dict_path = "models/BPI17/efg/AGNN_20230717_16h37m"  # 0.47 test mae
# state_dict_path = "models/BPI17/efg/AGNN_20230717_15h51m"  # 0.4557 test mae (ChebConv)
# state_dict_path = "models/BPI17/efg/AGNN_20230717_16h58m"  # 0.4546 test mae
# state_dict_path = "models/BPI17/efg/AGNN_20230717_23h22m"  # 0.4534 test mae
# state_dict_path = (
#     "models/BPI17/efg/SimpleGNN_20230718_09h30m"  # 0.4382 test mae | 6k params
# )
# state_dict_path = (
#     "models/BPI17/efg/TransformerGNN_20230718_09h46m"  # 0.4290 test mae | 24k params
# )
# state_dict_path = (
#     "models/BPI17/efg/GraphConvArch_20230718_10h08m"  # 0.4248 test mae | 12k params
# )
# state_dict_path = (
#     "models/BPI17/efg/GraphConvNet_20230718_11h35m"  # 0.4149 test mae | 7k params
# )
# state_dict_path = (
#     "models/BPI17/efg/GraphConvNet_20230718_11h54m"  # 0.4113 test mae | 7k params
# )
# state_dict_path = "models/BPI17/efg/GraphConvNet_20230718_13h59m"  # 0.4040 test mae | 7k params | fine-tuning pretrained 'GraphConvNet_20230718_11h54m'  // best so far!
# state_dict_path = (
#     "models/BPI17/efg/HigherOrderGNN_EFG_20230720_13h11m"  # 0.4087 test mae | 7k params
# )

# # Get model evaluation report
# evaluation_report = evaluation_utils.get_best_model_evaluation(
#     model_state_dict_path=best_state_dict_path,
#     train_loader=train_loader,
#     val_loader=val_loader,
#     test_loader=test_loader,
#     model=model,
#     evaluation_reporter=evaluation_utils.get_evaluation,
#     regression=True,
#     verbose=bpi17_efg_config["verbose"],
#     track_time=bpi17_efg_config["track_time"],
# )

# # Store model results as JSON into model path
# with open(os.path.join(model_path_base, "evaluation_report.json"), "w") as file_path:
#     json.dump(evaluation_utils.get_json_serializable_dict(evaluation_report), file_path)

# # Print evaluation report
# pprint.pprint(evaluation_report)

100%|██████████| 2626/2626 [01:16<00:00, 34.35it/s]
100%|██████████| 657/657 [00:18<00:00, 36.15it/s]
100%|██████████| 699/699 [00:28<00:00, 24.40it/s]

{'Test': {'report': {'MAE': 0.4087477,
                     'MAPE': 2.7260685,
                     'MSE': 0.4682943,
                     'R^2': -0.015702805917782614}},
 'Train': {'report': {'MAE': 0.4119272,
                      'MAPE': 6.021801,
                      'MSE': 0.48191255,
                      'R^2': -0.0268219392311162}},
 'Validation': {'report': {'MAE': 0.42308843,
                           'MAPE': 4.256179,
                           'MSE': 0.49531737,
                           'R^2': -0.07168010063702734}}}





### Final hyperparameter tuning

In [11]:
def _write_json(directory: str, filename: str, payload: Any) -> None:
    """Write ``payload`` as indented JSON to ``directory/filename``.

    The directory is created first when it does not exist yet.
    """
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, filename), "w") as json_file:
        json.dump(payload, json_file, indent=2)


def run_experiment_configuration(
    model_class: Callable[..., GraphModel],
    lr: float,
    hidden_dim: int,
    device: torch.device = None,
    track_time: bool = True,
    verbose: bool = False,
) -> None:
    """Train and evaluate one (learning rate, hidden dimension) configuration.

    The model is built as ``model_class(hidden_dim, 1)``, trained through
    ``training_utils.run_training`` on the notebook-level ``train_loader`` /
    ``val_loader`` and evaluated through
    ``evaluation_utils.get_best_model_evaluation`` on all three loaders.

    Three files are written into the run directory
    ``<model_output_path>/lr=<lr>_hidden_dim=<hidden_dim>/<model name>_<timestamp>``:
    ``experiment_settings.json``, ``model_architecture.json`` and
    ``evaluation_report.json``. A short summary (parameter count, training
    time, test MAE) is printed at the end.

    Note:
        The notebook-level ``bpi17_efg_config`` is updated in place
        (``device``, ``verbose``, ``track_time``, ``hidden_dim`` and
        ``optimizer_settings["lr"]``), so it reflects the last run afterwards.

    Args:
        model_class: Model constructor, called with ``(hidden_dim, 1)``.
        lr: Learning rate stored in the optimizer settings.
        hidden_dim: Hidden dimension passed to the model constructor.
        device: Device to train on; ``None`` keeps the configured device.
        track_time: Forwarded to the evaluation as ``track_time``.
        verbose: Forwarded to training and evaluation as ``verbose``.
    """
    # Record this run's settings in the shared config (this is also what gets
    # written to experiment_settings.json below).
    if device is not None:
        bpi17_efg_config["device"] = device
    bpi17_efg_config["verbose"] = verbose
    bpi17_efg_config["track_time"] = track_time
    # HYPERPARAMETER INITIALIZATION (those that we tune)
    bpi17_efg_config["hidden_dim"] = hidden_dim
    bpi17_efg_config["optimizer_settings"]["lr"] = lr

    # MODEL INITIALIZATION
    model = model_class(hidden_dim, 1)
    # pretrained_state_dict = torch.load("models/runs/GraphConvNet_20230718_13h59m/state_dict_epoch6.pt")
    # model.load_state_dict(pretrained_state_dict)
    model.to(bpi17_efg_config["device"])

    # TRAINING
    if verbose:
        print("Training started, progress available in Tensorboard")
    torch.cuda.empty_cache()

    start_train_time = datetime.now()
    timestamp = start_train_time.strftime("%Y%m%d_%Hh%Mm")
    # The model name is the part of the module's string form before the first "(".
    model_path_base = f"{bpi17_efg_config['model_output_path']}/lr={lr}_hidden_dim={hidden_dim}/{str(model).split('(')[0]}_{timestamp}"

    optimizer = bpi17_efg_config["optimizer"](
        model.parameters(), **bpi17_efg_config["optimizer_settings"]
    )
    best_state_dict_path = training_utils.run_training(
        num_epochs=bpi17_efg_config["EPOCHS"],
        model=model,
        train_loader=train_loader,
        validation_loader=val_loader,
        optimizer=optimizer,
        loss_fn=bpi17_efg_config["loss_fn"],
        early_stopping_criterion=bpi17_efg_config["early_stopping"],
        model_path_base=model_path_base,
        x_dtype=bpi17_efg_config["features_dtype"],
        y_dtype=bpi17_efg_config["target_dtype"],
        device=bpi17_efg_config["device"],
        verbose=bpi17_efg_config["verbose"],
    )
    total_train_time = datetime.now() - start_train_time

    # Write experiment settings as JSON into model path (of the model we've just trained)
    _write_json(
        model_path_base,
        "experiment_settings.json",
        evaluation_utils.get_json_serializable_dict(bpi17_efg_config),
    )

    # EVALUATION
    evaluation_report = evaluation_utils.get_best_model_evaluation(
        model_state_dict_path=best_state_dict_path,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        model=model,
        evaluation_reporter=evaluation_utils.get_evaluation,
        regression=True,
        verbose=bpi17_efg_config["verbose"],
        track_time=bpi17_efg_config["track_time"],
    )
    evaluation_report["Train"]["report"]["training_time"] = total_train_time

    # Store model architecture and results as JSON into model path
    model_architecture = torch_utils.parse_model_string(model)
    model_architecture["Number of parameters"] = torch_utils.count_parameters(model)
    _write_json(model_path_base, "model_architecture.json", model_architecture)
    _write_json(
        model_path_base,
        "evaluation_report.json",
        evaluation_utils.get_json_serializable_dict(evaluation_report),
    )

    # Print evaluation report
    print(f"lr={lr}, hidden_dim={hidden_dim}:")
    print(f"    {model_architecture['Number of parameters']} parameters")
    print(f"    {evaluation_report['Train']['report']['training_time']} H:m:s")
    print(f"    {evaluation_report['Test']['report']['MAE']:.4f}")
    print()

In [12]:
# Candidate values for the two tuned hyperparameters
lr_range = [0.01, 0.001]
hidden_dim_range = [8, 16, 24, 32, 48, 64, 128, 256]

# Full grid, ordered so that every hidden dimension is tried for one learning
# rate before moving on to the next learning rate
search_grid = [
    (candidate_lr, candidate_dim)
    for candidate_lr in lr_range
    for candidate_dim in hidden_dim_range
]
for lr, hidden_dim in search_grid:
    run_experiment_configuration(
        model_class=HigherOrderGNN_EFG,
        lr=lr,
        hidden_dim=hidden_dim,
        track_time=True,
        verbose=False,
    )

Early stopping after 18 epochs.
lr=0.01, hidden_dim=8:
    587 parameters
    0:11:19.402490 H:m:s
    0.4284

Early stopping after 6 epochs.
lr=0.01, hidden_dim=16:
    1427 parameters
    0:03:57.227688 H:m:s
    0.4321

Early stopping after 10 epochs.
lr=0.01, hidden_dim=24:
    2523 parameters
    0:06:36.402405 H:m:s
    0.4238

Early stopping after 15 epochs.
lr=0.01, hidden_dim=32:
    3875 parameters
    0:10:18.807842 H:m:s
    0.4219

Early stopping after 14 epochs.
lr=0.01, hidden_dim=48:
    7347 parameters
    0:09:24.318881 H:m:s
    0.4240

Early stopping after 8 epochs.
lr=0.01, hidden_dim=64:
    11843 parameters
    0:05:14.845230 H:m:s
    0.4272

Early stopping after 13 epochs.
lr=0.01, hidden_dim=128:
    40067 parameters
    0:09:49.965929 H:m:s
    0.4287

Early stopping after 6 epochs.
lr=0.01, hidden_dim=256:
    145667 parameters
    0:04:15.462002 H:m:s
    0.4461

Early stopping after 27 epochs.
lr=0.001, hidden_dim=8:
    587 parameters
    0:21:30.648002 H