In [1]:
# DEPENDENCIES
# Python native
import os

# Switch to the project root so the relative paths used in this notebook
# (e.g. "data/BPI17/..." and "models/BPI17/efg/...") resolve.
# NOTE(review): presumably the project-local imports below rely on this too — confirm.
# Set the OCPPM_ROOT environment variable to run from a different checkout;
# the original developer path remains the default.
os.chdir(os.environ.get("OCPPM_ROOT", "/home/tim/Development/OCPPM/"))

import pickle
import pprint
import random
import functools
import json
from copy import copy
from datetime import datetime
from statistics import median as median
from sys import platform
from typing import Any, Callable

# Data handling
import numpy as np
import ocpa.algo.predictive_monitoring.factory as feature_factory

# PyG
import torch

# PyTorch TensorBoard support
import torch.utils.tensorboard

# Object centric process mining
from ocpa.algo.predictive_monitoring.obj import Feature_Storage as FeatureStorage

# # Simple machine learning models, procedure tools, and evaluation metrics
# from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard.writer import SummaryWriter
from torch_geometric.loader import DataLoader
import torch_geometric.transforms as T
from tqdm import tqdm
from torch import tensor

# Custom imports
# from loan_application_experiment.feature_encodings.efg.efg import EFG
from loan_application_experiment.feature_encodings.efg.efg_sg import EFG_SG
from utilities import torch_utils
from utilities import data_utils
from utilities import training_utils
from utilities import evaluation_utils

# from importing_ocel import build_feature_storage, load_ocel, pickle_feature_storage
from loan_application_experiment.models.geometric_models import (
    AGNN_EFG,
    AdamsGCN,
    GraphModel,
    HigherOrderGNN_EFG,
    SimpleGNN_EFG,
)
import torch_geometric.nn as pygnn
import torch.nn.functional as F
import torch.optim as O
import torch.nn as nn

# Print system info
torch_utils.print_system_info()

# Setup
# Central experiment configuration; the cells below read every tunable from here.
bpi17_config = {
    # Location and file name of the pre-split feature storage handed to data_utils.load_datasets.
    "STORAGE_PATH": "data/BPI17/feature_encodings/EFG/efg",
    "SPLIT_FEATURE_STORAGE_FILE": "BPI_split_[C2_P2_P3_P5_O3_Action_EventOrigin_OrgResource].fs",
    # Prediction target, expressed as an ocpa feature key (feature constant, parameters).
    "TARGET_LABEL": (feature_factory.EVENT_REMAINING_TIME, ()),
    "SUBGRAPH_SIZE": 4,
    "BATCH_SIZE": 64,
    # Seeds the global RNGs below as well as the dataloader generator/workers.
    "RANDOM_SEED": 42,
    "EPOCHS": 30,
    # Forwarded to run_training as `early_stopping_criterion`
    # (presumably a patience in epochs — TODO confirm against training_utils).
    "early_stopping": 5,
    # Keyword arguments unpacked into torch.optim.Adam.
    "optimizer_settings": {
        "lr": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-08,
        "weight_decay": 0,
        "amsgrad": False,
    },
    # L1 loss, i.e. mean absolute error.
    "loss_fn": torch.nn.L1Loss(),
    "verbose": True,
    "skip_cache": False,
    # Use the first CUDA device when available, otherwise fall back to the CPU.
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

# Seed the global RNGs so that stochastic steps outside the dataloaders (notably the
# model's weight initialization) are repeatable after a kernel restart.
random.seed(bpi17_config["RANDOM_SEED"])
np.random.seed(bpi17_config["RANDOM_SEED"])
torch.manual_seed(bpi17_config["RANDOM_SEED"])

# ADAPTATIONS
# Uncomment and edit to override individual settings for a one-off experiment.
# bpi17_config["optimizer_settings"] = {
#     "lr": 5e-4,
#     "betas": (0.9, 0.999),
#     "eps": 1e-08,
#     "weight_decay": 0,
#     "amsgrad": False,
# }
# bpi17_config["loss_fn"] = torch.nn.L1Loss()
# bpi17_config["BATCH_SIZE"] = 64
# bpi17_config["EPOCHS"] = 30
# bpi17_config["early_stopping"] = 5

  from .autonotebook import tqdm as notebook_tqdm


Torch version: 1.13.1+cu117
Cuda available: True
Torch geometric version: 2.3.1



In [3]:
# Get data and dataloaders
# Step 1: request all three splits (train / validation / test) from the feature storage.
ds_train, ds_val, ds_test = data_utils.load_datasets(
    bpi17_config["STORAGE_PATH"],
    bpi17_config["SPLIT_FEATURE_STORAGE_FILE"],
    bpi17_config["TARGET_LABEL"],
    bpi17_config["SUBGRAPH_SIZE"],
    train=True,
    val=True,
    test=True,
    skip_cache=bpi17_config["skip_cache"],
)

# Step 2: wrap the splits in dataloaders. The worker seeding function and the
# generator are both derived from the configured random seed.
seed = bpi17_config["RANDOM_SEED"]
worker_seeder = functools.partial(torch_utils.seed_worker, state=seed)
loader_generator = torch.Generator().manual_seed(seed)

train_loader, val_loader, test_loader = data_utils.prepare_dataloaders(
    batch_size=bpi17_config["BATCH_SIZE"],
    ds_train=ds_train,
    ds_val=ds_val,
    ds_test=ds_test,
    seed_worker=worker_seeder,
    generator=loader_generator,
)

In [4]:
device = bpi17_config["device"]

# NOTE(review): 48 and 1 are presumably the hidden width and the output size,
# judging by the printed module summary — confirm against HigherOrderGNN_EFG.
model = HigherOrderGNN_EFG(48, 1)
# Optional warm start from an earlier run:
# pretrained_state_dict = torch.load("models/runs/GraphConvNet_20230718_13h59m/state_dict_epoch6.pt")
# model.load_state_dict(pretrained_state_dict)
model.to(device)

# Print summary of data and model
if bpi17_config["verbose"]:
    print(model)
    # A single gradient-free forward pass initializes the lazy modules,
    # which is required before their parameters can be counted.
    with torch.no_grad():
        batch_iterator = iter(train_loader)
        batch = next(batch_iterator)
        batch.to(device)
        out = model(batch.x.float(), batch.edge_index, batch.batch)
        print(f"Number of parameters: {torch_utils.count_parameters(model)}")

HigherOrderGNN_EFG(
  (conv1): GraphConv(-1, 48)
  (conv2): GraphConv(-1, 48)
  (act1): PReLU(num_parameters=1)
  (act2): PReLU(num_parameters=1)
  (lin_out): Linear(-1, 1, bias=True)
)
Number of parameters: 7347


In [None]:
# Training
print("Training started, progress available in Tensorboard")
torch.cuda.empty_cache()

# Every run gets its own output location, named after the text preceding the first
# "(" in the model's repr (e.g. "HigherOrderGNN_EFG") plus the start time.
timestamp = datetime.now().strftime("%Y%m%d_%Hh%Mm")
model_path_base = f"models/BPI17/efg/{str(model).split('(')[0]}_{timestamp}"

# run_training returns the path that the evaluation cell later passes on as
# `model_state_dict_path`.
best_state_dict_path = training_utils.run_training(
    num_epochs=bpi17_config["EPOCHS"],
    model=model,
    train_loader=train_loader,
    validation_loader=val_loader,
    optimizer=O.Adam(model.parameters(), **bpi17_config["optimizer_settings"]),
    loss_fn=bpi17_config["loss_fn"],
    early_stopping_criterion=bpi17_config["early_stopping"],
    model_path_base=model_path_base,
    device=bpi17_config["device"],
    # Follow the shared config flag, as the evaluation cell does.
    verbose=bpi17_config["verbose"],
)

# Write experiment settings as JSON into model path (of the model we've just trained).
# Make sure the directory exists first, so a finished training run cannot end in a
# FileNotFoundError here.
os.makedirs(model_path_base, exist_ok=True)
with open(os.path.join(model_path_base, "experiment_settings.json"), "w") as settings_file:
    json.dump(evaluation_utils.get_json_serializable_dict(bpi17_config), settings_file)

In [7]:
# Evaluation
# Log of earlier runs and their reported results, kept for reference only:
#   models/BPI17/efg/AGNN_20230714_12h19m                 0.59 test mae
#   models/BPI17/efg/AGNN_20230714_14h26m                 0.54 test mae
#   models/BPI17/efg/AGNN_20230717_15h16m                 0.48 test mae ()
#   models/BPI17/efg/AGNN_20230717_16h37m                 0.47 test mae
#   models/BPI17/efg/AGNN_20230717_15h51m                 0.4557 test mae (ChebConv)
#   models/BPI17/efg/AGNN_20230717_16h58m                 0.4546 test mae
#   models/BPI17/efg/AGNN_20230717_23h22m                 0.4534 test mae
#   models/BPI17/efg/SimpleGNN_20230718_09h30m            0.4382 test mae | 6k params
#   models/BPI17/efg/TransformerGNN_20230718_09h46m       0.4290 test mae | 24k params
#   models/BPI17/efg/GraphConvArch_20230718_10h08m        0.4248 test mae | 12k params
#   models/BPI17/efg/GraphConvNet_20230718_11h35m         0.4149 test mae | 7k params
#   models/BPI17/efg/GraphConvNet_20230718_11h54m         0.4113 test mae | 7k params
#   models/BPI17/efg/GraphConvNet_20230718_13h59m         0.4040 test mae | 7k params | fine-tuning pretrained 'GraphConvNet_20230718_11h54m'  // best so far!
#
# NOTE: `state_dict_path` is not read anywhere in this cell. The evaluation below
# uses `best_state_dict_path` and `model_path_base`, both of which are defined by
# the training cell, so that cell has to run first.
state_dict_path = (
    "models/BPI17/efg/HigherOrderGNN_EFG_20230720_13h11m"  # 0.4087 test mae | 7k params
)

# Get model evaluation report
evaluation_report = evaluation_utils.get_best_model_evaluation(
    model_state_dict_path=best_state_dict_path,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    model=model,
    evaluation_reporter=evaluation_utils.get_evaluation,
    regression=True,
    classification=False,
    verbose=bpi17_config["verbose"],
)

# Store model results as JSON into model path
with open(os.path.join(model_path_base, "evaluation_report.json"), "w") as report_file:
    json.dump(evaluation_utils.get_json_serializable_dict(evaluation_report), report_file)

# Print evaluation report
pprint.pprint(evaluation_report)

100%|██████████| 2626/2626 [01:16<00:00, 34.35it/s]
100%|██████████| 657/657 [00:18<00:00, 36.15it/s]
100%|██████████| 699/699 [00:28<00:00, 24.40it/s]

{'Test': {'report': {'MAE': 0.4087477,
                     'MAPE': 2.7260685,
                     'MSE': 0.4682943,
                     'R^2': -0.015702805917782614}},
 'Train': {'report': {'MAE': 0.4119272,
                      'MAPE': 6.021801,
                      'MSE': 0.48191255,
                      'R^2': -0.0268219392311162}},
 'Validation': {'report': {'MAE': 0.42308843,
                           'MAPE': 4.256179,
                           'MSE': 0.49531737,
                           'R^2': -0.07168010063702734}}}



