In [11]:
# Python native
import os
# Run from the project root so the relative data/ and models/ paths below resolve.
# Defaults to the original development checkout; set OCPPM_ROOT to run elsewhere.
os.chdir(os.environ.get("OCPPM_ROOT", "/home/tim/Development/OCPPM/"))

import pickle
import pprint
import random
import json
from copy import copy
from datetime import datetime
from statistics import median as median
from sys import platform
from typing import Any, Callable

# Data handling
import numpy as np
import ocpa.algo.predictive_monitoring.factory as feature_factory

# PyG
import torch

# PyTorch TensorBoard support
import torch.utils.tensorboard

# Object centric process mining
from ocpa.algo.predictive_monitoring.obj import Feature_Storage as FeatureStorage

# # Simple machine learning models, procedure tools, and evaluation metrics
# from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard.writer import SummaryWriter
from torch_geometric.loader import DataLoader
import torch_geometric.transforms as T
from tqdm import tqdm
from torch import tensor

# Custom imports
# from loan_application_experiment.feature_encodings.efg.efg import EFG
from loan_application_experiment.feature_encodings.efg.efg_sg import EFG_SG
from loan_application_experiment.feature_encodings.efg.efg import EFG
from custom_utilities import evaluation_helpers

# from importing_ocel import build_feature_storage, load_ocel, pickle_feature_storage
from loan_application_experiment.models.geometric_models import (
    AGNN_EFG,
    AdamsGCN,
    GraphModel,
    HigherOrderGNN_EFG,
    SimpleGNN_EFG
)
import torch_geometric.nn as pygnn
import torch.nn.functional as F
import torch.optim as O
import torch.nn as nn

# Setup
# Central experiment configuration: the data-preparation, model, training and
# evaluation cells of this notebook read their settings from this dict.
bpi17_config = {
    # Dataset root directory and feature-storage file name, passed to load_datasets().
    "STORAGE_PATH": "data/BPI17/feature_encodings/EFG/efg",
    "SPLIT_FEATURE_STORAGE_FILE": "BPI_split_[C2_P2_P3_P5_O3_Action_EventOrigin_OrgResource].fs",
    # Prediction target, forwarded to the datasets as `label_key`.
    "TARGET_LABEL": (feature_factory.EVENT_REMAINING_TIME, ()),
    # Forwarded to the datasets as `size_subgraph_samples`.
    "SUBGRAPH_SIZE": 4,
    # Passed to prepare_dataloaders() as `batch_size`.
    "BATCH_SIZE": 64,
    # Seeds the torch.Generator that is handed to the dataloaders.
    "RANDOM_SEED": 42,
    # Maximum number of training epochs (run_training's `num_epochs`).
    "EPOCHS": 30,
    # Number of epochs without validation-loss improvement after which training stops.
    "early_stopping": 5,
    # Keyword arguments expanded into the Adam optimizer constructor.
    "optimizer_settings": {
        "lr": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-08,
        "weight_decay": 0,
        "amsgrad": False,
    },
    # Loss callable applied to both training and validation batches.
    "loss_fn": torch.nn.L1Loss(),
    # Enables the model summary printout and the evaluation progress bars.
    "verbose": True,
    # Forwarded to the datasets as `skip_cache`.
    "skip_cache": False,
}

# ADAPTATIONS: optional overrides of the defaults above (commented out; uncomment to apply)
# bpi17_config["optimizer_settings"] = {
#     "lr": 5e-4,
#     "betas": (0.9, 0.999),
#     "eps": 1e-08,
#     "weight_decay": 0,
#     "amsgrad": False,
# }
# bpi17_config["loss_fn"] = torch.nn.L1Loss()
# bpi17_config["BATCH_SIZE"] = 64
# bpi17_config["EPOCHS"] = 30
# bpi17_config["early_stopping"] = 5

In [9]:
def load_datasets(
    storage_path: str,
    split_feature_storage_file: str,
    target_label: tuple[str, tuple],
    subgraph_size: int,
    transform=None,
    train: bool = True,
    val: bool = True,
    test: bool = True,
    skip_cache: bool = False,
    verbosity: int = 51,
) -> list[EFG_SG]:
    """Build the requested EFG_SG dataset splits.

    Args:
        storage_path: Passed to each dataset as `root`.
        split_feature_storage_file: Passed to each dataset as `filename`.
        target_label: Passed to each dataset as `label_key`.
        subgraph_size: Passed to each dataset as `size_subgraph_samples`.
        transform: Optional transform forwarded unchanged to each dataset.
        train: Whether to build the training split.
        val: Whether to build the validation split.
        test: Whether to build the test split.
        skip_cache: Forwarded unchanged to each dataset.
        verbosity: Forwarded unchanged to each dataset (previously fixed at 51).

    Returns:
        The requested datasets in the fixed order train, validation, test.
        Splits that were not requested are omitted, so the list can be shorter
        than three elements.
    """
    # Each split is selected through a differently named boolean constructor
    # flag; everything else is shared between the three datasets.
    requested_splits = (("train", train), ("validation", val), ("test", test))
    return [
        EFG_SG(
            **{split_flag: True},
            root=storage_path,
            filename=split_feature_storage_file,
            label_key=target_label,
            size_subgraph_samples=subgraph_size,
            transform=transform,
            verbosity=verbosity,
            skip_cache=skip_cache,
        )
        for split_flag, requested in requested_splits
        if requested
    ]


def print_dataset_summaries(
    ds_train: EFG_SG or EFG,
    ds_val: EFG_SG or EFG,
    ds_test: EFG_SG or EFG,
) -> None:
    """Print a headed summary for the train, validation and test datasets.

    For each dataset, in that order, a heading line is printed followed by
    the result of the dataset's `get_summary()` and a blank separator.
    """
    headed_datasets = (
        ("Train set", ds_train),
        ("Validation set", ds_val),
        ("Test set", ds_test),
    )
    for heading, dataset in headed_datasets:
        print(heading)
        print(dataset.get_summary(), "\n")


def configure_adams_model(
    num_node_features: int,
    num_hidden_features: int,
    size_subgraph_samples: int,
    device: torch.device,
) -> GraphModel:
    """Instantiate an AdamsGCN and place it on `device`.

    The hidden feature count and the subgraph sample size are handed to the
    model through its `hyperparams` dict; the value returned is whatever the
    model's `.to(device)` call returns.
    """
    hyperparams = {
        "num_hidden_features": num_hidden_features,
        "size_subgraph_samples": size_subgraph_samples,
    }
    return AdamsGCN(
        num_node_features=num_node_features,
        hyperparams=hyperparams,
    ).to(device)


def count_parameters(model: GraphModel) -> int:
    """Return the total number of elements across the model's trainable parameters.

    Parameters whose `requires_grad` flag is off are not counted.
    """
    trainable_elements = 0
    for parameter in model.parameters():
        if parameter.requires_grad:
            trainable_elements += parameter.numel()
    return trainable_elements


def prepare_dataloaders(
    batch_size: int,
    ds_train: "EFG_SG | EFG | None" = None,
    ds_val: "EFG_SG | EFG | None" = None,
    ds_test: "EFG_SG | EFG | None" = None,
    shuffle: bool = True,
    pin_memory: bool = True,
    num_workers: int = 4,
    seed_worker: Callable[[int], None] = None,
    generator: torch.Generator = None,
    test_batch_size: int = 128,
) -> list[DataLoader]:
    """Wrap the given datasets in DataLoaders that share the same loader options.

    Args:
        batch_size: Batch size of the train and validation loaders.
        ds_train: Training dataset; skipped when falsy.
        ds_val: Validation dataset; skipped when falsy.
        ds_test: Test dataset; skipped when falsy.
        shuffle: Forwarded to every loader.
        pin_memory: Forwarded to every loader.
        num_workers: Forwarded to every loader.
        seed_worker: Forwarded to every loader as `worker_init_fn`.
        generator: Forwarded to every loader.
        test_batch_size: Batch size of the test loader. Defaults to 128, the
            value that used to be hard-coded for the test loader.

    Returns:
        The loaders in the fixed order train, validation, test. A dataset that
        is skipped shortens the list, so callers that unpack three values must
        pass three usable datasets.
    """
    shared_options = dict(
        shuffle=shuffle,
        pin_memory=pin_memory,
        num_workers=num_workers,
        worker_init_fn=seed_worker,
        generator=generator,
    )
    datasets_with_batch_size = (
        (ds_train, batch_size),
        (ds_val, batch_size),
        (ds_test, test_batch_size),
    )
    dataloaders = []
    for dataset, loader_batch_size in datasets_with_batch_size:
        # Truthiness check kept on purpose: it skips None and also any dataset
        # that reports itself as empty, exactly as before.
        if dataset:
            dataloaders.append(
                DataLoader(dataset, batch_size=loader_batch_size, **shared_options)
            )
    return dataloaders


def train_one_epoch(
    epoch_index: int,
    model: GraphModel,
    train_loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    tb_writer: SummaryWriter,
    device: torch.device,
    verbose: bool = True,
    log_interval: int = 1000,
) -> float:
    """Run one optimisation pass over `train_loader`.

    Args:
        epoch_index: Zero-based epoch number; used for the printed header and
            for the TensorBoard step.
        model: Model called as `model(x, edge_index=..., batch=...)`.
        train_loader: Iterable of batches exposing `x`, `edge_index`, `y`,
            `batch` and `to(device)`.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Called as `loss_fn(outputs.squeeze(), labels)`.
        tb_writer: Receives the "Loss/train" scalar once per reporting window.
        device: Device every batch is moved to.
        verbose: Print the epoch header and the per-window losses.
        log_interval: Number of batches per reporting window (default 1000,
            the value that used to be hard-coded).

    Returns:
        Mean per-batch loss of the most recent complete reporting window. When
        the epoch has fewer batches than one window, the mean over all of its
        batches is reported and returned instead (this case used to return 0).
        Returns 0.0 only when the loader yields no batches at all.

    Raises:
        ValueError: If `log_interval` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError(f"log_interval must be >= 1, got {log_interval}")
    if verbose:
        print(f"EPOCH {epoch_index + 1}:")

    def _report(window_loss: float, batches_seen: int) -> None:
        # One TensorBoard point per window, on a global batch-count axis.
        if verbose:
            print(f"  batch {batches_seen} loss: {window_loss}")
        tb_x = epoch_index * len(train_loader) + batches_seen
        tb_writer.add_scalar("Loss/train", window_loss, tb_x)

    running_loss = 0.0
    batches_in_window = 0
    batches_seen = 0
    last_loss = None
    for batch in tqdm(train_loader, miniters=25):
        # Use GPU
        batch = batch.to(device)
        # Every data instance is an input + label pair
        inputs, adjacency_matrix, labels = (
            batch.x.float(),
            batch.edge_index,
            batch.y.float(),
        )
        # Reset gradients (set_to_none is faster than to zero)
        optimizer.zero_grad(set_to_none=True)
        # Passing the node features and the connection info
        outputs = model(inputs, edge_index=adjacency_matrix, batch=batch.batch)
        # Compute loss and gradients
        loss = loss_fn(outputs.squeeze(), labels)
        loss.backward()
        # Adjust learnable weights
        optimizer.step()
        # Gather data and report
        running_loss += loss.item()
        batches_in_window += 1
        batches_seen += 1
        if batches_in_window == log_interval:
            last_loss = running_loss / log_interval  # loss per batch
            _report(last_loss, batches_seen)
            running_loss = 0.0
            batches_in_window = 0

    if last_loss is None:
        # No complete window was reached: fall back to the mean over the
        # batches that did run instead of reporting a misleading 0.
        if batches_in_window == 0:
            return 0.0
        last_loss = running_loss / batches_in_window
        _report(last_loss, batches_seen)
    return last_loss


def run_training(
    num_epochs: int,
    model: GraphModel,
    train_loader: DataLoader,
    validation_loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    early_stopping_criterion: int,
    timestamp: str,
    device: torch.device,
    verbose: bool = True,
) -> str:
    """Train `model` with per-epoch validation, checkpointing and early stopping.

    A run directory `models/runs/<model class name>_<timestamp>` is created.
    After every epoch the mean validation loss is computed; whenever it
    improves on the best value so far, the model's state dict is written to
    `state_dict_epoch<epoch>.pt` in that directory. Training stops early once
    `early_stopping_criterion` epochs have passed without improvement.

    Args:
        num_epochs: Maximum number of epochs.
        model: Model to train; must provide `get_class_name()`.
        train_loader: Batches for `train_one_epoch`.
        validation_loader: Batches used for the validation loss.
        optimizer: Optimizer handed to `train_one_epoch`.
        loss_fn: Loss used for training and validation.
        early_stopping_criterion: Allowed number of epochs without improvement.
        timestamp: Suffix of the run directory name.
        device: Device the batches are moved to.
        verbose: Print per-epoch losses.

    Returns:
        Path of the run directory.

    Raises:
        ValueError: If `validation_loader` yields no batches.
    """
    model_path = f"models/runs/{model.get_class_name()}_{timestamp}"
    os.makedirs(model_path, exist_ok=True)
    writer = SummaryWriter(f"{model_path}/run")
    best_vloss = float("inf")
    epochs_without_improvement = 0
    try:
        for epoch in range(num_epochs):
            # Make sure gradient tracking is on, and do a pass over the data
            model.train(True)
            avg_loss = train_one_epoch(
                epoch, model, train_loader, optimizer, loss_fn, writer, device, verbose
            )

            # Switch to inference behaviour for the validation pass
            model.train(False)

            running_vloss = 0.0
            num_val_batches = 0
            # Validation needs no autograd graph; accumulating plain floats
            # (instead of loss tensors) keeps memory flat across batches.
            with torch.no_grad():
                for vdata in validation_loader:
                    vdata = vdata.to(device)
                    vinputs, vadjacency_matrix, vlabels = (
                        vdata.x.float(),
                        vdata.edge_index,
                        vdata.y.float(),
                    )
                    voutputs = model(
                        vinputs, edge_index=vadjacency_matrix, batch=vdata.batch
                    )
                    running_vloss += loss_fn(voutputs.squeeze(), vlabels).item()
                    num_val_batches += 1

            if num_val_batches == 0:
                raise ValueError(
                    "validation_loader yielded no batches; cannot compute a validation loss"
                )
            avg_vloss = running_vloss / num_val_batches
            if verbose:
                print(f"LOSS train {avg_loss} valid {avg_vloss}")

            # Log the running loss averaged per batch
            # for both training and validation
            writer.add_scalars(
                "Training vs. Validation Loss",
                {"Training": avg_loss, "Validation": avg_vloss},
                epoch + 1,
            )
            writer.flush()

            # Track best performance, and save the model's state
            if avg_vloss < best_vloss:
                best_vloss = avg_vloss
                epochs_without_improvement = 0
                torch.save(
                    model.state_dict(), f"{model_path}/state_dict_epoch{epoch}.pt"
                )
            else:
                epochs_without_improvement += 1

            if epochs_without_improvement >= early_stopping_criterion:
                print(f"Early stopping after {epoch+1} epochs.")
                break
    finally:
        # Release the event-file handle even when training is interrupted.
        writer.close()
    return model_path


def evaluate_model(
    model: GraphModel,
    dataloader: DataLoader,
    evaluation_reporter: Callable[[torch.Tensor, torch.Tensor, bool, bool], dict[str,dict[str,Any]]],
    regression:bool,
    classification:bool,
    verbose: bool = False,
) -> dict[str,dict[str,Any]]:
    """Predict on every batch of `dataloader` and hand the results to a reporter.

    Evaluation always runs on the CPU. Note the side effect: `model` is put
    into eval mode and moved to the CPU, and it is left there afterwards.

    Args:
        model: Model called as `model(x, edge_index=..., batch=...)`.
        dataloader: Iterable of batches exposing `x`, `edge_index`, `y`,
            `batch` and `to(device)`.
        evaluation_reporter: Called as
            `evaluation_reporter(y_preds, y_true, regression, classification)`.
        regression: Forwarded unchanged to the reporter.
        classification: Forwarded unchanged to the reporter.
        verbose: Show a progress bar over the batches.

    Returns:
        Whatever `evaluation_reporter` returns.
    """
    device = torch.device("cpu")
    model.eval()
    model.to(device)

    # Collect per-batch results and concatenate once at the end; concatenating
    # inside the loop re-copies everything gathered so far on every batch.
    pred_chunks = []
    label_chunks = []
    with torch.no_grad():
        for batch in tqdm(dataloader, disable=not (verbose)):
            batch = batch.to(device)
            pred_chunks.append(
                model(batch.x.float(), edge_index=batch.edge_index, batch=batch.batch)
            )
            label_chunks.append(batch.y.float())

    # An empty dataloader yields empty tensors, as before.
    y_preds = torch.cat(pred_chunks) if pred_chunks else torch.tensor([])
    y_true = torch.cat(label_chunks) if label_chunks else torch.tensor([])
    y_preds = torch.squeeze(y_preds)
    return evaluation_reporter(y_preds.to(device), y_true.to(device),regression,classification)


def get_best_model_evaluation(
    model_state_dir: str,
    train_loader: DataLoader,
    val_loader: DataLoader,
    test_loader: DataLoader,
    model: GraphModel,
    evaluation_reporter: Callable[[torch.Tensor, torch.Tensor, bool, bool], dict[str,dict[str,Any]]],
    regression:bool,
    classification:bool,
    verbose: bool = True,
) -> dict[str,dict[str,Any]]:
    """Load the most recent checkpoint of a run and evaluate it on all three splits.

    Checkpoints are expected to be named `state_dict_epoch<N>...`; the one
    with the highest epoch number `N` is loaded into `model`.

    Args:
        model_state_dir: Directory containing the checkpoint files.
        train_loader: Loader evaluated under the key "Train".
        val_loader: Loader evaluated under the key "Validation".
        test_loader: Loader evaluated under the key "Test".
        model: Model instance that receives the loaded state dict.
        evaluation_reporter: Forwarded to `evaluate_model`.
        regression: Forwarded to `evaluate_model`.
        classification: Forwarded to `evaluate_model`.
        verbose: Forwarded to `evaluate_model`.

    Returns:
        Dict with the keys "Train", "Validation" and "Test", each holding the
        result of `evaluate_model` for that split.

    Raises:
        FileNotFoundError: If `model_state_dir` contains no checkpoint file.
    """
    import re

    def find_latest_state_dict(dir: str) -> str:
        # Compare epoch numbers numerically: a plain string sort would rank
        # "state_dict_epoch9.pt" after "state_dict_epoch10.pt".
        epoch_pattern = re.compile(r"state_dict_epoch(\d+)")
        checkpoints = []
        for item in os.listdir(dir):
            match = epoch_pattern.search(item)
            if match:
                checkpoints.append((int(match.group(1)), item))
        if not checkpoints:
            raise FileNotFoundError(
                f"No 'state_dict_epoch<N>' checkpoint found in {dir!r}"
            )
        return os.path.join(dir, max(checkpoints)[1])

    # Load onto the CPU so checkpoints written on a GPU machine can be read
    # anywhere; load_state_dict copies the values into the model's own tensors.
    best_state_dict = torch.load(
        find_latest_state_dict(model_state_dir), map_location="cpu"
    )

    model.load_state_dict(best_state_dict)
    model.eval()
    loaders_by_split = {
        "Train": train_loader,
        "Validation": val_loader,
        "Test": test_loader,
    }
    return {
        split_name: evaluate_model(
            model=model,
            dataloader=loader,
            evaluation_reporter=evaluation_reporter,
            regression=regression,
            classification=classification,
            verbose=verbose,
        )
        for split_name, loader in loaders_by_split.items()
    }

In [8]:
# Data preparation
def seed_worker(worker_id: int) -> None:
    """Seed NumPy and `random` inside a DataLoader worker process.

    Intended to be passed as a DataLoader's `worker_init_fn`. The seed is
    taken from `torch.initial_seed()`, which PyTorch sets per worker from the
    DataLoader's generator, so runs stay reproducible for a seeded generator
    while the workers no longer all share one identical random stream.

    Args:
        worker_id: Worker index supplied by the DataLoader; not used directly,
            because the per-worker torch seed already differs between workers.
    """
    # NumPy only accepts 32-bit seeds, hence the modulo.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


# Target device for training: first CUDA device when one is available, else CPU.
device = (
    torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
)

# Seeded generator handed to the dataloaders.
generator = torch.Generator()
generator.manual_seed(bpi17_config["RANDOM_SEED"])

# Build the three dataset splits from the experiment configuration ...
ds_train, ds_val, ds_test = load_datasets(
    storage_path=bpi17_config["STORAGE_PATH"],
    split_feature_storage_file=bpi17_config["SPLIT_FEATURE_STORAGE_FILE"],
    target_label=bpi17_config["TARGET_LABEL"],
    subgraph_size=bpi17_config["SUBGRAPH_SIZE"],
    train=True,
    val=True,
    test=True,
    skip_cache=bpi17_config["skip_cache"],
)

# ... and wrap each of them in a dataloader.
train_loader, val_loader, test_loader = prepare_dataloaders(
    bpi17_config["BATCH_SIZE"],
    ds_train=ds_train,
    ds_val=ds_val,
    ds_test=ds_test,
    seed_worker=seed_worker,
    generator=generator,
)


In [10]:
# Select CUDA when it is available, otherwise fall back to the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): the meaning of the positional arguments (48, 1) is defined by
# HigherOrderGNN_EFG's constructor, which is not visible here; the printed model
# shows 48-channel GraphConv layers and a single-output linear layer. Confirm.
model = HigherOrderGNN_EFG(48, 1)
# Optional warm start from an earlier run (disabled):
# pretrained_state_dict = torch.load("models/runs/GraphConvNet_20230718_13h59m/state_dict_epoch6.pt")
# model.load_state_dict(pretrained_state_dict)
model.to(device)

# Print summary of data and model
if bpi17_config["verbose"]:
    print(model)
    # One forward pass on a single training batch must come before the
    # parameter count; gradients are not needed for it.
    with torch.no_grad():  # Initialize lazy modules, s.t. we can count its parameters.
        batch = next(iter(train_loader))
        batch.to(device)
        out = model(batch.x.float(), batch.edge_index, batch.batch)
        print(f"Number of parameters: {count_parameters(model)}")


HigherOrderGNN_EFG(
  (conv1): GraphConv(-1, 48)
  (conv2): GraphConv(-1, 48)
  (act1): PReLU(num_parameters=1)
  (act2): PReLU(num_parameters=1)
  (lin_out): Linear(-1, 1, bias=True)
)
Number of parameters: 7347


In [None]:
# Training
print("Training started, progress available in Tensorboard")
torch.cuda.empty_cache()

model_path = run_training(
    num_epochs=bpi17_config["EPOCHS"],
    model=model,
    train_loader=train_loader,
    validation_loader=val_loader,
    optimizer=O.Adam(
        model.parameters(), **bpi17_config["optimizer_settings"]
    ),
    loss_fn=bpi17_config["loss_fn"],
    early_stopping_criterion=bpi17_config["early_stopping"],
    timestamp=datetime.now().strftime("%Y%m%d_%Hh%Mm"),
    device=device,
    verbose=True,
)
# Write experiment settings as JSON into model path.
# The loss function is stringified in a copy only: overwriting the entry in
# bpi17_config would hand a plain string to run_training when this cell is re-run.
settings_for_json = {**bpi17_config, "loss_fn": str(bpi17_config["loss_fn"])}
with open(os.path.join(model_path, "experiment_settings.json"), "w") as settings_file:
    json.dump(settings_for_json, settings_file)

In [14]:
# Evaluation
# Run history, oldest first (checkpoint directory -> recorded result). These used
# to be successive assignments to `state_dict_path`, of which only the last one
# ever took effect; they are kept here as a log.
#   models/runs/AGNN_20230714_12h19m            0.59 test mae
#   models/runs/AGNN_20230714_14h26m            0.54 test mae
#   models/runs/AGNN_20230717_15h16m            0.48 test mae ()
#   models/runs/AGNN_20230717_16h37m            0.47 test mae
#   models/runs/AGNN_20230717_15h51m            0.4557 test mae (ChebConv)
#   models/runs/AGNN_20230717_16h58m            0.4546 test mae
#   models/runs/AGNN_20230717_23h22m            0.4534 test mae
#   models/runs/SimpleGNN_20230718_09h30m       0.4382 test mae | 6k params
#   models/runs/TransformerGNN_20230718_09h46m  0.4290 test mae | 24k params
#   models/runs/GraphConvArch_20230718_10h08m   0.4248 test mae | 12k params
#   models/runs/GraphConvNet_20230718_11h35m    0.4149 test mae | 7k params
#   models/runs/GraphConvNet_20230718_11h54m    0.4113 test mae | 7k params
# Run evaluated below: 0.4040 test mae | 7k params | fine-tuning pretrained
# 'GraphConvNet_20230718_11h54m'  // best so far!
state_dict_path = "models/runs/GraphConvNet_20230718_13h59m"

# Get model evaluation report
evaluation_report = get_best_model_evaluation(
    model_state_dir=state_dict_path,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    model=model,
    evaluation_reporter=evaluation_helpers.get_evaluation,
    regression=True,
    classification=False,
    verbose=bpi17_config["verbose"],
)

# Print evaluation report
pprint.pprint(evaluation_report)

{'Test': {'report': {'MAE': 0.4040075,
                     'MAPE': 4.1964836,
                     'MSE': 0.46823576,
                     'R^2': -0.02102018627294422}},
 'Train': {'report': {'MAE': 0.40583354,
                      'MAPE': 5.2838283,
                      'MSE': 0.4805708,
                      'R^2': -0.02500405303885933}},
 'Validation': {'report': {'MAE': 0.4187696,
                           'MAPE': 4.9057894,
                           'MSE': 0.49645367,
                           'R^2': -0.07418939895318877}}}
