In [1]:
# %%
# DEPENDENCIES
import os

# Switch to the repository root before anything else runs: the data and model
# paths in the config further down are relative, and the project-local packages
# imported below (`utilities`, `models`) are presumably resolved from the
# working directory as well.
# The root can be overridden through the OCPPM_ROOT environment variable so the
# notebook is not tied to one machine; the default is the original location, so
# existing setups behave exactly as before.
os.chdir(os.environ.get("OCPPM_ROOT", "/home/tim/Development/OCPPM/"))
import functools
import json
import pprint
import pickle
import random
from datetime import datetime

import numpy as np
import pandas as pd
import ocpa.algo.predictive_monitoring.factory as feature_factory
import torch
import torch.nn as nn
import torch.optim as O
import torch.utils.tensorboard
import torch_geometric.loader as L
import torch_geometric.nn as pygnn
import torch_geometric.transforms as T
from torch_geometric.data import HeteroData

import utilities.evaluation_utils as evaluation_utils
import utilities.hetero_data_utils as hetero_data_utils
import utilities.hetero_evaluation_utils as hetero_evaluation_utils
import utilities.hetero_training_utils as hetero_training_utils
import utilities.torch_utils as torch_utils

# Custom imports
from models.definitions.geometric_models import GraphModel, HeteroHigherOrderGNN

# Print system info
# Records the environment of this run in the cell output (CPU/GPU and memory
# summary, platform, torch / torch_geometric versions), so results can later be
# related to the hardware and library versions they were produced with.
torch_utils.print_system_info()
torch_utils.print_torch_info()

# INITIAL CONFIGURATION
# Regression experiment: the label being predicted is @@object_lifecycle_duration.
cs_ofg_config = dict(
    # Input graph (pickled) and the directory trained models are written to
    ofg_file="data/CS/feature_encodings/OFG/ofg/raw/CS_OFG.pkl",
    model_output_path="models/CS/ofg",
    # Baseline training hyper-parameters (some are overridden further down)
    BATCH_SIZE=256,
    RANDOM_SEED=42,
    EPOCHS=30,
    # Node type whose nodes carry the prediction target
    target_node_type="krs",
    # (node types, edge types); handed to `pygnn.to_hetero` when the model is built
    meta_data=(
        ["krs", "krv", "cv"],
        [
            ("krs", "interacts", "krv"),
            ("cv", "interacts", "krv"),
            ("cv", "interacts", "krs"),
            ("krv", "rev_interacts", "krs"),
            ("krv", "rev_interacts", "cv"),
            ("krs", "rev_interacts", "cv"),
        ],
    ),
    early_stopping=3,
    # Optimizer class plus the keyword arguments it is constructed with
    optimizer=O.Adam,
    optimizer_settings=dict(
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    ),
    # L1 loss, i.e. mean absolute error
    loss_fn=torch.nn.L1Loss(),
    verbose=True,
    skip_cache=False,
    # Use the GPU when one is available, otherwise fall back to the CPU
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

# CONFIGURATION ADAPTATIONS may be set here
# Overrides applied on top of the baseline values above for this run.
cs_ofg_config.update(
    BATCH_SIZE=512,
    EPOCHS=32,
    early_stopping=4,
    verbose=False,
)
# cs_ofg_config["optimizer_settings"]['lr']=1e-4

CRITICAL:root:-------------------------------- TEST CS HOEG --------------------------------


CPU: Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz (4x)
Total CPU memory: 46.93GB
Available CPU memory: 29.56GB
GPU: NVIDIA GeForce GTX 960
Total GPU memory: 4096.0MB
Available GPU memory: 4029.0MB
Platform: Linux-5.19.0-46-generic-x86_64-with-glibc2.35
Torch version: 1.13.1+cu117
Cuda available: True
Torch geometric version: 2.3.1


In [3]:
# %%
# DATA PREPARATION
# Read the pickled HeteroData graph from the path given in the config.
# NOTE: unpickling executes arbitrary code; only load files from a trusted source.
with open(cs_ofg_config["ofg_file"], "rb") as ofg_handle:
    data: HeteroData = pickle.load(ofg_handle)

# Seed torch's global RNG before the (random) split below is applied
torch.manual_seed(cs_ofg_config["RANDOM_SEED"])

# Preprocessing steps, executed in list order
transformations = [
    # make the graph undirected
    T.ToUndirected(),
    # add self-loops
    T.AddSelfLoops(),
    # normalize the node features
    T.NormalizeFeatures(),
    # random node-level train/val/test assignment:
    # num_val = 0.8 * 0.2 (= 0.16) and num_test = 0.2
    T.RandomNodeSplit(num_val=0.8 * 0.2, num_test=0.2),
]
# Chain the steps into one transform and run it over the graph in a single pass
preprocess = T.Compose(transformations)
data = preprocess(data)

# One dataloader per split, all seeded on the target node type
train_loader, val_loader, test_loader = hetero_data_utils.hetero_dataloaders_from_hetero_data(
    hetero_data=data,
    batch_size=cs_ofg_config["BATCH_SIZE"],
    num_neighbors=[3] * 2,
    node_type=cs_ofg_config["target_node_type"],
    shuffle=True,
    pin_memory=True,
    num_workers=2,
    generator=torch.Generator().manual_seed(cs_ofg_config["RANDOM_SEED"]),
)

In [4]:
# Bare expression: the notebook displays the repr of the transformed HeteroData
# object (node stores with their split masks, and the edge stores).
data

HeteroData(
  krs={
    y=[205995],
    x=[205995, 21],
    train_mask=[205995],
    val_mask=[205995],
    test_mask=[205995]
  },
  krv={
    y=[111427],
    x=[111427, 21],
    train_mask=[111427],
    val_mask=[111427],
    test_mask=[111427]
  },
  cv={
    y=[6613],
    x=[6613, 21],
    train_mask=[6613],
    val_mask=[6613],
    test_mask=[6613]
  },
  (krs, interacts, krv)={ edge_index=[2, 73007] },
  (cv, interacts, krv)={ edge_index=[2, 2173] },
  (cv, interacts, krs)={ edge_index=[2, 15] },
  (krv, rev_interacts, krs)={ edge_index=[2, 73007] },
  (krv, rev_interacts, cv)={ edge_index=[2, 2173] },
  (krs, rev_interacts, cv)={ edge_index=[2, 15] }
)

In [3]:
# %%
# MODEL INITIATION
class HeteroHigherOrderGNN(GraphModel):
    """Two stacked ``GraphConv`` layers with PReLU activations and a linear read-out.

    NOTE: this notebook-local definition rebinds the name
    ``HeteroHigherOrderGNN`` that is also imported from
    ``models.definitions.geometric_models`` at the top of the notebook; code
    that runs after this cell uses the class defined here.

    Args:
        hidden_channels: Output width of both ``GraphConv`` layers.
        out_channels: Output width of the final linear layer.
        regression_target: If ``True`` (default) the linear output is returned
            unchanged; otherwise a softmax over dimension 1 is applied to it.
    """

    def __init__(
        self,
        hidden_channels: int = 32,
        out_channels: int = 1,
        regression_target: bool = True,
    ):
        super().__init__()
        # Input sizes of -1 are inferred lazily on the first forward pass.
        self.conv1 = pygnn.GraphConv(-1, hidden_channels)
        self.conv2 = pygnn.GraphConv(-1, hidden_channels)
        self.act1 = nn.PReLU()
        self.act2 = nn.PReLU()
        self.lin_out = pygnn.Linear(-1, out_channels)
        # Output head. `nn.Identity` replaces the former `lambda x: x`: it is
        # the same pass-through, but as a registered (parameter-free) module it
        # shows up in the model repr and keeps the model picklable.
        self.probs_out = nn.Identity() if regression_target else nn.Softmax(dim=1)

    def forward(self, x, edge_index, batch=None):
        """Run both convolution layers and the read-out.

        Args:
            x: Node feature input for the convolution layers.
            edge_index: Graph connectivity passed to both convolution layers.
            batch: Accepted for interface compatibility; not used here.

        Returns:
            The output of the linear layer, passed through the output head
            (identity for regression, softmax otherwise).
        """
        x = self.conv1(x, edge_index)
        x = self.act1(x)
        x = self.conv2(x, edge_index)
        x = self.act2(x)
        x = self.lin_out(x)
        return self.probs_out(x)


# Build the homogeneous network, lift it onto the node/edge types listed in the
# config, and cast its parameters to double precision.
model = pygnn.to_hetero(HeteroHigherOrderGNN(64, 1), cs_ofg_config["meta_data"])
model.double()

# Print summary of data and model
# if cs_ofg_config["verbose"]:
# print(model)
# The layers are declared with lazily inferred input sizes, so one forward pass
# (without gradient tracking) is needed before the parameters can be counted.
with torch.no_grad():
    device = cs_ofg_config["device"]
    batch = next(iter(train_loader))
    batch.to(device)
    model.to(device)
    out = model(batch.x_dict, batch.edge_index_dict)
    n_params = torch_utils.count_parameters(model)
    print(f"Number of parameters: {n_params}")



Number of parameters: 66249


In [4]:
# %%
# MODEL TRAINING
print("Training started, progress available in Tensorboard")
torch.cuda.empty_cache()

# Output location for this run: "<configured path>/<model name>_<timestamp>",
# where the model name is the text before the first "(" in the model's repr.
timestamp = datetime.now().strftime("%Y%m%d_%Hh%Mm")
model_name = str(model).split("(")[0]
model_path_base = f"{cs_ofg_config['model_output_path']}/{model_name}_{timestamp}"

# Instantiate the configured optimizer class over the model parameters
optimizer = cs_ofg_config["optimizer"](
    model.parameters(), **cs_ofg_config["optimizer_settings"]
)

# Train; the helper returns the path of the best state dict it stored
best_state_dict_path = hetero_training_utils.run_training_hetero(
    target_node_type=cs_ofg_config["target_node_type"],
    num_epochs=cs_ofg_config["EPOCHS"],
    model=model,
    train_loader=train_loader,
    validation_loader=val_loader,
    optimizer=optimizer,
    loss_fn=cs_ofg_config["loss_fn"],
    early_stopping_criterion=cs_ofg_config["early_stopping"],
    model_path_base=model_path_base,
    device=cs_ofg_config["device"],
    verbose=cs_ofg_config["verbose"],
)

# Persist the experiment settings as JSON next to the model that was just trained
settings_path = os.path.join(model_path_base, "experiment_settings.json")
with open(settings_path, "w") as settings_file:
    json.dump(evaluation_utils.get_json_serializable_dict(cs_ofg_config), settings_file)

Training started, progress available in Tensorboard


100%|██████████| 258/258 [00:04<00:00, 56.23it/s]
  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
100%|██████████| 258/258 [00:04<00:00, 53.87it/s]
  return F.l1_loss(input, target, reduction=self.reduction)
100%|██████████| 258/258 [00:04<00:00, 59.76it/s]
100%|██████████| 258/258 [00:04<00:00, 63.52it/s]
100%|██████████| 258/258 [00:04<00:00, 55.02it/s]


Early stopping after 5 epochs.


In [5]:
# Results of earlier runs, kept for reference
# (checkpoint paths are relative to cs_ofg_config['model_output_path']):
#   GraphModule_20230728_10h57m/state_dict_epoch0.pt | 0.4708 test mae | HeteroHigherOrderGNN(64, 1) | 36k params
#   GraphModule_20230728_11h47m/state_dict_epoch0.pt | 0.4663 test mae | HeteroHigherOrderGNN(32, 1) | 14k params
#   GraphModule_20230729_17h54m/state_dict_epoch0.pt | 0.4689 test mae | HeteroHigherOrderGNN(32, 1) | 21k params
#   GraphModule_20230729_18h17m/state_dict_epoch1.pt | 0.4607 test mae | HeteroHigherOrderGNN(32, 1) | 21k params
# Only the last of these is kept as a live assignment: the three earlier
# assignments were each overwritten by the next line and had no effect.
# NOTE(review): `state_dict_path` is not used by the evaluation below, which is
# given `best_state_dict_path` from the training cell - confirm this is intended.
state_dict_path = f"{cs_ofg_config['model_output_path']}/GraphModule_20230729_18h17m/state_dict_epoch1.pt"  # 0.4607 test mae | HeteroHigherOrderGNN(32, 1) | 21k params


# Get MAE results
# Evaluates the checkpoint produced by the training run on all three splits,
# using L1 loss (mean absolute error) as the metric.
evaluation_dict = hetero_evaluation_utils.evaluate_best_model(
    target_node_type=cs_ofg_config["target_node_type"],
    model_state_dict_path=best_state_dict_path,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    model=model,
    metric=torch.nn.L1Loss(),
    device=cs_ofg_config["device"],
    verbose=cs_ofg_config["verbose"],
)

# Store model results as JSON into model path
with open(os.path.join(model_path_base, "evaluation_report.json"), "w") as report_file:
    json.dump(evaluation_utils.get_json_serializable_dict(evaluation_dict), report_file)

# Print MAE results
print()
print(model_path_base)
pprint.pprint(evaluation_dict)


models/CS/ofg/GraphModule_20230729_20h15m
{'Test L1Loss()': tensor(0.4708, device='cuda:0', dtype=torch.float64),
 'Train L1Loss()': tensor(0.4665, device='cuda:0', dtype=torch.float64),
 'Val L1Loss()': tensor(0.4687, device='cuda:0', dtype=torch.float64)}
