In [1]:
# DEPENDENCIES
# Python native
import os

# Switch to the project root: the data/model paths configured further down are
# relative. The root can be overridden with the OCPPM_ROOT environment variable;
# the original developer path is kept as the fallback so existing setups still work.
os.chdir(os.environ.get("OCPPM_ROOT", "/home/tim/Development/OCPPM/"))

import pprint
import functools
import gc
import json
from copy import copy
from datetime import datetime
from statistics import median as median
from typing import Any, Callable

# Data handling
import numpy as np

# PyG
import torch

# PyTorch TensorBoard support
import torch.utils.tensorboard

# # Simple machine learning models, procedure tools, and evaluation metrics
# from sklearn.model_selection import train_test_split
from torch_geometric.loader import DataLoader
from tqdm import tqdm
import ocpa.algo.predictive_monitoring.factory as feature_factory

# Custom imports
from experiments.efg import EFG
from experiments.efg_sg import EFG_SG
from utilities import torch_utils
from utilities import data_utils
from utilities import training_utils
from utilities import evaluation_utils

from models.definitions.geometric_models import (
    AGNN_EFG,
    AdamsGCN,
    GraphModel,
    HigherOrderGNN_EFG,
    SimpleGNN_EFG,
)
import torch_geometric.nn as pygnn
import torch.optim as O
import torch.nn as nn

# Report the machine this notebook runs on
torch_utils.print_system_info()

# Baseline experiment settings; the adaptations below override parts of it.
cs_efg_config = {
    # storage locations
    "model_output_path": "models/CS/efg",
    "STORAGE_PATH": "data/CS/feature_encodings/EFG/efg",
    "SPLIT_FEATURE_STORAGE_FILE": "CS_split_[C2_P2_P3_O3_eas].fs",
    # prediction task
    "regression_task": False,
    "TARGET_LABEL": "event_ea4",
    "graph_level_prediction": True,
    "features_dtype": torch.float32,
    "target_dtype": torch.int64,
    "class_distribution": {
        0.0: 0.705315,
        1.0: 0.015818,
        2.0: 0.010882,
        3.0: 0.016800,
        4.0: 0.069764,
        5.0: 0.081383,
        6.0: 0.100038,
    },
    # data and training hyperparameters
    "SUBGRAPH_SIZE": 4,
    "BATCH_SIZE": 64,
    "RANDOM_SEED": 42,
    "EPOCHS": 30,
    "early_stopping": 5,
    "optimizer": O.Adam,
    "optimizer_settings": {
        "lr": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-08,
        "weight_decay": 0,
        "amsgrad": False,
    },
    "loss_fn": torch.nn.CrossEntropyLoss(),
    # runtime
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "verbose": True,
    "skip_cache": False,
}

# ADAPTATIONS
# Reconfigure for remaining-time prediction: a regression target with an L1
# loss, for which the class distribution entry no longer applies.
cs_efg_config["regression_task"] = True
if cs_efg_config["regression_task"]:
    cs_efg_config.update(
        {
            "target_dtype": torch.float32,
            "TARGET_LABEL": (feature_factory.EVENT_REMAINING_TIME, ()),
            "loss_fn": torch.nn.L1Loss(),
        }
    )
    cs_efg_config.pop("class_distribution", None)

# Other adaptations (update() on existing keys keeps their position in the dict)
cs_efg_config.update(
    {
        "BATCH_SIZE": 64,
        "early_stopping": 4,
        "optimizer": O.NAdam,
        "optimizer_settings": {"lr": 0.001},
    }
)
# cs_efg_config["skip_cache"] = True

CPU: Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz (4x)
Total CPU memory: 46.93GB
Available CPU memory: 41.66GB
GPU: NVIDIA GeForce GTX 960
Total GPU memory: 4096.0MB
Available GPU memory: 4029.0MB
Platform: Linux-5.19.0-46-generic-x86_64-with-glibc2.35


In [2]:
# Load the train/validation/test datasets described by the experiment config.
# The arguments are gathered first so the call itself stays a one-liner.
dataset_kwargs = dict(
    dataset_class=EFG_SG,
    storage_path=cs_efg_config["STORAGE_PATH"],
    split_feature_storage_file=cs_efg_config["SPLIT_FEATURE_STORAGE_FILE"],
    target_label=cs_efg_config["TARGET_LABEL"],
    graph_level_target=cs_efg_config["graph_level_prediction"],
    features_dtype=cs_efg_config["features_dtype"],
    target_dtype=cs_efg_config["target_dtype"],
    subgraph_size=cs_efg_config["SUBGRAPH_SIZE"],
    # request all three splits
    train=True,
    val=True,
    test=True,
    skip_cache=cs_efg_config["skip_cache"],
)
ds_train, ds_val, ds_test = data_utils.load_datasets(**dataset_kwargs)

No EventSubGraphDataset found with this configuration in 'data/CS/feature_encodings/EFG/efg/processed'. Proceeding to processing...


Processing...
61965it [05:43, 154.41it/s]

In [None]:
# The configured random seed feeds both the worker-seeding callback and the
# torch Generator handed to prepare_dataloaders.
random_seed = cs_efg_config["RANDOM_SEED"]
worker_seeder = functools.partial(torch_utils.seed_worker, state=random_seed)
loader_generator = torch.Generator().manual_seed(random_seed)

train_loader, val_loader, test_loader = data_utils.prepare_dataloaders(
    batch_size=cs_efg_config["BATCH_SIZE"],
    ds_train=ds_train,
    ds_val=ds_val,
    ds_test=ds_test,
    # num_workers=3,
    seed_worker=worker_seeder,
    generator=loader_generator,
)

In [None]:
class SimpleHigherOrderGNN_EFG(GraphModel):
    """Single-layer graph network: GraphConv -> PReLU -> (mean pool) -> Linear.

    ``pygnn.GraphConv`` is the operator PyG documents as the higher-order GNN
    layer of Morris et al. ("Weisfeiler and Leman Go Neural"); this class is not
    a GCN. Both ``conv1`` and ``lin_out`` are declared with ``-1`` input
    channels, i.e. they are lazily sized and need one forward pass before their
    parameters can be counted or optimised.

    Args:
        hidden_channels: Output width of the graph convolution.
        out_channels: Width of the final linear layer (model output size).
        regression_target: If True the linear output is returned unchanged;
            otherwise a softmax over dim 1 is applied to it.
        graph_level_prediction: If True node embeddings are mean-pooled per
            graph (using ``batch``) before the linear layer; otherwise the
            per-node embeddings are passed straight through.
    """

    # SimpleGNN_EFG(64, 1): 0.4382 MAE (test), 6k params
    def __init__(
        self,
        hidden_channels: int = 64,
        out_channels: int = 1,
        regression_target: bool = True,
        graph_level_prediction: bool = True,
    ):
        super().__init__()
        self.conv1 = pygnn.GraphConv(-1, hidden_channels)
        self.act1 = nn.PReLU()
        # A named function instead of a lambda keeps the model instance picklable.
        self.pool1 = (
            pygnn.global_mean_pool if graph_level_prediction else self._no_pool
        )
        self.lin_out = pygnn.Linear(-1, out_channels)
        # nn.Identity (parameter-free, so state_dict keys are unchanged) replaces
        # the former identity lambda; it also makes the output stage visible in
        # print(model).
        # NOTE(review): torch.nn.CrossEntropyLoss expects unnormalised logits, so
        # feeding it this Softmax output looks unintended - confirm how the
        # classification path consumes the model output before changing it.
        self.probs_out = nn.Identity() if regression_target else nn.Softmax(dim=1)

    @staticmethod
    def _no_pool(x, batch):
        """Pass node-level features through unchanged (no graph pooling)."""
        return x

    def forward(self, x, edge_index, batch):
        """Run the network on one (mini-batched) graph.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to the convolution.
            batch: Node-to-graph assignment, used only by the pooling step.

        Returns:
            The linear layer's output, softmax-normalised when the model was
            built with ``regression_target=False``.
        """
        x = self.conv1(x, edge_index)
        x = self.act1(x)
        x = self.pool1(x, batch)
        x = self.lin_out(x)
        return self.probs_out(x)
    
model = SimpleHigherOrderGNN_EFG(
    hidden_channels=64,
    out_channels=1,
    regression_target=cs_efg_config["regression_task"],
    graph_level_prediction=cs_efg_config["graph_level_prediction"],
)
# pretrained_state_dict = torch.load("models/runs/GraphConvNet_20230718_13h59m/state_dict_epoch6.pt")
# model.load_state_dict(pretrained_state_dict)

# cs_efg_config["verbose"] = False
# Print summary of data and model
if cs_efg_config["verbose"]:
    print(model)

# The model's layers are declared with in_channels=-1 (lazily sized). Run one
# dry forward pass unconditionally - not only in verbose mode - so that the
# parameters exist, on the configured device, before the training cell hands
# model.parameters() to the optimizer.
model.to(cs_efg_config["device"])
with torch.no_grad():
    batch = next(iter(train_loader))
    batch = batch.to(cs_efg_config["device"])
    model(batch.x.float(), batch.edge_index, batch.batch)
    del batch  # release the sample batch right away

if cs_efg_config["verbose"]:
    print(f"Number of parameters: {torch_utils.count_parameters(model)}")

SimpleGNN_EFG(
  (conv1): GCNConv(-1, 64)
  (conv2): GCNConv(-1, 64)
  (act1): ReLU()
  (act2): ReLU()
  (lin_out): Linear(-1, 1, bias=True)
  (probs_out): Softmax(dim=1)
)


Number of parameters: 5889


In [None]:
# TRAINING CONFIGURATION
# cs_efg_config["device"] = torch.device("cpu")
# Define the loss function with weight
# # dataloader = DataLoader(ds_train, ds_train.size)
# # batch = next(iter(dataloader))
# # class_distribution = pd.DataFrame(batch.y).value_counts(normalize=True).sort_index()
# # loss_weights = 1 / torch.tensor(class_distribution.values)
# cs_efg_config["class_distribution"] = {
#     0.0: 0.5,
#     1.0: 0.5 / 6,
#     2.0: 0.5 / 6,
#     3.0: 0.5 / 6,
#     4.0: 0.5 / 6,
#     5.0: 0.5 / 6,
#     6.0: 0.5 / 6,
# }
# loss_weights = 1 / torch.tensor(list(cs_efg_config["class_distribution"].values()))
# cs_efg_config["loss_fn"] = torch.nn.CrossEntropyLoss(
#     weight=loss_weights.to(cs_efg_config["device"])
# )
# # del dataloader
# # del batch

cs_efg_config["model_output_path"] = "models/CS/efg"
# Run directory: <text before the first "(" of the model's string form>_<start time>
timestamp = datetime.now().strftime("%Y%m%d_%Hh%Mm")
model_name = str(model).partition("(")[0]
model_path_base = "/".join(
    [cs_efg_config["model_output_path"], f"{model_name}_{timestamp}"]
)
# cs_efg_config["optimizer_settings"] = {
#     "lr": 5e-4,
#     "betas": (0.9, 0.999),
#     "eps": 1e-08,
#     "weight_decay": 0,
#     "amsgrad": False,
# }
# cs_efg_config["loss_fn"] = torch.nn.CrossEntropyLoss()
# cs_efg_config["EPOCHS"] = 30
# cs_efg_config["early_stopping"] = 5

In [None]:
# TRAINING
print("Training started, progress available in Tensorboard")
torch.cuda.empty_cache()

# Train with the configured optimizer/loss; the call returns the path of the
# best state dict, which the evaluation cell loads afterwards.
best_state_dict_path = training_utils.run_training(
    model=model,
    train_loader=train_loader,
    validation_loader=val_loader,
    num_epochs=cs_efg_config["EPOCHS"],
    early_stopping_criterion=cs_efg_config["early_stopping"],
    optimizer=cs_efg_config["optimizer"](
        model.parameters(), **cs_efg_config["optimizer_settings"]
    ),
    loss_fn=cs_efg_config["loss_fn"],
    x_dtype=cs_efg_config["features_dtype"],
    y_dtype=cs_efg_config["target_dtype"],
    device=cs_efg_config["device"],
    model_path_base=model_path_base,
    verbose=cs_efg_config["verbose"],
)

# Store the experiment settings as JSON next to the model we've just trained
settings_json_path = os.path.join(model_path_base, "experiment_settings.json")
with open(settings_json_path, "w") as settings_file:
    json.dump(
        evaluation_utils.get_json_serializable_dict(cs_efg_config), settings_file
    )

In [None]:
# Checkpoints of earlier runs, kept for reference (relative to model_output_path).
# The first two used to be assigned to state_dict_path and were immediately
# overwritten, so they are recorded as comments only:
#   HigherOrderGNN_EFG_20230726_14h50m/state_dict_epoch11.pt  #  validation F1 | 7k params
#   HigherOrderGNN_EFG_20230727_14h23m/state_dict_epoch2.pt  #  validation F1 | 7k params
# NOTE: state_dict_path is not used by the evaluation below, which loads
# best_state_dict_path from the training cell.
state_dict_path = f"{cs_efg_config['model_output_path']}/SimpleGNN_EFG_20230729_12h43m/state_dict_epoch11.pt"  # 0.54 test MAE | 6k params

# Get model evaluation report
# NOTE(review): regression=True is hard-coded rather than taken from
# cs_efg_config["regression_task"]; switch the flag (see the commented
# classification option) when evaluating a classification run.
evaluation_report = evaluation_utils.get_best_model_evaluation(
    model_state_dict_path=best_state_dict_path,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    model=model,
    evaluation_reporter=evaluation_utils.get_evaluation,
    regression=True,
    # classification=True,
    verbose=cs_efg_config["verbose"],
)

# Store model results as JSON into model path
report_json_path = os.path.join(model_path_base, "evaluation_report.json")
with open(report_json_path, "w") as report_file:
    json.dump(
        evaluation_utils.get_json_serializable_dict(evaluation_report), report_file
    )

In [None]:
# Pretty-print the evaluation report with the default printer settings
pprint.PrettyPrinter().pprint(evaluation_report)