In [1]:
import os
os.chdir("/home/tim/Development/OCPPM/")
import logging
from experiment.feature_encodings.hoeg.hoeg import HOEG
# from experiment.feature_encodings.efg.efg import EFG
import ocpa.algo.predictive_monitoring.factory as feature_factory
import torch.nn
import torch.nn.functional as F
import torch.optim as O
import torch_geometric.transforms as T
from torch_geometric.nn import (
    GATConv,
    Linear,
    to_hetero,
)

logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    filename="logging/debug.log",
)
logging.critical(f"{'-' * 32} NEW RUN {'-' * 32}")

feature_storage_file = "data/BPI17/feature_encodings/HOEG/hoeg"
fs_file = "BPI2017-feature_storage-split-[C1-3,C5,P1-6,O2,O3,O5].fs"
objects_data_dict = "bpi17_ofg+oi_graph+app_node_map+off_node_map.pkl"
target =(feature_factory.EVENT_REMAINING_TIME, ())
use_cache = True

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


  from .autonotebook import tqdm as notebook_tqdm


Torch version: 1.13.1+cu117
Cuda available: True
Torch geometric version: 2.3.1



In [2]:
dataset = HOEG(
    root=feature_storage_file,
    events_filename=fs_file,
    objects_filename=objects_data_dict,
    label_key=target,
    use_cache=use_cache,
    transform=T.ToUndirected()
)

In [12]:
dataset.get_summary()

100%|██████████| 31509/31509 [00:29<00:00, 1076.15it/s]


HOEG (#graphs=31509):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |     14.9 |     61.1 |
| std        |      4.1 |     18.4 |
| min        |      8   |     28   |
| quantile25 |     12   |     48   |
| median     |     14   |     56   |
| quantile75 |     17   |     68   |
| max        |     52   |    240   |
+------------+----------+----------+

In [9]:
dataset[0]

HeteroData(
  [1mevent[0m={
    x=[41, 93],
    y=[41]
  },
  [1mapplication[0m={ x=[1, 18] },
  [1moffer[0m={ x=[9, 8] },
  [1m(event, follows, event)[0m={ edge_index=[2, 98] },
  [1m(event, interacts, application)[0m={ edge_index=[2, 32] },
  [1m(event, interacts, offer)[0m={ edge_index=[2, 28] },
  [1m(application, interacts, application)[0m={ edge_index=[2, 0] },
  [1m(application, rev_interacts, event)[0m={ edge_index=[2, 32] },
  [1m(offer, rev_interacts, event)[0m={ edge_index=[2, 28] }
)

In [10]:
class GAT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GATConv((-1, -1), hidden_channels, add_self_loops=False)
        self.lin1 = Linear(-1, hidden_channels)
        self.conv2 = GATConv((-1, -1), out_channels, add_self_loops=False)
        self.lin2 = Linear(-1, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index) + self.lin1(x)
        x = x.relu()
        x = self.conv2(x, edge_index) + self.lin2(x)
        return x



model = GAT(hidden_channels=128, out_channels=1)
model = to_hetero(model, dataset[0].metadata(), aggr='sum')
optimizer = O.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
model.to(DEVICE)

GraphModule(
  (conv1): ModuleDict(
    (event__follows__event): GATConv((-1, -1), 128, heads=1)
    (event__interacts__application): GATConv((-1, -1), 128, heads=1)
    (event__interacts__offer): GATConv((-1, -1), 128, heads=1)
    (application__interacts__application): GATConv((-1, -1), 128, heads=1)
  )
  (lin1): ModuleDict(
    (event): Linear(-1, 128, bias=True)
    (application): Linear(-1, 128, bias=True)
    (offer): Linear(-1, 128, bias=True)
  )
  (conv2): ModuleDict(
    (event__follows__event): GATConv((-1, -1), 1, heads=1)
    (event__interacts__application): GATConv((-1, -1), 1, heads=1)
    (event__interacts__offer): GATConv((-1, -1), 1, heads=1)
    (application__interacts__application): GATConv((-1, -1), 1, heads=1)
  )
  (lin2): ModuleDict(
    (event): Linear(-1, 1, bias=True)
    (application): Linear(-1, 1, bias=True)
    (offer): Linear(-1, 1, bias=True)
  )
)

In [None]:
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x_dict, data.edge_index_dict)
    mask = data['paper'].train_mask
    loss = F.cross_entropy(out['paper'][mask], data['paper'].y[mask])
    loss.backward()
    optimizer.step()
    return float(loss)