In [1]:
import os

def go_up_n_directories(path, n):
    """Return the absolute path ``n`` levels above the parent of ``path``.

    The last component of ``path`` is stripped first (``os.path.dirname``) and
    ``n`` ``".."`` segments are then appended, so for a path without a trailing
    separator the result lies ``n + 1`` levels above ``path`` itself.

    Args:
        path: File-system path to start from.
        n: Number of ``".."`` segments appended after taking the parent.

    Returns:
        The normalised absolute path as a string.
    """
    return os.path.abspath(os.path.join(os.path.dirname(path), *([".."] * n)))


# Record the directory the kernel started in only once, so that re-running this
# cell always resolves to the same target instead of climbing further up the
# tree on every execution.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(go_up_n_directories(NOTEBOOK_START_DIR, 1))

In [2]:
# Imports
import pm4py
import pm4py.utils
import pm4py.read
from pm4py.objects.ocel.obj import OCEL
import ocpa.objects.log.importer.ocel.factory as ocel_import_factory
import ocpa.algo.predictive_monitoring.factory as feature_factory
from ocpa.algo.predictive_monitoring.obj import Feature_Storage
import torch_geometric.transforms as T
import json
from typing import Any
from copy import copy
import pickle
import pandas as pd
import numpy as np
from pprint import pprint

from utilities import hetero_data_utils, data_utils
from experiments.hoeg import HOEG

In [3]:
# Location of the OTC event log in JSON-OCEL format, relative to the current
# working directory.
ocel_in_file = "data/OTC/source/OTC.jsonocel"
# Read the log with pm4py and keep the result in `ocel`.
ocel = pm4py.read.read_ocel(ocel_in_file)

In [4]:
# Import the same file with OCPA, requesting "leading_type" process-execution
# extraction with "item" as the leading object type.
ocpa_ocel = ocel_import_factory.apply(
    ocel_in_file,
    parameters={"execution_extraction": "leading_type", "leading_type": "item"},
)

In [5]:
# Length of every process execution extracted by OCPA.
trace_lengths = list(map(len, ocpa_ocel.process_executions))

# Event-level summary of the log: total row count of the underlying log table
# plus descriptive statistics over the process-execution lengths.
otc_events_stats = dict(
    no_events=ocpa_ocel.log.log.index.shape[0],
    min_trace_len=min(trace_lengths),
    max_trace_len=max(trace_lengths),
    median_trace_len=np.median(trace_lengths),
    mean_trace_len=np.mean(trace_lengths),
    std_trace_len=np.std(trace_lengths),
)
pprint(otc_events_stats)

{'max_trace_len': 155,
 'mean_trace_len': 57.92572619193528,
 'median_trace_len': 56.0,
 'min_trace_len': 8,
 'no_events': 22367,
 'std_trace_len': 22.771971594254516}


In [13]:
# Transforms applied to every graph when it is accessed.
transformations = [hetero_data_utils.AddObjectSelfLoops(), T.AddSelfLoops()]

# Object node types of the heterogeneous graph; each one gets an
# (<type>, "interacts", "event") edge type, listed in this same order.
otc_object_node_types = ["order", "item", "package"]

otc_hoeg_ds = HOEG(
    root="data/OTC/feature_encodings/HOEG/hoeg",
    events_filename="OTC_split_[C2_P2_P3_O3_eas].fs",
    objects_filename="otc_ofg+oi_graph+item_node_map+order_node_map+packages_node_map.pkl",
    event_node_label_key=(feature_factory.EVENT_REMAINING_TIME, ()),
    object_nodes_label_key="@@object_lifecycle_duration",
    edge_types=[("event", "follows", "event")]
    + [(object_type, "interacts", "event") for object_type in otc_object_node_types],
    object_node_types=otc_object_node_types,
    transform=T.Compose(transformations),
)

# Use the dataset's own length as the batch size so that the first batch drawn
# from the loader is displayed as the cell output.
full_batch_loader = data_utils.DataLoader(otc_hoeg_ds, batch_size=otc_hoeg_ds.len())
next(iter(full_batch_loader))

HeteroDataBatch(
  [1mevent[0m={
    x=[472616, 20],
    y=[472616],
    batch=[472616],
    ptr=[8160]
  },
  [1morder[0m={
    x=[100437, 1],
    y=[100437],
    batch=[100437],
    ptr=[8160]
  },
  [1mitem[0m={
    x=[317093, 1],
    y=[317093],
    batch=[317093],
    ptr=[8160]
  },
  [1mpackage[0m={
    x=[44156, 1],
    y=[44156],
    batch=[44156],
    ptr=[8160]
  },
  [1m(event, follows, event)[0m={ edge_index=[2, 1324098] },
  [1m(order, interacts, event)[0m={ edge_index=[2, 895990] },
  [1m(item, interacts, event)[0m={ edge_index=[2, 1761874] },
  [1m(package, interacts, event)[0m={ edge_index=[2, 145490] }
)

In [7]:
# Print the summary statistics that `data_utils` reports for `otc_hoeg_ds`.
data_utils.print_dataset_summaries(otc_hoeg_ds)

Train set
HOEG (#graphs=8159):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |    114.5 |    562.5 |
| std        |     46.8 |    244.4 |
| min        |     11   |     38   |
| quantile25 |     80   |    381   |
| median     |    110   |    535   |
| quantile75 |    145   |    721   |
| max        |    314   |   1589   |
+------------+----------+----------+ 



In [6]:
# Raw object-graph pickle stored under the HOEG dataset root.
object_graph_pickle = "data/OTC/feature_encodings/HOEG/hoeg/raw/otc_ofg+oi_graph+item_node_map+order_node_map+packages_node_map.pkl"

# NOTE(review): pickle.load can execute arbitrary code; only open files that
# were produced by this project.
with open(object_graph_pickle, "rb") as pickle_file:
    obj_dict = pickle.load(pickle_file)

# Display the entry stored under the "ofg" key.
obj_dict["ofg"]

HeteroData(
  [1mitem[0m={
    y=[8159],
    x=[8159, 1]
  },
  [1morder[0m={
    y=[2000],
    x=[2000, 1]
  },
  [1mpackage[0m={
    y=[1325],
    x=[1325, 1]
  },
  [1m(item, interacts, item)[0m={ edge_index=[2, 38863] }
)

In [9]:
# Extract features from the OCPA log, requesting only the execution-level
# throughput feature; the result is kept in `fs`.
fs = feature_factory.apply(
    ocpa_ocel,
    execution_based_features=[(feature_factory.EXECUTION_THROUGHPUT, ())],
    verbose=False
)

In [10]:
# Key of the first execution-level feature held by the feature storage; looked
# up once instead of once per feature graph.
throughput_feature = fs.execution_features[0]

# Value of that feature for every feature graph (one per process execution).
throughput_times = [fg.attributes[throughput_feature] for fg in fs.feature_graphs]

# Case-level summary of the OTC log, named consistently with `otc_events_stats`.
otc_cases_stats = {
    "no_cases": len(fs.feature_graphs),
    "min_tp": min(throughput_times),
    "max_tp": max(throughput_times),
    "median_tp": np.median(throughput_times),
    "mean_tp": np.mean(throughput_times),
    "std_tp": np.std(throughput_times),
}
# Backward-compatible alias: this dictionary used to be called
# `bpi17_cases_stats`, although the statistics describe the OTC log.
bpi17_cases_stats = otc_cases_stats
pprint(otc_cases_stats)

{'max_tp': 12109961.0,
 'mean_tp': 3594196.7417575684,
 'median_tp': 3335901.0,
 'min_tp': 447864.0,
 'no_cases': 8159,
 'std_tp': 1638634.3110654803}
