In [1]:
import os

def go_up_n_directories(path, n):
    """Return the absolute path ``n`` levels above ``os.path.dirname(path)``.

    The last component of ``path`` is dropped first, so for a directory the
    result is its ancestor ``n + 1`` levels up
    (e.g. ``/a/b/c`` with ``n=1`` gives ``/a``).

    Args:
        path: File or directory path to start from.
        n: Number of ``..`` steps applied after taking the dirname.

    Returns:
        The normalised absolute path as a string.
    """
    return os.path.abspath(os.path.join(os.path.dirname(path), *([".."] * n)))


# Remember the directory the kernel started in. Resolving the target from this
# fixed anchor (instead of the current working directory) makes the cell safe
# to re-run: a second execution no longer climbs further up the tree.
if "_KERNEL_START_DIR" not in globals():
    _KERNEL_START_DIR = os.getcwd()
os.chdir(go_up_n_directories(_KERNEL_START_DIR, 1))

In [11]:
# Imports
import pm4py
import pm4py.utils
import pm4py.read
from pm4py.objects.ocel.obj import OCEL
import ocpa.objects.log.importer.ocel.factory as ocel_import_factory
import ocpa.algo.predictive_monitoring.factory as feature_factory
from ocpa.algo.predictive_monitoring.obj import Feature_Storage
import torch_geometric.transforms as T
import json
from typing import Any
from copy import copy
import pickle
import pandas as pd
import numpy as np
from pprint import pprint

from utilities import hetero_data_utils, data_utils
from experiments.hoeg import HOEG

In [3]:
# Disabled alternative: read the count-encoded variant of the log with pm4py.
# NOTE(review): this path starts with "../../" whereas the active path below is
# relative to the current working directory -- presumably it predates the
# chdir in the first cell; confirm before re-enabling.
# ocel_ce_in_file = "../../data/BPI17/source/BPI2017-CountEncoded.jsonocel"
# ocel_ce = pm4py.read.read_ocel(ocel_ce_in_file)

# Read the BPI17 object-centric event log with pm4py and derive its extended
# table, which the next cell inspects through its `ocel:type:*` columns.
ocel_in_file = "data/BPI17/source/BPI2017-Final.jsonocel"
ocel = pm4py.read.read_ocel(ocel_in_file)
ocel_table = ocel.get_extended_table()

In [14]:
def _refers_to_multiple(table, column):
    """Return True when exploding ``table[column]`` yields a different number of rows."""
    references = table[column]
    return len(references.explode()) != len(references)


# Can a single event refer to several offers?
# Answer: YES, it can. For example `ocel:eid==385026`.
mul_offers = _refers_to_multiple(ocel_table, 'ocel:type:offer')
print('An event may refer to multiple offers: ', mul_offers)

# Can a single event refer to several applications?
# Answer: NO, it cannot.
mul_applications = _refers_to_multiple(ocel_table, 'ocel:type:application')
print('An event may refer to multiple applications: ', mul_applications)


An event may refer to multiple offers:  True
An event may refer to multiple applications:  False


In [None]:
# Both BPI17 source logs sit in the same folder; the paths are relative to the
# current working directory.
bpi17_source_dir = "data/BPI17/source"
ocel_ce_in_file = bpi17_source_dir + "/BPI2017-CountEncoded.jsonocel"
ocel_in_file = bpi17_source_dir + "/BPI2017-Final.jsonocel"

In [4]:
# Import both logs through ocpa's OCEL import factory. These objects are
# distinct from the pm4py `ocel` loaded earlier in the notebook.
# Requires `ocel_ce_in_file` and `ocel_in_file` to be defined already; in this
# notebook `ocel_ce_in_file` is only assigned in the cell directly above.
ocpa_ocel_ce = ocel_import_factory.apply(ocel_ce_in_file)
ocpa_ocel = ocel_import_factory.apply(ocel_in_file)

In [5]:
# Length of every process execution in the count-encoded ocpa log.
trace_lengths = list(map(len, ocpa_ocel_ce.process_executions))

# Event count plus descriptive statistics of the execution lengths.
bpi17_events_stats = dict(
    no_events=len(ocpa_ocel_ce.log.log.index),
    min_trace_len=min(trace_lengths),
    max_trace_len=max(trace_lengths),
    median_trace_len=np.median(trace_lengths),
    mean_trace_len=np.mean(trace_lengths),
    std_trace_len=np.std(trace_lengths),
)
pprint(bpi17_events_stats)

{'max_trace_len': 41,
 'mean_trace_len': 12.502173982036878,
 'median_trace_len': 12.0,
 'min_trace_len': 6,
 'no_events': 393931,
 'std_trace_len': 3.5650717994052803}


In [6]:
# Transform pipeline handed to the dataset (composed in this order).
transformations = [
    hetero_data_utils.AddObjectSelfLoops(),
    T.AddSelfLoops(),
    T.NormalizeFeatures(),
]

# Edge types requested from the HOEG dataset, as (source, relation, target).
hoeg_edge_types = [
    ("event", "follows", "event"),
    ("application", "interacts", "event"),
    ("offer", "interacts", "event"),
]

# All constructor arguments collected in one place before building the dataset.
hoeg_kwargs = dict(
    root="data/BPI17/feature_encodings/HOEG/hoeg",
    events_filename="BPI_split_[C2_P2_P3_P5_O3_Action_EventOrigin_OrgResource].fs",
    objects_filename="bpi17_ofg+oi_graph+app_node_map+off_node_map.pkl",
    event_node_label_key=(feature_factory.EVENT_REMAINING_TIME, ()),
    object_nodes_label_key="@@object_lifecycle_duration",
    edge_types=hoeg_edge_types,
    object_node_types=["application", "offer"],
    transform=T.Compose(transformations),
)
bpi17_hoeg_ds = HOEG(**hoeg_kwargs)

# Use a batch size equal to the dataset length so the first batch holds every
# graph, then display that batch as the cell output.
full_batch_loader = data_utils.DataLoader(
    bpi17_hoeg_ds, batch_size=bpi17_hoeg_ds.len()
)
next(iter(full_batch_loader))

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


HeteroDataBatch(
  [1mevent[0m={
    x=[393931, 27],
    y=[393931],
    batch=[393931],
    ptr=[31510]
  },
  [1mapplication[0m={
    x=[31509, 3],
    y=[31509],
    batch=[31509],
    ptr=[31510]
  },
  [1moffer[0m={
    x=[42995, 7],
    y=[42995],
    batch=[42995],
    ptr=[31510]
  },
  [1m(event, follows, event)[0m={ edge_index=[2, 1219845] },
  [1m(event, interacts, application)[0m={ edge_index=[2, 328894] },
  [1m(event, interacts, offer)[0m={ edge_index=[2, 201006] }
)

In [7]:
# Print summary statistics for the HOEG dataset; the captured output below
# reports per-graph #nodes and #edges (mean, std, min, quartiles, max).
data_utils.print_dataset_summaries(bpi17_hoeg_ds)

Train set


100%|██████████| 31509/31509 [00:31<00:00, 1011.56it/s]


HOEG (#graphs=31509):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |     14.9 |     57.9 |
| std        |      4.1 |     16.8 |
| min        |      8   |     28   |
| quantile25 |     12   |     46   |
| median     |     14   |     54   |
| quantile75 |     17   |     66   |
| max        |     52   |    206   |
+------------+----------+----------+ 



In [8]:
# Raw object-graph pickle stored under the HOEG dataset root.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
object_graph_path = "data/BPI17/feature_encodings/HOEG/hoeg/raw/bpi17_ofg+oi_graph+app_node_map+off_node_map.pkl"
with open(object_graph_path, "rb") as pickle_file:
    obj_dict = pickle.load(pickle_file)

# Display the entry stored under the "ofg" key.
obj_dict["ofg"]

HeteroData(
  [1mapplication[0m={
    y=[31509],
    x=[31509, 3]
  },
  [1moffer[0m={
    y=[42995],
    x=[42995, 7]
  },
  [1m(application, interacts, application)[0m={ edge_index=[2, 0] },
  [1m(application, interacts, offer)[0m={ edge_index=[2, 42995] },
  [1m(offer, interacts, offer)[0m={ edge_index=[2, 6027] }
)

In [12]:
# Load the pickled ocpa Feature_Storage from the EFG raw folder.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
feature_storage_path = "data/BPI17/feature_encodings/EFG/efg/raw/BPI_split_[C2_P2_P3_P5_O3_Action_EventOrigin_OrgResource].fs"
with open(feature_storage_path, "rb") as storage_file:
    fs: Feature_Storage = pickle.load(storage_file)

In [23]:
# One value per feature graph: the attribute keyed by the first execution
# feature, treated here as the case throughput time (per the variable names;
# TODO confirm the unit).
throughput_times = [
    graph.attributes[fs.execution_features[0]] for graph in fs.feature_graphs
]

# Case count plus descriptive statistics of the throughput times.
bpi17_cases_stats = dict(
    no_cases=len(fs.feature_graphs),
    min_tp=min(throughput_times),
    max_tp=max(throughput_times),
    median_tp=np.median(throughput_times),
    mean_tp=np.mean(throughput_times),
    std_tp=np.std(throughput_times),
)
pprint(bpi17_cases_stats)

{'max_tp': 14604259.821,
 'mean_tp': 1887853.9103989338,
 'median_tp': 1646735.9,
 'min_tp': 201.062,
 'no_cases': 31509,
 'std_tp': 1119596.759066281}
