In [1]:
import os

def go_up_n_directories(path, n):
    """Return the absolute path ``n`` levels above the parent directory of ``path``.

    ``os.path.dirname`` is applied first, so for a directory path without a
    trailing separator the result is ``n + 1`` levels above that directory
    (e.g. ``/a/b/c/d`` with ``n=1`` gives ``/a/b``).

    Args:
        path: File or directory path to start from.
        n: Number of ``..`` segments appended after taking ``dirname(path)``.

    Returns:
        The normalised absolute path as a string.
    """
    return os.path.abspath(os.path.join(os.path.dirname(path), *([".."] * n)))


# Remember the directory the kernel started in the first time this cell runs,
# and always change directory relative to it. Re-running the cell is then a
# no-op instead of climbing further up the tree on every execution.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(go_up_n_directories(NOTEBOOK_START_DIR, 1))

In [2]:
# Imports
import pm4py
import pm4py.utils
import pm4py.read
from pm4py.objects.ocel.obj import OCEL
import ocpa.objects.log.importer.ocel.factory as ocel_import_factory
import ocpa.algo.predictive_monitoring.factory as feature_factory
from ocpa.algo.predictive_monitoring.obj import Feature_Storage
import torch_geometric.transforms as T
import json
from typing import Any
from copy import copy
import pickle
import pandas as pd
import numpy as np
from pprint import pprint

from utilities import hetero_data_utils, data_utils
from experiments.hoeg import HOEG

In [3]:
# Path (relative to the working directory) of the .jsonocel log used below.
# The bracketed groups in the file name presumably list the object types
# (ots), object attributes (oas) and event attributes (eas) kept in the
# export -- TODO confirm against the export script.
ocel_in_file = (
    "data/CS/source/cs_january_only_linked"
    "_ots[krs_krv_cv]"
    "_oas[oa1_oa2_oa3_oa4_oa5_oa6_oa7_oa8_oa10_oa11_oa12_oa13_oa15_oa16]"
    "_eas[resourceCE_resourceMulti_ea1_ea2_ea3_ea4_ea6_ea8_ea10_ea12_ea14]"
    ".jsonocel"
)

In [4]:
# Load the log through ocpa's OCEL import factory. The resulting object is the
# input of the feature extraction (feature_factory.apply) further down.
ocpa_ocel = ocel_import_factory.apply(ocel_in_file)

In [3]:
# Per-graph transforms, composed in this order when a graph is accessed.
transformations = [
    hetero_data_utils.AddObjectSelfLoops(),
    T.AddSelfLoops(),
    T.NormalizeFeatures(),
]
hoeg_transform = T.Compose(transformations)

# One node type per object type, each linked to the event nodes through an
# "interacts" relation; events are linked to each other through "follows".
object_node_types = ["krs", "krv", "cv"]
edge_types = [("event", "follows", "event")]
edge_types += [
    (object_type, "interacts", "event") for object_type in object_node_types
]

cs_hoeg_ds = HOEG(
    root="data/CS/feature_encodings/HOEG/hoeg",
    events_filename="CS_split_[C2_P2_P3_O3_eas].fs",
    objects_filename="cs_ofg+oi_graph+krs_node_map+krv_node_map+cv_node_map.pkl",
    event_node_label_key=(feature_factory.EVENT_REMAINING_TIME, ()),
    object_nodes_label_key="@@object_lifecycle_duration",
    edge_types=edge_types,
    object_node_types=object_node_types,
    transform=hoeg_transform,
)

# Pull the whole dataset as one batch so the cell displays the aggregate
# node/edge tensor shapes.
full_batch_loader = data_utils.DataLoader(cs_hoeg_ds, batch_size=cs_hoeg_ds.len())
next(iter(full_batch_loader))

Processing...
31277it [08:25, 61.90it/s]
Done!


HeteroDataBatch(
  [1mevent[0m={
    x=[695694, 15],
    y=[695694],
    batch=[695694],
    ptr=[31278]
  },
  [1mkrs[0m={
    x=[31513, 20],
    y=[31513],
    batch=[31513],
    ptr=[31278]
  },
  [1mkrv[0m={
    x=[31357, 20],
    y=[31357],
    batch=[31357],
    ptr=[31278]
  },
  [1mcv[0m={
    x=[31278, 20],
    y=[31278],
    batch=[31278],
    ptr=[31278]
  },
  [1m(event, follows, event)[0m={ edge_index=[2, 1360111] },
  [1m(krs, interacts, event)[0m={ edge_index=[2, 235756] },
  [1m(krv, interacts, event)[0m={ edge_index=[2, 466965] },
  [1m(cv, interacts, event)[0m={ edge_index=[2, 56137] }
)

In [4]:
# Print summary statistics for the HOEG dataset built above (the captured
# output of this cell is a per-graph table of node and edge counts).
data_utils.print_dataset_summaries(cs_hoeg_ds)

Train set


100%|██████████| 31277/31277 [00:34<00:00, 904.76it/s]

HOEG (#graphs=31277):
+------------+----------+----------+
|            |   #nodes |   #edges |
|------------+----------+----------|
| mean       |     25.3 |     70.8 |
| std        |     13   |     39.1 |
| min        |      7   |     16   |
| quantile25 |     16   |     43   |
| median     |     23   |     64   |
| quantile75 |     31   |     88   |
| max        |    161   |    478   |
+------------+----------+----------+ 






In [5]:
# Only one execution-level feature is extracted: the throughput of each
# process execution. It is read back by position (execution_features[0]) when
# the case statistics are computed.
throughput_feature = (feature_factory.EXECUTION_THROUGHPUT, ())

# min_execution_length=4 presumably drops process executions with fewer than
# four events -- TODO confirm against the ocpa documentation.
fs: Feature_Storage = feature_factory.apply(
    ocpa_ocel,
    execution_based_features=[throughput_feature],
    min_execution_length=4,
)

100%|██████████| 269725/269725 [00:36<00:00, 7326.53it/s] 


In [6]:
# Size of every feature graph (one graph per process execution).
trace_lengths = []
for feature_graph in fs.feature_graphs:
    trace_lengths.append(feature_graph.size)

# Event-level summary: total event count plus the distribution of trace lengths.
cs_events_stats = dict(
    no_events=sum(trace_lengths),
    min_trace_len=min(trace_lengths),
    max_trace_len=max(trace_lengths),
    median_trace_len=np.median(trace_lengths),
    mean_trace_len=np.mean(trace_lengths),
    std_trace_len=np.std(trace_lengths),
)
pprint(cs_events_stats)

{'max_trace_len': 158,
 'mean_trace_len': 22.242990056591104,
 'median_trace_len': 20.0,
 'min_trace_len': 4,
 'no_events': 695694,
 'std_trace_len': 13.034722696164092}


In [7]:
# The first (and, in the extraction cell above, only) execution-level feature
# is the throughput of the process execution.
throughput_key = fs.execution_features[0]
throughput_times = [
    feature_graph.attributes[throughput_key] for feature_graph in fs.feature_graphs
]

# Case-level summary: number of cases plus the throughput distribution.
cs_cases_stats = dict(
    no_cases=len(fs.feature_graphs),
    min_tp=min(throughput_times),
    max_tp=max(throughput_times),
    median_tp=np.median(throughput_times),
    mean_tp=np.mean(throughput_times),
    std_tp=np.std(throughput_times),
)
pprint(cs_cases_stats)

{'max_tp': 10111075.021,
 'mean_tp': 2957048.255577485,
 'median_tp': 2432327.002,
 'min_tp': 1.143,
 'no_cases': 31277,
 'std_tp': 2354827.121669342}


In [13]:
# Raw object-graph pickle consumed by the HOEG dataset.
# NOTE: pickle.load executes arbitrary code from the file -- only load
# pickles produced by this project.
objects_pickle_path = (
    "data/CS/feature_encodings/HOEG/hoeg/raw/"
    "cs_ofg+oi_graph+krs_node_map+krv_node_map+cv_node_map.pkl"
)
with open(objects_pickle_path, "rb") as pickle_file:
    obj_dict = pickle.load(pickle_file)

# Show which feature columns are stored for the "krs" object type.
krs_feature_matrix = obj_dict["object_feature_matrices"]["krs"]
set(krs_feature_matrix.columns.tolist())

{'@@event_num_oa1',
 '@@event_num_oa10',
 '@@event_num_oa11',
 '@@event_num_oa12',
 '@@event_num_oa13',
 '@@event_num_oa15',
 '@@event_num_oa16',
 '@@event_num_oa2',
 '@@event_num_oa3',
 '@@event_num_oa4',
 '@@event_num_oa5',
 '@@event_num_oa6',
 '@@event_num_oa7',
 '@@event_num_oa8',
 '@@object_lifecycle_duration',
 'krs_index',
 'object_index'}

In [None]:
# Read the same .jsonocel file with pm4py (in addition to the ocpa import) so
# the raw object and event tables can be inspected as DataFrames.
ocel: OCEL = pm4py.read.read_ocel(ocel_in_file)
# Extended table view of the log; not used by any other cell visible in this
# notebook.
ocel_table = ocel.get_extended_table()

In [4]:
# Shape of the object table, followed by the frequency of each distinct value
# of the "oa9" object attribute.
# NOTE(review): "oa9" is not among the attributes listed in the name of
# ocel_in_file -- confirm the column exists in the file currently loaded.
objects_df = ocel.objects
print(objects_df.shape)
objects_df["oa9"].value_counts()

(324035, 17)


0.835691    224507
0.164309     99528
Name: oa9, dtype: int64

In [4]:
# Shape of the event table, followed by a preview of its first rows.
events_df = ocel.events
print(events_df.shape)
events_df.head()

(1752998, 14)


Unnamed: 0,ocel:eid,ocel:timestamp,ocel:activity,event_ea10,event_ea8,event_ea2,event_ea12,event_ea4,event_ea6,event_resourceCE,event_ea3,event_ea14,event_resourceMulti,event_ea1
0,1,2023-01-01 10:06:44.609,Finish Task,0.224765,0.216395,0.526823,0.274337,4.0,0.26262,0.825684,0.01904,0.019645,0,0.198374
1,2,2023-01-01 10:08:48.993,Finish Task,0.224765,0.216395,0.526823,0.274337,4.0,0.26262,0.825684,0.01904,0.019645,0,0.198374
2,3,2023-01-01 10:08:49.088,Update Object State,0.224765,0.216395,0.526823,0.274337,4.0,0.26262,0.825684,0.01904,0.019645,0,0.198374
3,4,2023-01-01 10:08:49.091,Reallocate Task,0.224765,0.216395,0.526823,0.274337,4.0,0.26262,0.825684,0.01904,0.019645,1,0.198374
4,5,2023-01-01 10:09:46.678,Finish Task,0.040257,0.247333,-0.061526,0.274337,3.0,0.26262,0.825684,0.0739,0.019645,0,0.198374


In [5]:
# Load the raw JSON form of the log for ad-hoc inspection.
# The file handle gets its own name so it no longer overwrites `ocel` (the
# pm4py log object) with a closed file, which broke re-running the cells that
# inspect ocel.objects / ocel.events. The encoding is set explicitly so the
# read does not depend on the platform's default encoding.
with open(ocel_in_file, "r", encoding="utf-8") as ocel_json_file:
    data = json.load(ocel_json_file)

In [29]:
# Flatten every event's object map ("ocel:omap") into a single list of object
# ids; an id appears once for each event that references it.
res = [
    object_id
    for event in data["ocel:events"].values()
    for object_id in event["ocel:omap"]
]

In [30]:
# Count how many entries of the flattened object-map list `res` equal this
# object id, i.e. how many event/object references point to it.
# NOTE(review): the name suggests this id caused a failure elsewhere -- the
# reason is not recorded in this notebook.
failure = "KRV-3723804"
res.count(failure)

2