In [1]:
from typing import Any, Callable
import pickle
import re

# Object centric process mining
import ocpa.objects.log.importer.ocel.factory as ocel_import_factory
from ocpa.algo.predictive_monitoring.obj import Feature_Storage
from ocpa.objects.log.ocel import OCEL
import ocpa.algo.predictive_monitoring.factory as feature_factory
from ocpa.algo.predictive_monitoring import tabular
import ocpa.objects.log.importer.csv.factory as csv_import_factory

In [2]:
# --- Input -------------------------------------------------------------------
# Object-centric event log that the rest of the notebook reads.
ocel_in_file = "../../../data/CS/source/cs_january_ots[krs_krv_cv]_eas[ea2_ea4_resource_robot_resource_person_resource_multi].jsonocel"

# Event attribute names (without the "event_" prefix that is added when they
# are handed to the feature extractor).
event_attributes = [
    "ea2",
    "ea4",
    "resource_robot",
    "resource_person",
    "resource_multi",
]

# --- Outputs -----------------------------------------------------------------
# Pickled feature storage.
# NOTE(review): the file name lists P2, P3 and O3, while the feature dictionary
# in this notebook also enables P5 -- confirm the name is still accurate.
feature_storage_out_file = "../../../data/CS/feature_encodings/EFG/raw/CS_split_[P2_P3_O3_ea2_ea4_resource_robot_person_multi].fs"

# Semicolon-separated table export.
eft_out_file = "../../../data/CS/feature_encodings/baselines/EFT/eft.csv"

In [3]:
# Event-level features to extract, keyed by a short feature id. Each value is a
# pair of (feature_factory constant, tuple passed along with it -- presumably
# the feature's arguments).
#
# Not part of this run:
#   P4  (EVENT_FLOW_TIME), P6 (EVENT_SOJOURN_TIME)      -- switched off
#   P7  (EVENT_POOLING_TIME), P8 (EVENT_LAGGING_TIME),
#   P9  (EVENT_SERVICE_TIME), P10 (EVENT_WAITING_TIME)  -- marked "error" by the author
#
# O3 is parameterised with "krs"; "krv" and "cv" are the alternatives that were
# tried under the same key.
event_level_features = dict(
    P2=(feature_factory.EVENT_ELAPSED_TIME, ()),
    P3=(feature_factory.EVENT_REMAINING_TIME, ()),
    P5=(feature_factory.EVENT_SYNCHRONIZATION_TIME, ()),
    O3=(feature_factory.EVENT_PREVIOUS_TYPE_COUNT, ("krs",)),
)

In [4]:
# Load the object-centric event log from the path held in `ocel_in_file`.
# NOTE(review): presumably returns an `OCEL` instance (that class is imported
# at the top of the notebook but not referenced elsewhere) -- confirm.
ocel = ocel_import_factory.apply(ocel_in_file)

In [5]:
# Prepare the extractor inputs first so the call below stays short:
#   * the (feature, parameters) pairs, in the order they were declared;
#   * the event attribute names, each prefixed with "event_".
event_feature_specs = list(event_level_features.values())
prefixed_event_attributes = [f"event_{ea}" for ea in event_attributes]

# Slow step -- the progress bar of the recorded run shows 07:42 for 301667 items.
# Author's note: takes much too long; resource (ot2) is an object type.
feature_storage = feature_factory.apply(
    ocel,
    event_based_features=event_feature_specs,
    event_attributes=prefixed_event_attributes,
)

Applying feature extraction to process executions


100%|██████████| 301667/301667 [07:42<00:00, 652.87it/s]  


In [6]:
# Split settings, named so they are easy to find and tune.
TEST_SIZE = 0.2
VALIDATION_SIZE = 0.2
SPLIT_STATE = 42  # fixed value passed as `state`; presumably the random seed -- confirm

# The return value is not captured, so the result is presumably kept on
# `feature_storage` itself.
feature_storage.extract_normalized_train_test_split(
    test_size=TEST_SIZE,
    validation_size=VALIDATION_SIZE,
    state=SPLIT_STATE,
)

In [7]:
# Persist the feature storage object to `feature_storage_out_file` as a pickle.
# The handle is deliberately NOT called `bin`: in a notebook the `with ... as`
# target is a kernel-global name, so `bin` would shadow the builtin `bin()` for
# every cell executed afterwards.
# NOTE: pickle files must only be loaded back from trusted locations.
with open(feature_storage_out_file, "wb") as fs_out:
    pickle.dump(feature_storage, fs_out)

In [8]:
# Build a table from the feature storage (the "EFT" that is exported to CSV).
# NOTE(review): presumably a pandas DataFrame, since `.rename(columns=...)` and
# `.to_csv(...)` are called on it afterwards -- confirm.
eft = tabular.construct_table(feature_storage)

In [9]:
# LightGBM does not support JSON special characters in column names, so every
# column name is reduced to ASCII letters, digits and underscores.
_unsupported_chars = re.compile("[^A-Za-z0-9_]+")


def _sanitize_column_name(col_name):
    """Return ``col_name`` as a string with every unsupported character removed."""
    return _unsupported_chars.sub("", str(col_name))


eft = eft.rename(columns=_sanitize_column_name)

# Export as a semicolon-separated CSV without the index column.
eft.to_csv(eft_out_file, sep=";", index=False)