In [6]:
# NOTE TO SELF: copy the local 'ocpa' directory into the forked GitHub repository so that updates can be pushed to GitHub.

# Python natives
import os
import pickle
from typing import Any, Optional

# Object centric process mining
import ocpa.algo.predictive_monitoring.factory as feature_factory
import ocpa.objects.log.importer.csv.factory as csv_import_factory
import ocpa.objects.log.importer.ocel.factory as ocel_import_factory
from ocpa.algo.predictive_monitoring import sequential, tabular
from ocpa.algo.predictive_monitoring.obj import Feature_Storage
from ocpa.objects.log.ocel import OCEL

# Simple machine learning models, procedure tools, and evaluation metrics
from sklearn.preprocessing import PowerTransformer, StandardScaler



In [10]:


def load_ocel(
    ocel_filename: str,
    parameters: Optional[dict[str, Any]] = None,
) -> OCEL:
    """Load an event log, choosing the importer from the file extension.

    The extension is read with ``os.path.splitext`` and compared
    case-insensitively, so ``LOG.CSV`` is treated like ``log.csv`` and a dot
    inside a directory name is not mistaken for an extension.

    Args:
        ocel_filename: Path to the event log. ``.csv`` and ``.jsonocel`` are
            the extensions that are dispatched to an importer.
        parameters: Forwarded to the CSV importer as its third argument.
            Not used for ``.jsonocel`` files.

    Returns:
        The object returned by the selected importer.

    Raises:
        NotImplementedError: If the file has the ``.xmlocel`` extension.
        ValueError: If the file has no extension or an unrecognised one.
    """
    # splitext returns the suffix with its leading dot (or "" when absent).
    raw_extension = os.path.splitext(ocel_filename)[1][1:]
    extension = raw_extension.lower()

    if extension == "csv":
        return csv_import_factory.apply(
            ocel_filename, csv_import_factory.TO_OCEL, parameters
        )
    if extension == "jsonocel":
        return ocel_import_factory.apply(ocel_filename)
    if extension == "xmlocel":
        raise NotImplementedError("XMLOCEL is not supported yet")
    if not extension:
        raise ValueError(
            f"{ocel_filename!r} has no file extension; expected .csv or .jsonocel"
        )
    # Report the extension exactly as the caller spelled it.
    raise ValueError(f"{raw_extension} is not supported yet")


def build_feature_storage(ocel, feature_set: Optional[list] = None) -> Feature_Storage:
    """Run feature extraction on ``ocel`` and return the result.

    Args:
        ocel: Event log object. For the default feature set it must expose
            ``ocel.log.log["event_activity"]`` and ``ocel.obj.meta.obj_types``.
        feature_set: Event-based features handed to ``feature_factory.apply``.
            ``None`` (the default) or the legacy marker ``["default"]`` selects
            the built-in feature set described below.

    Returns:
        Whatever ``feature_factory.apply`` returns for the chosen features.

    The built-in feature set consists of:
        * one ``EVENT_PRECEDING_ACTIVITIES`` feature per unique activity (C2),
        * ``EVENT_ELAPSED_TIME`` (P2),
        * ``EVENT_REMAINING_TIME`` (P3),
        * ``EVENT_PREVIOUS_TYPE_COUNT`` for the first listed object type (O3).
    """
    print("Constructing FeatureStorage object")

    # None replaces the former mutable list default; ["default"] is still
    # honoured so existing callers keep working.
    use_default_set = feature_set is None or feature_set == ["default"]
    if not use_default_set:
        return feature_factory.apply(ocel, event_based_features=feature_set)

    # all unique activities in the log
    activities = ocel.log.log["event_activity"].unique().tolist()
    default_features = [
        (feature_factory.EVENT_PRECEDING_ACTIVITIES, (act,)) for act in activities
    ]  # C2
    default_features += [
        (feature_factory.EVENT_ELAPSED_TIME, ()),  # P2
        (feature_factory.EVENT_REMAINING_TIME, ()),  # P3
        (
            feature_factory.EVENT_PREVIOUS_TYPE_COUNT,
            (ocel.obj.meta.obj_types[0],),  # pick first object type as feature
        ),  # O3
    ]
    return feature_factory.apply(ocel, event_based_features=default_features)


def pickle_feature_storage(feature_storage: Feature_Storage, path: str) -> None:
    """Pickle ``feature_storage`` into the file at ``path``.

    The file is opened in binary write mode, so existing content at ``path``
    is replaced. A progress message is printed before writing.
    """
    print("Pickling FeatureStorage object")
    with open(path, "wb") as sink:
        pickler = pickle.Pickler(sink)
        pickler.dump(feature_storage)

In [11]:
# Input event log. Its ".jsonocel" extension routes it to the JSON-OCEL importer in load_ocel.
# Both paths are relative, so they resolve against the kernel's current working directory.
ocel_file = "../../../data/BPI17/source/BPI2017-Final.jsonocel"
# Destination file for the pickled feature storage.
# NOTE(review): the bracketed tags in the name presumably list the extracted feature groups — confirm.
feature_storage_out_file = "../../../data/BPI17/feature_encodings/EFG/efg/raw/BPI2017-feature_storage-split-[C2,P2,P3,O3].fs"


In [4]:
# Import the event log from ocel_file; no importer parameters are passed.
ocel = load_ocel(ocel_file)

In [5]:
# Extract features with the built-in default feature set (no feature_set argument is given).
# In the recorded run below this processed 31509 items in roughly one minute.
feature_storage = build_feature_storage(ocel)

Constructing FeatureStorage object
Applying feature extraction to process executions


100%|██████████| 31509/31509 [00:57<00:00, 544.27it/s]


In [12]:
# Write the extracted feature storage to feature_storage_out_file as a pickle.
pickle_feature_storage(feature_storage, feature_storage_out_file)

Pickling FeatureStorage object
