In [45]:
""" This script transforms BPI2017 to a full OCEL, with separated events and objects and their attributes. """

# Python natives
import heapq
import random
import pickle
import json
from copy import copy
from typing import Any
import logging

import ocpa.algo.predictive_monitoring.factory as feature_factory
import ocpa.objects.log.importer.csv.factory as csv_import_factory

# Object centric process mining
import ocpa.objects.log.importer.ocel.factory as ocel_import_factory
from ocpa.algo.predictive_monitoring.obj import Feature_Storage
from ocpa.objects.log.exporter.ocel import factory as ocel_export_factory
from ocpa.objects.log.ocel import OCEL

# Simple machine learning models, procedure tools, and evaluation metrics
from sklearn.preprocessing import PowerTransformer, StandardScaler


def load_ocel(
    ocel_filename: str,
    parameters: dict[str, Any] = None,
) -> OCEL:
    extension = ocel_filename.split(".")[-1]
    if extension == "csv":
        return csv_import_factory.apply(
            ocel_filename, csv_import_factory.TO_OCEL, parameters
        )
    elif extension == "jsonocel":
        return ocel_import_factory.apply(ocel_filename)
    elif extension == "xmlocel":
        raise Exception("XMLOCEL is not supported yet")
    else:
        raise Exception(f"{extension} is not supported yet")

ocel_in_file = "BPI17/BPI2017-Final.csv"
ocel_out_file = "BPI17/BPI2017.jsonocel"

In [46]:
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    filename="debug/debug.log",
)
logging.critical("-" * 32)

In [47]:
# Define event attributes
event_attributes = [
    "event_Action",
    "event_EventOrigin",
    # "event_org:resource",
]

# Define object attributes
application_attributes = [
    "event_RequestedAmount",
    "event_ApplicationType",
    "event_LoanGoal",
]
offer_attributes = [
    "event_OfferedAmount",
    "event_CreditScore",
    "event_Selected",
    "event_MonthlyCost",
    "event_NumberOfTerms",
    "event_Accepted",
    "event_FirstWithdrawalAmount",
]

# Load full BPI17 CSV as OCEL
ocel = load_ocel(
    ocel_filename=ocel_in_file,
    parameters={
        "obj_names": ["application", "offer"],
        "val_names": event_attributes + offer_attributes + application_attributes,
        "act_name": "event_activity",
        "time_name": "event_timestamp",
        "sep": ",",
    },
)

# Export BPI17 OCEL as JSON
ocel_export_factory.apply(ocel, ocel_out_file)
print("Success")


Success


In [48]:
# Load JSONOCEL
with open(ocel_out_file) as json_data:
    data = json.load(json_data)


In [49]:
def sample_dict(n: int, dy: dict, seed:int=42) -> dict:
    random.seed(seed)
    return {k:dy[k] for k in random.sample(dy.keys(), n)}

events = copy(data['ocel:events'])
# events = sample_dict(1000, events)
objects = copy(data['ocel:objects'])

In [50]:
def get_related_event(
    object_id: str, events: dict[str, Any], attr_names: set[str]
) -> str:
    # there may be multiple events related to an object, but we just take one of them
    event_keys = {ekey for ekey in events if object_id in events[ekey]["ocel:omap"]}
    if event_keys:
        # # check if all passed attr_names are present as event attributes
        # set_of_ev_attr_names = {
        #     key: event["ocel:vmap"].keys()
        #     for key, event in [(k, events[k]) for k in event_keys]
        # }
        # for ev_key, ev_attr_names in set_of_ev_attr_names.items():
        #     if len(attr_names - ev_attr_names) == 0:
        #         # return only the event key, where the requested event attributes are present
        #         return ev_key
        return next(iter(event_keys))
    elif len(event_keys)>1:
        return next(next(iter(event_keys)))


def object_attributes_from_event_attributes(
    attr_names: set[str],
    object_type: str,
    events: dict[str, Any],
    objects: dict[str, Any],
) -> tuple[dict[str, Any], dict[str, Any]]:
    # for given object type get related events per object
    oid_events_map = {
        oid: get_related_event(oid, events,attr_names)
        for oid in (oid for oid, v in objects.items() if v["ocel:type"] == object_type)
    }
    oid_events_map = {
        k: v for k, v in oid_events_map.items() if v
    }  # filter out objects that have no related events

    # lookup event attributes and append to each object
    for oid, eid in oid_events_map.items():
        # gather object attributes and remove from events dictionary
        object_attributes = {
            attr_name: events[eid]["ocel:vmap"].pop(attr_name), None)
            for attr_name in attr_names
        }
        # object_attributes = dict()
        # for attr_name in attr_names:
        #     d= events[eid]
        #     logging.critical(f"{eid}: {d}, next: {attr_name}")
        #     object_attributes[attr_name] = events[eid]["ocel:vmap"].pop(attr_name,None)

        # append object attributes to objects dictionary
        objects[oid]["ocel:ovmap"] = object_attributes

    return events, objects


In [51]:
# Make offer object attributes
events, objects = object_attributes_from_event_attributes(
    set(offer_attributes), "offer", events, objects
)


# make params dict as imput with map from ot to oa's

In [None]:
# Make application object attributes
events, objects = object_attributes_from_event_attributes(
    set(application_attributes), "application", events, objects
)

In [None]:
x = {k:v for k,v in objects.items() if v['ocel:ovmap']}
# {k:v for k,v in x.items() if v['ocel:ovmap']['event_OfferedAmount']}
len(x)

500

In [None]:
# check which offer object attributes came through (which are present in all offer objects)
objects

# check which offer application attributes came through (which are present in all application objects)


In [None]:
y = {k:v for k,v in objects.items() if v['ocel:type']=='offer'}
len(y)

42995

In [None]:
data['ocel:objects']['Application_652823628']['ocel:ovmap'] = {'horse':3,'paart':'nee'}

In [18]:
data['ocel:objects']

{'Application_652823628': {'ocel:type': 'application', 'ocel:ovmap': {}},
 'Offer_148581083': {'ocel:type': 'offer', 'ocel:ovmap': {}},
 'Application_1691306052': {'ocel:type': 'application', 'ocel:ovmap': {}},
 'Offer_1669071500': {'ocel:type': 'offer', 'ocel:ovmap': {}},
 'Application_428409768': {'ocel:type': 'application', 'ocel:ovmap': {}},
 'Offer_1209840642': {'ocel:type': 'offer', 'ocel:ovmap': {}},
 'Offer_997411923': {'ocel:type': 'offer', 'ocel:ovmap': {}},
 'Application_1746793196': {'ocel:type': 'application', 'ocel:ovmap': {}},
 'Offer_1319158006': {'ocel:type': 'offer', 'ocel:ovmap': {}},
 'Offer_774210695': {'ocel:type': 'offer', 'ocel:ovmap': {}},
 'Application_828200680': {'ocel:type': 'application', 'ocel:ovmap': {}},
 'Offer_607758096': {'ocel:type': 'offer', 'ocel:ovmap': {}},
 'Application_1085880569': {'ocel:type': 'application', 'ocel:ovmap': {}},
 'Offer_213805292': {'ocel:type': 'offer', 'ocel:ovmap': {}},
 'Offer_644568599': {'ocel:type': 'offer', 'ocel:ovmap