In [1]:
import torch
import pandas as pd
import numpy as np
import pm4py
from pm4py.algo.transformation.ocel.features.objects import (
    algorithm as object_feature_factory,
)
from sklearn.preprocessing import StandardScaler
from typing import Any, Callable
import pickle
import re
import string

# Object centric process mining
import ocpa.objects.log.importer.ocel.factory as ocel_import_factory
from ocpa.algo.predictive_monitoring.obj import Feature_Storage
from ocpa.objects.log.ocel import OCEL
import ocpa.algo.predictive_monitoring.factory as feature_factory
from ocpa.algo.predictive_monitoring import tabular
import ocpa.objects.log.importer.csv.factory as csv_import_factory

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Input/output locations for this notebook; every path below is built relative to `prefix`.
prefix = "../"
# Event log in .jsonocel format that is imported with ocpa in the next cell.
# NOTE(review): the file name suggests the event attributes are count-encoded — confirm.
ocel_in_file = f"{prefix}data/BPI17/source/BPI2017-CountEncoded.jsonocel"
# Event attribute columns forwarded to the feature extraction as `event_attributes`.
event_attributes = [
    "event_Action_ce",
    "event_EventOrigin_ce",
    "event_OrgResource_ce",
]
# Path for a feature-storage (.fs) file; it is only defined here and is not read or
# written by any of the cells visible in this notebook section.
feature_storage_out_file = f"{prefix}data/BPI17/feature_encodings/EFG/efg/raw/BPI_split_[C2_P2_P3_P5_O3_Action_EventOrigin_OrgResource].fs"

In [3]:
# Read the object-centric event log and collect every distinct activity label.
ocel = ocel_import_factory.apply(ocel_in_file)
activities = ocel.log.log["event_activity"].unique().tolist()

# Adams used just C2, D1, P2, and O3 with P3 as the target variable.
# C2: one "preceding activities" feature per activity, keyed C2a, C2b, ...
# zip() stops at the shorter input, so only as many activities as there are
# lowercase ASCII letters (26) receive a key.
event_level_features = dict(
    (f"C2{letter}", (feature_factory.EVENT_PRECEDING_ACTIVITIES, (activity,)))
    for activity, letter in zip(activities, string.ascii_lowercase)
)
# Remaining event-level features; the commented-out entries record what was tried
# and why it is disabled.
event_level_features.update(
    {
        # "D1": (
        #     feature_factory.EVENT_AGG_PREVIOUS_CHAR_VALUES,
        #     ("event_RequestedAmount", max),
        # ), # error ('event_RequestedAmount' has become an object attribute)
        "P2": (feature_factory.EVENT_ELAPSED_TIME, ()),
        "P3": (feature_factory.EVENT_REMAINING_TIME, ()),
        # "P4": (feature_factory.EVENT_FLOW_TIME, ()),
        "P5": (feature_factory.EVENT_SYNCHRONIZATION_TIME, ()),
        # "P6": (feature_factory.EVENT_SOJOURN_TIME, ()),
        # "P7": (feature_factory.EVENT_POOLING_TIME, ()), # error
        # "P8": (feature_factory.EVENT_LAGGING_TIME, ()), # error
        # "P9": (feature_factory.EVENT_SERVICE_TIME, ()), # error
        # "P10": (feature_factory.EVENT_WAITING_TIME, ()), # error
        "O3": (feature_factory.EVENT_PREVIOUS_TYPE_COUNT, ("offer",)),
    }
)

# Only the (feature, parameters) tuples are passed on; the dict keys serve as
# human-readable labels in this cell.
feature_storage = feature_factory.apply(
    ocel,
    event_attributes=event_attributes,
    event_based_features=[*event_level_features.values()],
)

In [5]:
# Split the feature storage into train/validation/test parts and normalize it,
# passing the StandardScaler class (not an instance) as the scaler.
# NOTE(review): 0.2 * 0.7 presumably expresses "20% of the 70% left after the test
# split" — confirm how ocpa interprets `validation_size`.
feature_storage.extract_normalized_train_test_split(
    test_size=0.3,
    validation_size=0.2 * 0.7,
    scaler=StandardScaler,
)

In [6]:
# Display the table (one row per event_id, one column per feature) built from the
# feature graphs held by the feature storage.
# NOTE(review): `_event_id_table` is underscore-prefixed, i.e. not part of the public
# API by Python convention — confirm it is safe to rely on.
feature_storage._event_id_table(feature_storage.feature_graphs)

Unnamed: 0,event_id,"(event_preceding_activities, (Create application,))","(event_preceding_activities, (Submit,))","(event_preceding_activities, (Handle leads,))","(event_preceding_activities, (Accept,))","(event_preceding_activities, (Create offer,))","(event_preceding_activities, (Send (online),))","(event_preceding_activities, (Complete,))","(event_preceding_activities, (Call,))","(event_preceding_activities, (Cancel offer,))",...,"(event_preceding_activities, (Assess potential fraud,))","(event_preceding_activities, (Personal loan collection,))","(event_preceding_activities, (Shorten completion,))","(event_elapsed_time, ())","(event_remaining_time, ())","(event_synchronization_time, ())","(event_previous_type_count, (offer,))",event_Action_ce,event_EventOrigin_ce,event_OrgResource_ce
0,116864,-0.295218,-0.233543,-0.09739,-0.295218,1.894681,-0.070454,-0.286935,-0.399511,-0.134975,...,-0.025228,-0.003014,-0.013141,-0.699928,8.664050,-0.161228,1.072969,0.811663,1.190012,-0.398756
1,116865,-0.295218,-0.233543,-0.09739,-0.295218,4.313759,-0.070454,-0.286935,-0.399511,-0.134975,...,-0.025228,-0.003014,-0.013141,-0.699920,8.664043,-0.160873,1.072969,0.811663,1.190012,-0.398756
2,116866,-0.295218,-0.233543,-0.09739,-0.295218,1.894681,-0.070454,-0.286935,-0.399511,-0.134975,...,-0.025228,-0.003014,-0.013141,-0.699896,8.664021,-0.161228,1.072969,0.811663,1.190012,-0.398756
3,116867,-0.295218,-0.233543,-0.09739,-0.295218,-0.524398,-0.070454,3.485107,-0.399511,-0.134975,...,-0.025228,-0.003014,-0.013141,-0.699896,8.664021,-0.161227,1.072969,-0.925495,-0.316574,-0.398756
4,116868,-0.295218,-0.233543,-0.09739,-0.295218,-0.524398,-0.070454,-0.286935,2.502105,-0.134975,...,-0.025228,-0.003014,-0.013141,1.365369,6.866140,-0.161228,2.195271,0.811663,1.190012,-0.318209
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
393926,391976,3.387323,-0.233543,-0.09739,-0.295218,-0.524398,-0.070454,-0.286935,-0.399511,-0.134975,...,-0.025228,-0.003014,-0.013141,-0.701289,-0.625778,-0.161228,-1.171635,-0.925495,-0.316574,-0.278062
393927,391977,-0.295218,-0.233543,-0.09739,-0.295218,-0.524398,-0.070454,3.485107,-0.399511,-0.134975,...,-0.025228,-0.003014,-0.013141,-0.283164,-0.989769,-0.161228,-1.171635,0.811663,-1.207062,-0.467203
393928,391978,-0.295218,-0.233543,-0.09739,3.387323,-0.524398,-0.070454,-0.286935,-0.399511,-0.134975,...,-0.025228,-0.003014,-0.013141,-0.282978,-0.989931,-0.161228,-0.049333,0.811663,1.190012,-0.467203
393929,391979,-0.295218,-0.233543,-0.09739,-0.295218,1.894681,-0.070454,-0.286935,-0.399511,-0.134975,...,-0.025228,-0.003014,-0.013141,-0.282968,-0.989940,-0.161228,-0.049333,0.811663,1.190012,-0.467203


In [16]:
# Broadcasting demo: adding a [1, 3] tensor to a [2, 3] tensor.
# The single row of `tensor2` is applied to every row of `tensor1`.
tensor1 = torch.tensor([[1, 2, 3], [4, 5, 6]])  # shape [2, 3]
tensor2 = torch.tensor([[10, 20, 30]])  # shape [1, 3]

# torch.add is the functional spelling of `+`: element-wise, with broadcasting.
result = torch.add(tensor1, tensor2)
result

tensor([[11, 22, 33],
        [14, 25, 36]])

In [17]:
def normalize_columns(df: pd.DataFrame, col_names: list[str]) -> pd.DataFrame:
    """Z-score normalize the selected columns of ``df`` in place.

    Each column in ``col_names`` is replaced by ``(x - mean) / std``, using the
    pandas defaults for ``mean`` and ``std`` (sample standard deviation, ddof=1).

    Columns whose standard deviation is zero (constant columns) or undefined
    (e.g. a single-row frame) are centered but divided by 1 instead, so they
    become 0.0 rather than NaN from a 0/0 division.

    Args:
        df: Frame to normalize. It is modified in place.
        col_names: Names of the columns to normalize. An empty selection
            leaves ``df`` untouched.

    Returns:
        The same ``df`` object that was passed in, after normalization.
    """
    # len() rather than truthiness so array-like selections (Index, ndarray) work too.
    if len(col_names) == 0:
        return df

    selected = df[col_names]
    std = selected.std()
    # std is never negative; `std > 0` is False for both 0 and NaN, so those
    # columns fall back to a divisor of 1.
    safe_std = std.where(std > 0, 1.0)
    df[col_names] = (selected - selected.mean()) / safe_std
    return df

In [18]:
# Location of the OCEL file that pm4py reads below.
ocel_file = "../data/BPI17/source/BPI2017-Final.jsonocel"


# %%
# load OCEL
# Note: this rebinds `ocel`, which earlier in the notebook held the log imported
# through ocpa, to the object returned by pm4py.
ocel = pm4py.read.read_ocel(ocel_file)

# %%
# encode boolean variables as 1/0, then expose the row index as an
# `object_index` column
ocel.objects = (
    ocel.objects.assign(
        event_Accepted=lambda objs: objs["event_Accepted"].replace(
            {True: 1, False: 0}
        ),
        event_Selected=lambda objs: objs["event_Selected"].replace(
            {True: 1, False: 0}
        ),
    )
    .reset_index()
    .rename(columns={"index": "object_index"})
)

# %%
# define object attributes per object type, split into string-valued ("str")
# and numeric ("num") attribute names
application_attributes = dict(
    str=["event_LoanGoal", "event_ApplicationType"],
    num=["event_RequestedAmount"],
)
offer_attributes = dict(
    str=[],
    num=[
        "event_NumberOfTerms",
        "event_Accepted",
        "event_Selected",
        "event_OfferedAmount",
        "event_CreditScore",
        "event_FirstWithdrawalAmount",
        "event_MonthlyCost",
    ],
)

# %%
# create object-level feature matrix
data, feature_names = object_feature_factory.apply(
    ocel,
    parameters={
        "str_obj_attr": [
            "ocel:type",
            *application_attributes["str"],
            *offer_attributes["str"],
        ],
        "num_obj_attr": [
            "object_index",  # include object_index for reference
            *application_attributes["num"],
            *offer_attributes["num"],
        ],
    },
)

In [65]:
# Wrap the object-level feature matrix in a DataFrame, labelling the columns with
# the feature names returned alongside it, and preview the first rows.
df = pd.DataFrame(data, columns=feature_names)
df.head()
# Disabled experiment with the hand-written normalizer.
# NOTE(review): 'ocel:eid' does not appear among the columns displayed for this
# frame — confirm the intended column before re-enabling.
# normalize_columns(df, ['ocel:eid'])

Unnamed: 0,@@object_lifecycle_length,@@object_lifecycle_duration,@@object_lifecycle_start_timestamp,@@object_lifecycle_end_timestamp,@@object_degree_centrality,@@object_general_interaction_graph,@@object_general_descendants_graph_ascendants,@@object_general_descendants_graph_descendants,@@object_general_inheritance_graph_ascendants,@@object_general_inheritance_graph_descendants,...,@@event_num_event_NumberOfTerms,@@event_num_event_Accepted,@@event_num_event_Selected,@@event_num_event_OfferedAmount,@@event_num_event_CreditScore,@@event_num_event_FirstWithdrawalAmount,@@event_num_event_MonthlyCost,@@object_interaction_graph_application,@@object_interaction_graph_offer,@@object_lifecycle_unq_act
0,11,1144676.116,1451645000.0,1452790000.0,1.3e-05,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,10
1,5,1052406.062,1451738000.0,1452790000.0,1.3e-05,1,1,0,0,0,...,44.0,1.0,1.0,20000.0,979.0,20000.0,498.29,1,0,5
2,9,530018.225,1451647000.0,1452177000.0,1.3e-05,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,9
3,5,440829.268,1451736000.0,1452177000.0,1.3e-05,1,1,0,0,0,...,33.0,0.0,0.0,6000.0,0.0,500.0,200.0,1,0,5
4,13,1107636.273,1451651000.0,1452758000.0,2.7e-05,2,0,2,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2,11


In [66]:
# x = df.values # returns a numpy array
scaler = StandardScaler()
df.iloc[:,1:2] = scaler.fit_transform(df.iloc[:,1:2])
df.head()

Unnamed: 0,@@object_lifecycle_length,@@object_lifecycle_duration,@@object_lifecycle_start_timestamp,@@object_lifecycle_end_timestamp,@@object_degree_centrality,@@object_general_interaction_graph,@@object_general_descendants_graph_ascendants,@@object_general_descendants_graph_descendants,@@object_general_inheritance_graph_ascendants,@@object_general_inheritance_graph_descendants,...,@@event_num_event_NumberOfTerms,@@event_num_event_Accepted,@@event_num_event_Selected,@@event_num_event_OfferedAmount,@@event_num_event_CreditScore,@@event_num_event_FirstWithdrawalAmount,@@event_num_event_MonthlyCost,@@object_interaction_graph_application,@@object_interaction_graph_offer,@@object_lifecycle_unq_act
0,11,-0.558083,1451645000.0,1452790000.0,1.3e-05,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,10
1,5,-0.640529,1451738000.0,1452790000.0,1.3e-05,1,1,0,0,0,...,44.0,1.0,1.0,20000.0,979.0,20000.0,498.29,1,0,5
2,9,-1.107298,1451647000.0,1452177000.0,1.3e-05,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,9
3,5,-1.186991,1451736000.0,1452177000.0,1.3e-05,1,1,0,0,0,...,33.0,0.0,0.0,6000.0,0.0,500.0,200.0,1,0,5
4,13,-0.591179,1451651000.0,1452758000.0,2.7e-05,2,0,2,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2,11


In [89]:
# Two loss values expressed in normalized units, hard-coded here.
# NOTE(review): presumably copied from separate HOEG / OFT model runs — confirm
# their origin and that they were produced on the scale of this `scaler`.
hoeg_loss = 1.0612298846244812
oft_loss = 0.9834069699259738

# `inverse_transform` is documented to take a 2D (n_samples, n_features) array, and
# current scikit-learn releases reject 1D input. Pass the two losses as a column
# vector (two samples of one feature) and flatten the result back to 1D.
# This relies on `scaler` having been fitted on exactly one column, as done in the
# cell that standardizes `df.iloc[:, 1:2]`.
normalized_losses = np.array([[hoeg_loss], [oft_loss]])
restored_losses = scaler.inverse_transform(normalized_losses).ravel()
restored_hoeg_loss = restored_losses[0]
restored_oft_loss = restored_losses[1]

# The scaler's mean cancels in the difference, so this equals the normalized
# difference multiplied by the scaler's scale.
print(f"Loss differences when denormalized: {restored_hoeg_loss - restored_oft_loss}")

Loss differences when denormalized: 87096.0892041591
