In [15]:
import torch
import pandas as pd
import numpy as np
import pm4py
from pm4py.algo.transformation.ocel.features.objects import algorithm as object_feature_factory
from sklearn.preprocessing import StandardScaler

In [16]:
# Broadcasting demo: adding a [1, 3] tensor to a [2, 3] tensor.
# The size-1 leading dimension of the second operand is stretched to match,
# so the same row is added to every row of the first operand.
tensor1 = torch.tensor([[1, 2, 3], [4, 5, 6]])  # shape [2, 3]
tensor2 = torch.tensor([[10, 20, 30]])  # shape [1, 3]

# Element-wise sum; the result has shape [2, 3].
result = torch.add(tensor1, tensor2)
result

tensor([[11, 22, 33],
        [14, 25, 36]])

In [17]:
def normalize_columns(df: pd.DataFrame, col_names: list[str]) -> pd.DataFrame:
    """Standardize the given columns of ``df`` to zero mean and unit variance.

    Each selected column is replaced by ``(x - mean) / std``, where ``std`` is
    the pandas default sample standard deviation (``ddof=1``).

    Columns whose standard deviation is zero (constant column) or undefined
    (e.g. a single-row frame) are only centered, i.e. divided by 1, so they do
    not turn into NaN through a 0/0 division.

    Args:
        df: Frame to normalize. It is modified IN PLACE.
        col_names: Names of the columns to standardize. A single column name
            given as a plain string is accepted as well.

    Returns:
        The same ``df`` object that was passed in, for call chaining.
    """
    if isinstance(col_names, str):
        col_names = [col_names]

    selected = df[col_names]
    centered = selected - selected.mean()

    spread = selected.std()
    # A zero or undefined spread would produce NaN/inf; divide by 1 instead.
    spread = spread.mask(spread.isna() | (spread == 0), 1.0)

    df[col_names] = centered / spread
    return df

In [18]:
# Path (relative to the notebook) of the BPI2017 object-centric event log.
ocel_file = "../data/BPI17/source/BPI2017-Final.jsonocel"

# %%
# Read the log from disk into a pm4py OCEL object.
ocel = pm4py.read.read_ocel(ocel_file)

# %%
# Recode the two boolean flags as 1/0 so they can be requested as numeric
# object attributes; any value other than True/False is left untouched.
for flag_column in ("event_Accepted", "event_Selected"):
    ocel.objects[flag_column] = ocel.objects[flag_column].replace(
        {True: 1, False: 0}
    )

# Turn the current index into a regular column named "object_index"
# (reset_index emits it as "index" — assumes the index is unnamed).
ocel.objects = ocel.objects.reset_index().rename(columns={"index": "object_index"})

# %%
# Object attributes per object type, grouped by kind:
#   "str" -> string-valued attributes, "num" -> numeric attributes.
# event_Accepted / event_Selected are listed as numeric because the boolean
# flags are recoded to 1/0 before feature extraction.
application_attributes = dict(
    str=["event_LoanGoal", "event_ApplicationType"],
    num=["event_RequestedAmount"],
)
offer_attributes = dict(
    str=[],
    num=[
        "event_NumberOfTerms",
        "event_Accepted",
        "event_Selected",
        "event_OfferedAmount",
        "event_CreditScore",
        "event_FirstWithdrawalAmount",
        "event_MonthlyCost",
    ],
)

# %%
# Extract the object-level feature matrix and the matching feature names.
# String and numeric object attributes of both object types are requested,
# together with the object type itself ("ocel:type").
data, feature_names = object_feature_factory.apply(
    ocel,
    parameters={
        "str_obj_attr": [
            "ocel:type",
            *application_attributes["str"],
            *offer_attributes["str"],
        ],
        "num_obj_attr": [
            "object_index",  # include object_index for reference
            *application_attributes["num"],
            *offer_attributes["num"],
        ],
    },
)

In [65]:
# Wrap the extracted feature matrix in a DataFrame, one column per feature name.
df = pd.DataFrame(data=data, columns=feature_names)
# NOTE: the call normalize_columns(df, ['ocel:eid']) is left disabled here.
df.head()

Unnamed: 0,@@object_lifecycle_length,@@object_lifecycle_duration,@@object_lifecycle_start_timestamp,@@object_lifecycle_end_timestamp,@@object_degree_centrality,@@object_general_interaction_graph,@@object_general_descendants_graph_ascendants,@@object_general_descendants_graph_descendants,@@object_general_inheritance_graph_ascendants,@@object_general_inheritance_graph_descendants,...,@@event_num_event_NumberOfTerms,@@event_num_event_Accepted,@@event_num_event_Selected,@@event_num_event_OfferedAmount,@@event_num_event_CreditScore,@@event_num_event_FirstWithdrawalAmount,@@event_num_event_MonthlyCost,@@object_interaction_graph_application,@@object_interaction_graph_offer,@@object_lifecycle_unq_act
0,11,1144676.116,1451645000.0,1452790000.0,1.3e-05,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,10
1,5,1052406.062,1451738000.0,1452790000.0,1.3e-05,1,1,0,0,0,...,44.0,1.0,1.0,20000.0,979.0,20000.0,498.29,1,0,5
2,9,530018.225,1451647000.0,1452177000.0,1.3e-05,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,9
3,5,440829.268,1451736000.0,1452177000.0,1.3e-05,1,1,0,0,0,...,33.0,0.0,0.0,6000.0,0.0,500.0,200.0,1,0,5
4,13,1107636.273,1451651000.0,1452758000.0,2.7e-05,2,0,2,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2,11


In [66]:
# Standardize the single column at position 1 and keep the fitted scaler so
# standardized values can be mapped back to the original units later on.
# NOTE: the column is overwritten in place, so re-running this cell would fit
# the scaler on values that are already standardized.
column_slice = df.iloc[:, 1:2]  # 2-D selection (n_rows, 1), as the scaler expects
scaler = StandardScaler().fit(column_slice)
df.iloc[:, 1:2] = scaler.transform(column_slice)
df.head()

Unnamed: 0,@@object_lifecycle_length,@@object_lifecycle_duration,@@object_lifecycle_start_timestamp,@@object_lifecycle_end_timestamp,@@object_degree_centrality,@@object_general_interaction_graph,@@object_general_descendants_graph_ascendants,@@object_general_descendants_graph_descendants,@@object_general_inheritance_graph_ascendants,@@object_general_inheritance_graph_descendants,...,@@event_num_event_NumberOfTerms,@@event_num_event_Accepted,@@event_num_event_Selected,@@event_num_event_OfferedAmount,@@event_num_event_CreditScore,@@event_num_event_FirstWithdrawalAmount,@@event_num_event_MonthlyCost,@@object_interaction_graph_application,@@object_interaction_graph_offer,@@object_lifecycle_unq_act
0,11,-0.558083,1451645000.0,1452790000.0,1.3e-05,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,10
1,5,-0.640529,1451738000.0,1452790000.0,1.3e-05,1,1,0,0,0,...,44.0,1.0,1.0,20000.0,979.0,20000.0,498.29,1,0,5
2,9,-1.107298,1451647000.0,1452177000.0,1.3e-05,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,9
3,5,-1.186991,1451736000.0,1452177000.0,1.3e-05,1,1,0,0,0,...,33.0,0.0,0.0,6000.0,0.0,500.0,200.0,1,0,5
4,13,-0.591179,1451651000.0,1452758000.0,2.7e-05,2,0,2,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2,11


In [89]:
# Loss values of the two models, expressed in the standardized units produced
# by `scaler`.
hoeg_loss = 1.0612298846244812
oft_loss = 0.9834069699259738

# inverse_transform is documented for 2-D input of shape (n_samples, n_features).
# A flat list only works by broadcasting against a one-feature scaler and may be
# rejected depending on the scikit-learn version, so pass the losses as a column
# vector and flatten the result back to 1-D.
# NOTE(review): mapping a loss through inverse_transform is only meaningful if
# the loss is linear in the target's units (e.g. an absolute error) — confirm.
restored_losses = scaler.inverse_transform(
    np.array([hoeg_loss, oft_loss]).reshape(-1, 1)
).ravel()
restored_hoeg_loss, restored_oft_loss = restored_losses
print(f"Loss differences when denormalized: {restored_hoeg_loss - restored_oft_loss}")

Loss differences when denormalized: 87096.0892041591
