In [40]:
# SELF REMINDER. Copy the 'ocpa' directory to the forked one from github, so that I can push updates to github.

# Python native
import pickle
from statistics import median as median
from tqdm import tqdm
import os
os.chdir("c:\\Users\\Tim\\Development\\OCELFeatureExtractionExperiments")
from copy import deepcopy

# Data handling
import pandas as pd
import numpy as np

# Object centric process mining
from ocpa.algo.predictive_monitoring.obj import Feature_Storage as FeatureStorage

import torch

In [2]:
with open(f"data/ocpa-processed/raw/BPI2017-feature_storage_split.pkl", "rb") as file:
    feature_storage = pickle.load(file)

In [3]:
fg = feature_storage.feature_graphs[000]

In [4]:
node = fg.nodes[0]
len(fg.nodes)

41

In [5]:
node_feats = list(node.attributes.values())
node_feats

[0.039741438348965256,
 0.0,
 9.23273930980806,
 -0.10730642967938298,
 -0.1280585556104015,
 -0.24937157093548487,
 -0.18464702984559797,
 -0.39960553353094036,
 -0.2946012287100759,
 -0.098614486402827,
 -0.25076514543722306,
 -0.5247245193066771,
 -0.04993664963672101,
 -0.013590128816165387,
 -0.0026910077837613636,
 -0.28648967973671924,
 -0.32894991470406243,
 -0.2946012287100759,
 -0.1096638634668562,
 -0.029491222951741464,
 -0.21351486405402273,
 -0.06861784902634376,
 2.9991976827164257,
 -0.23399299353503455]

In [6]:
node_feats.append(1)

In [41]:
node_copy = deepcopy(node)
fg_copy = deepcopy(fg)

In [7]:
def get_node_features(feature_graph: FeatureStorage.Feature_Graph) -> torch.Tensor:
    """
    This will return a matrix / 2d array of the shape
    [Number of Nodes, Node Feature size]
    """
    node_feature_matrix: list[list[torch.float]] = []

    for node in feature_graph.nodes:
        node_feats = list(node.attributes.values())
        # Append node features to matrix
        node_feature_matrix.append(node_feats)

    node_feature_matrix = np.asarray(node_feature_matrix)
    return torch.tensor(node_feature_matrix, dtype=torch.float)


In [8]:
node.event_id

349315

In [9]:
[node.event_id for node in fg.nodes]

[349315,
 349322,
 349333,
 349338,
 349343,
 349220,
 349222,
 349351,
 349224,
 349352,
 349226,
 349355,
 349228,
 349229,
 349230,
 349354,
 349232,
 349360,
 349363,
 349364,
 349366,
 349238,
 349239,
 349368,
 349369,
 349242,
 349370,
 349371,
 349245,
 349241,
 349247,
 349367,
 349253,
 349269,
 349270,
 349276,
 349277,
 349290,
 349291,
 349307,
 349309]

In [10]:
enumerate(pd.DataFrame([1,23,4,4,5,56,6,6,7,8]).index)

<enumerate at 0x1b755daf580>

In [11]:
edge = fg.edges[0]
print(edge.source)
edge.target

349315


349322

In [12]:
def get_adjacency_info(feature_graph: FeatureStorage.Feature_Graph) -> torch.Tensor:
        """
        Function that returns the directed adjacency matrix in COO format, given a graph
        [2, Number of edges]
        """
        # Map event_id to node_index (counting from 0) using a dictionary
        node_index_map = {
            id: i
            for i, id in enumerate([node.event_id for node in feature_graph.nodes])
        }
        # Actually map event_id to node_index
        # so we have an index-based (event_id-agnostic) directed COO adjacency_matrix.
        adjacency_matrix_COO = [
                [node_index_map[e.source] for e in feature_graph.edges],
                [node_index_map[e.target] for e in feature_graph.edges],
            ]

        return torch.tensor(adjacency_matrix_COO, dtype=torch.long)

In [13]:
rem_times = [node.attributes[("event_remaining_time", ())] for node in fg.nodes]
sorted(rem_times)

[-0.9893098759134624,
 -0.9893098659424169,
 -0.9893098592950532,
 -0.98930985347861,
 -0.9893098476621668,
 -0.9893098410148032,
 -0.9893098310437576,
 -0.9893098277200758,
 -0.9877787479228477,
 -0.6392069869823708,
 -0.6391946195622515,
 -0.6379436164624697,
 -0.6379331601594072,
 -0.5444421579523229,
 -0.5436335028374455,
 -0.5436334961900817,
 -0.47703311663879994,
 -0.4770331099914363,
 -0.4678090129461043,
 0.039741438348965256,
 0.09347081095812397,
 2.159864902264432,
 2.160011410159852,
 3.96035538035428,
 3.960373564217616,
 4.537440200483287,
 4.5374527606769535,
 5.197870983391954,
 5.492397419204066,
 6.842259495864558,
 6.842268932628217,
 6.842281055757721,
 6.842287153052053,
 8.634564927156203,
 8.634564939620011,
 8.634564970364067,
 8.634586342469207,
 8.634593374549056,
 8.63468200467987,
 8.63483330200082,
 8.635773864076803]

In [14]:

def split_X_y(feature_graph: FeatureStorage.Feature_Graph, label_key
    ) -> tuple[FeatureStorage.Feature_Graph, list[torch.float]]:
        """
        Function that takes the target label from a feature graph 
        and returns them both separately in a tuple of shape
        [A Feature_Graph, Number of Nodes]
        """
        a = [node.attributes[label_key] for node in feature_graph.nodes]
        [{key: val for key, val in node.attributes.items() if key != label_key} for node in feature_graph.nodes]
        y=[]
        
        return feature_graph, y

In [15]:
split_fg, y = split_X_y(fg, ("event_remaining_time", ()))

[0.039741438348965256, -0.4678090129461043, -0.47703311663879994, -0.5436335028374455, -0.5444421579523229, 8.635773864076803, 8.634564939620011, -0.6379331601594072, 8.63483330200082, -0.6379436164624697, 8.63468200467987, -0.6392069869823708, 8.634593374549056, 8.634586342469207, 8.634564970364067, -0.6391946195622515, 8.634564927156203, -0.9877787479228477, -0.9893098277200758, -0.9893098310437576, -0.9893098410148032, 6.842287153052053, 6.842281055757721, -0.98930985347861, -0.9893098592950532, 6.842259495864558, -0.9893098659424169, -0.9893098759134624, 5.197870983391954, 6.842268932628217, 5.492397419204066, -0.9893098476621668, -0.5436334961900817, 4.5374527606769535, 4.537440200483287, 3.960373564217616, 3.96035538035428, 2.160011410159852, 2.159864902264432, -0.4770331099914363, 0.09347081095812397]


In [16]:
split_fg

<ocpa.algo.predictive_monitoring.obj.Feature_Storage.Feature_Graph at 0x1b726386e20>

In [39]:
def _split_X_y(self,feature_graph: FeatureStorage.Feature_Graph, label_key
    ) -> list[torch.float]:
        """
        Impure function that splits off the target label from a feature graph 
        and returns them both separately in a tuple of shape
        [A Feature_Graph, Number of Nodes]
        """
        ys = [node.attributes.pop(label_key) for node in feature_graph.nodes]
        
        return ys

In [51]:
ys = test_rem_y(fg_copy, ("event_remaining_time", ()))

KeyError: ('event_remaining_time', ())

In [50]:
fg.nodes[0].attributes

{('event_remaining_time', ()): 0.039741438348965256,
 ('event_previous_type_count', ('GDSRCPT',)): 0.0,
 ('event_elapsed_time', ()): 9.23273930980806,
 ('event_preceding_activities', ('Pending',)): -0.10730642967938298,
 ('event_preceding_activities', ('Cancel offer',)): -0.1280585556104015,
 ('event_preceding_activities', ('Return',)): -0.24937157093548487,
 ('event_preceding_activities', ('Cancel application',)): -0.18464702984559797,
 ('event_preceding_activities', ('Call',)): -0.39960553353094036,
 ('event_preceding_activities', ('Accept',)): -0.2946012287100759,
 ('event_preceding_activities', ('Handle leads',)): -0.098614486402827,
 ('event_preceding_activities',
  ('Call incomplete files',)): -0.25076514543722306,
 ('event_preceding_activities', ('Create offer',)): -0.5247245193066771,
 ('event_preceding_activities', ('Refuse offer',)): -0.04993664963672101,
 ('event_preceding_activities',
  ('Shorten completion',)): -0.013590128816165387,
 ('event_preceding_activities',
  ('Per

In [49]:
fg_copy.nodes[0].attributes

{('event_previous_type_count', ('GDSRCPT',)): 0.0,
 ('event_elapsed_time', ()): 9.23273930980806,
 ('event_preceding_activities', ('Pending',)): -0.10730642967938298,
 ('event_preceding_activities', ('Cancel offer',)): -0.1280585556104015,
 ('event_preceding_activities', ('Return',)): -0.24937157093548487,
 ('event_preceding_activities', ('Cancel application',)): -0.18464702984559797,
 ('event_preceding_activities', ('Call',)): -0.39960553353094036,
 ('event_preceding_activities', ('Accept',)): -0.2946012287100759,
 ('event_preceding_activities', ('Handle leads',)): -0.098614486402827,
 ('event_preceding_activities',
  ('Call incomplete files',)): -0.25076514543722306,
 ('event_preceding_activities', ('Create offer',)): -0.5247245193066771,
 ('event_preceding_activities', ('Refuse offer',)): -0.04993664963672101,
 ('event_preceding_activities',
  ('Shorten completion',)): -0.013590128816165387,
 ('event_preceding_activities',
  ('Personal loan collection',)): -0.0026910077837613636,
 (

In [38]:
node_copy.attributes = x[0]
node_copy.attributes

AttributeError: can't set attribute