In [1]:
from ocpa.algo.predictive_monitoring.obj import Feature_Storage as FeatureStorage
import pm4py.ocel
from collections import defaultdict
import pickle
import timeit
import random

# Relative path (resolved against the kernel's working directory) of the folder
# from which the pickled inputs of this notebook are loaded.
base_dir = "../data/BPI17/feature_encodings/HOEG/hoeg/raw"

In [2]:
def sample_dict(dictionary: dict, n: int):
    """Return a new dict holding ``n`` randomly chosen entries of ``dictionary``.

    Args:
        dictionary: Mapping to draw entries from; it is not modified.
        n: Number of distinct keys to draw.

    Returns:
        A dict with the drawn keys (in the order they were drawn) and their
        original values.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``n`` is negative or
            larger than the number of keys.
    """
    chosen_keys = random.sample(list(dictionary), n)
    subset = {}
    for chosen in chosen_keys:
        subset[chosen] = dictionary[chosen]
    return subset

In [3]:
# Path of the pickled feature storage to load.
feature_storage_file = (
    f"{base_dir}/BPI2017-feature_storage-split-[C1-3,C5,P1-6,O2,O3,O5].fs"
)
# NOTE: despite its name, `objects_data_dict` holds the *path* of the pickle;
# the unpickled object itself is bound to `objects_data` below.
objects_data_dict = f"{base_dir}/bpi17_ofg+oi_graph+app_node_map+off_node_map.pkl"
# SECURITY: pickle.load can execute arbitrary code contained in the file, so
# both files must come from a trusted source.
with open(feature_storage_file, "rb") as f:
    fs: FeatureStorage = pickle.load(f)

with open(objects_data_dict, "rb") as f:
    objects_data = pickle.load(f)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# NOTE: `hetero_data` is not referenced again in the cells visible here.
hetero_data = objects_data["ofg"]
fg31463 = fs.feature_graphs[31463]
# Only the last expression of a cell is rendered, so this first listing
# (event id paired with the id of each node's first object) is evaluated and
# then discarded.
[(node.event_id, node.objects[0][1]) for node in fg31463.nodes]
# Rendered result: each event id with its full list of
# (object type, object id) pairs.
[(node.event_id, node.objects) for node in fg31463.nodes]

[(7403, [('application', 'Application_1299976284')]),
 (7404, [('application', 'Application_1299976284')]),
 (7405, [('application', 'Application_1299976284')]),
 (7406,
  [('application', 'Application_1299976284'), ('offer', 'Offer_499807586')]),
 (7407,
  [('application', 'Application_1299976284'), ('offer', 'Offer_499807586')]),
 (7408,
  [('application', 'Application_1299976284'), ('offer', 'Offer_499807586')])]

In [5]:
# Edge types are written as 3-tuples; the code below reads element 2 as the
# object (target node) type.
# NOTE: the first assignment is overwritten by the second one right away, so
# only the "offer" edge type is in effect for the rest of the cell.
edge_type_example = ("event", "interacts", "application")
edge_type_example = ("event", "interacts", "offer")


def get_out_edges(event_node, edge_type):
    """List the edges from one event node to its objects of a given type.

    Args:
        event_node: Object exposing ``event_id`` and ``objects``, where
            ``objects`` iterates over ``(object_type, object_id)`` pairs.
        edge_type: 3-tuple whose last element (index 2) names the object type
            to keep.

    Returns:
        A list of ``(event_id, object_id)`` tuples, one per object of the
        requested type, in the order they appear in ``event_node.objects``.
    """
    wanted_type = edge_type[2]
    selected = []
    for object_type, object_id in event_node.objects:
        if object_type == wanted_type:
            selected.append((event_node.event_id, object_id))
    return selected


def flatten(l):
    """Flatten one level of nesting: concatenate the items of every sub-iterable.

    Args:
        l: Iterable of iterables.

    Returns:
        A new list with the items of each sub-iterable, in order. Only one
        level is flattened; nested items are kept as they are.
    """
    # A named function instead of a lambda bound to a name (PEP 8), so that
    # tracebacks and reprs show "flatten" rather than "<lambda>".
    return [item for sublist in l for item in sublist]

# Gather the per-node edge lists of graph 31463 for the selected edge type
# into a single flat list of (event_id, object_id) pairs.
flatten([get_out_edges(node, edge_type_example) for node in fg31463.nodes])

[(7406, 'Offer_499807586'),
 (7407, 'Offer_499807586'),
 (7408, 'Offer_499807586')]

In [6]:
# Only the last expression of a cell is rendered: the "application" preview is
# evaluated but never shown. Use display() on both (or separate cells) to see
# the two previews.
objects_data["object_feature_matrices"]["application"].head()
objects_data["object_feature_matrices"]["offer"].head()

Unnamed: 0,offer_index,@@event_num_event_NumberOfTerms,@@event_num_event_Accepted,@@event_num_event_Selected,@@event_num_event_OfferedAmount,@@event_num_event_CreditScore,@@event_num_event_FirstWithdrawalAmount,@@event_num_event_MonthlyCost,object_index,@@object_lifecycle_duration
0,0,44.0,1.0,1.0,20000.0,979.0,20000.0,498.29,1,1052406.062
1,1,33.0,0.0,0.0,6000.0,0.0,500.0,200.0,3,440829.268
2,2,120.0,1.0,0.0,15000.0,0.0,15000.0,158.98,5,1248.884
3,3,120.0,1.0,1.0,15000.0,1059.0,15000.0,158.98,6,1020869.024
4,4,72.0,0.0,1.0,15700.0,834.0,3726.0,252.73,8,2055574.011


In [7]:
# Per object type, a mapping from object id to an integer (see printed samples)
# — presumably the row index of that object's feature vector; TODO confirm.
e = objects_data["object_feature_vector_map"]
# Print three randomly drawn entries per object type. No random seed is set in
# the cells shown here, so the printed entries differ between runs.
print(sample_dict(e["application"], 3))
print(sample_dict(e["offer"], 3))

{'Application_118524871': 27269, 'Application_747812656': 25583, 'Application_936478493': 15578}
{'Offer_1264595481': 13518, 'Offer_734403929': 12972, 'Offer_28484110': 914}


In [8]:
fg30643: FeatureStorage.Feature_Graph = fs.feature_graphs[30643]
# Collect the distinct items found across all values of the graph's `objects`
# mapping; the rendered output shows them to be (object type, object id) pairs.
unique_items = set(item for obj in fg30643.objects.values() for item in obj)
unique_items

{('application', 'Application_2016383446'), ('offer', 'Offer_1967823804')}

In [9]:
def generate_unique_objects(n, options=("application", "offer", "contract", "product")):
    """Generate ``n`` synthetic ``(object_type, object_id)`` pairs for testing.

    Args:
        n: Number of pairs to generate.
        options: Sequence of object-type names to draw from at random. The
            default is an immutable tuple so it cannot be altered between calls.

    Returns:
        A set of ``n`` tuples ``(prefix, f"{prefix}_{i}")`` for ``i`` in
        ``range(n)``. The running index makes every id distinct, so the set
        always has exactly ``n`` members.

    Raises:
        IndexError: Propagated from ``random.choice`` when ``options`` is empty
            and ``n`` is positive.
    """
    unique_items = set()
    for i in range(n):
        prefix = random.choice(options)
        unique_items.add((prefix, f"{prefix}_{i}"))
    return unique_items

In [10]:
# Group (object type, object id) pairs by their object type
def set_to_split_dict(unique_items):
    """Split an iterable of pairs into a dict of lists keyed by the first element.

    Args:
        unique_items: Iterable of indexable items; element 0 is used as the
            grouping key and element 1 as the value to collect.

    Returns:
        A plain ``dict`` mapping each key to the list of its values, in the
        order the items were iterated.
    """
    grouped = {}
    for entry in unique_items:
        group_key, member = entry[0], entry[1]
        grouped.setdefault(group_key, []).append(member)
    return grouped


# Demo on 10 randomly generated application/offer pairs; the grouping shown
# varies between runs because the generator draws object types at random.
set_to_split_dict(generate_unique_objects(10, ["application", "offer"]))

{'application': ['application_8',
  'application_0',
  'application_1',
  'application_7',
  'application_3'],
 'offer': ['offer_5', 'offer_4', 'offer_9', 'offer_2', 'offer_6']}

In [11]:
# Look up the integer stored for one specific offer id in the id-to-integer map.
objects_data["object_feature_vector_map"]["offer"]["Offer_1282810220"]

12901

In [12]:
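# NOTE: disabled timing scaffold, kept for reference only. Before re-enabling
# it, replace the `lambda x: pass` placeholders below (not valid Python) with
# the real callables to compare.
# import timeit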

# def generate_dict_with_size(n):
#     keys = ['offer', 'application']
#     result = {}
#     for key in keys:
#         result[key] = random.sample(range(10000), n)
#     return result

# # test data
# test_dict = generate_dict_with_size(50)
# test_dict1 = generate_dict_with_size(50)

# # functions under test
# func0 = lambda x: pass
# func1 = lambda x: pass

# # Time the function execution
# execution_time = timeit.timeit(lambda: func0(test_dict), number=1_000_00)
# execution_time1 = timeit.timeit(lambda: func1(test_dict1), number=1_000_00)

# # Print the execution time
# print(f"Execution time: {execution_time} seconds")
# print(f"Execution time: {execution_time1} seconds")

In [13]:
def __get_event_node_index_mapping(
    feature_graph: FeatureStorage.Feature_Graph,
) -> dict[int, int]:
    """Map each node's ``event_id`` to its position in ``feature_graph.nodes``.

    Args:
        feature_graph: Graph whose ``nodes`` are iterated in order.

    Returns:
        A dict from ``event_id`` to zero-based node position. If an
        ``event_id`` occurs more than once, the last position wins.
    """
    index_by_event_id = {}
    for position, node in enumerate(feature_graph.nodes):
        index_by_event_id[node.event_id] = position
    return index_by_event_id


def __get_event_object_edges(
    event_node: FeatureStorage.Feature_Graph.Node,
    edge_type: tuple[str, str, str],
) -> list[tuple[int, int]]:
    """List the edges from one event node to its objects of the requested type.

    Args:
        event_node: Node exposing ``event_id`` and ``objects``; ``objects``
            iterates over ``(object_type, object_id)`` pairs.
        edge_type: 3-tuple whose last element (index 2) is the object type to
            keep.

    Returns:
        ``(event_id, object_id)`` tuples in the order of ``event_node.objects``.
        NOTE(review): the object ids rendered elsewhere in this notebook are
        strings, so the second tuple element may not be an ``int`` as annotated.
    """
    wanted_type = edge_type[2]
    selected = []
    for object_type, object_id in event_node.objects:
        if object_type == wanted_type:
            selected.append((event_node.event_id, object_id))
    return selected


def __get_edge_index_for_edge_type(
    feature_graph: FeatureStorage.Feature_Graph,
    edge_type: tuple[str, str, str],
    event_node_map: dict[int, int],
    object_node_map: dict[str, int],
) -> list[tuple[int, int]]:
    """Build the index-based edge list of one edge type for a feature graph.

    Args:
        feature_graph: Graph whose ``nodes`` are scanned for event-object edges.
        edge_type: 3-tuple whose last element selects the object type.
        event_node_map: Lookup from ``event_id`` to event node index.
        object_node_map: Lookup from object id to object node index.

    Returns:
        A list of ``(event_node_index, object_node_index)`` tuples, ordered by
        node and then by the object order within each node.

    Raises:
        KeyError: If an event id or object id is missing from its lookup.
    """
    # Step 1: gather every (event_id, object_id) pair of the requested type
    # across all nodes of the graph.
    raw_edges = [
        pair
        for node in feature_graph.nodes
        for pair in __get_event_object_edges(node, edge_type)
    ]
    # Step 2: translate both endpoints into node indices via the lookups.
    return [
        (event_node_map[pair[0]], object_node_map[pair[1]]) for pair in raw_edges
    ]

In [19]:
# NOTE(review): `fg0` is assigned but not used in this cell; the calls below
# operate on `fg30643`, which is defined in an earlier cell. Confirm which
# graph was intended.
fg0 = fs.feature_graphs[0]
edge_type = ("event", "interacts", "offer")
event_node_map = __get_event_node_index_mapping(fg30643)
object_node_map = objects_data["object_feature_vector_map"]["offer"]

# Index-based (event node, offer node) edge pairs for graph 30643.
__get_edge_index_for_edge_type(fg30643, edge_type, event_node_map, object_node_map)

[(3, 29864), (4, 29864), (5, 29864), (6, 29864), (7, 29864)]