# VAE Approach

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [88]:
from ocpa.objects.log.importer.ocel import factory as ocel_import_factory
from ocpa.algo.discovery.ocpn import algorithm as ocpn_discovery_factory
from tqdm import tqdm
import numpy as np
import os
import random
from ocpa.objects.log.importer.csv import factory as ocel_import_factory_csv
from ocpa.visualization.oc_petri_net import factory as ocpn_vis_factory
from ocpa.algo.conformance.precision_and_fitness.variants import replay_context

In [18]:
def save_process_model_visualization(ocel, save_path, graphviz_bin='C:/Program Files/Graphviz/bin'):
    """
    Function to generate the process model of a JSONOCEL-log and save it as svg.
    :param ocel: given OCEL-log, type: OCEL-Log
    :param save_path: path for the saved process model visualization, type: string
    :param graphviz_bin: directory of the Graphviz binaries that is added to PATH (once) so the
        visualization can be rendered; pass None or an empty string to leave PATH untouched, type: string
    """
    # make the Graphviz binaries discoverable; only extend PATH if the directory is not
    # already listed, otherwise PATH would grow with every call of this function
    if graphviz_bin:
        current_path = os.environ.get("PATH", "")
        if graphviz_bin not in current_path.split(os.pathsep):
            os.environ["PATH"] = current_path + os.pathsep + graphviz_bin if current_path else graphviz_bin
    # get the object-centric petri net
    ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})
    # generate the visualization
    gviz = ocpn_vis_factory.apply(ocpn, parameters={'format': 'svg'})
    # save the visualization
    ocpn_vis_factory.save(gviz, save_path)

In [3]:
def filter_silent_transitions(dic,silent_transitions):
    """
    Build a copy of a dictionary without any silent transitions.
    Entries whose key is a silent transition are dropped completely; in the remaining
    entries every silent transition is removed from the value list. The input dictionary
    is not modified.
    :param dic: dictionary mapping a name to a list of names, type: dictionary
    :param silent_transitions: names of the silent transitions, type: list
    :return: filtered dictionary, type: dictionary
    """
    return {
        name: [item for item in related if item not in silent_transitions]
        for name, related in dic.items()
        if name not in silent_transitions
    }

#iterative implementation of a depth-first search (DFS) algorithm
def dfs(graph, visited, activity, preceding_events):
    """
    Function to perform a depth-first search (DFS) algorithm on the activity graph.
    The reachable activities are appended to preceding_events in post-order, i.e. the
    start activity itself is always appended last. Activities that only occur as a
    neighbour but not as a key of the graph are treated as having no neighbours.
    The search uses an explicit stack, so long chains cannot hit the recursion limit.
    :param graph: activity graph, type: dictionary
    :param visited: set of already visited nodes (gets extended in place), type: set
    :param activity: current activity, type: string
    :param preceding_events: list to store the preceding events (gets extended in place), type: list
    """
    visited.add(activity)
    # every stack entry holds a node and the iterator over its not yet inspected neighbours
    stack = [(activity, iter(graph.get(activity, ())))]
    while stack:
        node, pending = stack[-1]
        for neighbour in pending:
            # a neighbour that has not been visited yet is explored before the remaining ones
            if neighbour not in visited:
                visited.add(neighbour)
                stack.append((neighbour, iter(graph.get(neighbour, ()))))
                break
        else:
            # all neighbours are handled, so the node is finished (post-order)
            preceding_events.append(node)
            stack.pop()

# DS3 Log

In [4]:
# relative path of the DS3 event log in JSON-OCEL format
filename = "../src/data/jsonocel/DS3.jsonocel"
# import the log with the JSON-OCEL importer
ocel = ocel_import_factory.apply(filename)
# discover the object-centric petri net of the imported log
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})

In [84]:
from collections import Counter
import networkx as nx


def calculate_preset(eog):
    """
    Collect for every node of the graph the list of its ancestors.
    :param eog: directed graph whose nodes are events, type: networkx graph
    :return: dictionary mapping each node to the list of its ancestors, type: dictionary
    """
    # nx.ancestors is the variant to use for large event logs; alternatives that were considered:
    #   stable speed also for later events, large logs with large connected components
    #   [v for v in nx.dfs_predecessors(eog, source=e).keys() if v != e]
    #   fast for small graphs/no connected component
    #   [n for n in nx.traversal.bfs_tree(eog, e, reverse=True) if n != e]
    return {event: list(nx.ancestors(eog, event)) for event in eog.nodes}


def calculate_contexts_and_bindings(ocel):
    """
    Function to calculate the context and the binding sequence of every event in the log.
    For each event the preset (all ancestors in the event-object graph) is determined. The
    context maps every object type to a Counter of activity prefixes, one prefix per object
    that occurs in the event or its preset. The binding sequence lists, for every preset
    event, its activity together with its objects grouped by object type.
    :param ocel: given OCEL-log, type: OCEL-Log
    :return: tuple (contexts, bindings), both keyed by the event, type: tuple of dictionaries
    """
    # work on a copy so the helper column below does not end up in the log of the caller
    log = ocel.log.log.copy()
    object_types = ocel.object_types
    contexts = {}
    bindings = {}
    preset = calculate_preset(ocel.graph.eog)
    # list of (object type, object) pairs per event
    log["event_objects"] = log.apply(lambda x: [(ot,o) for ot in object_types for o in x[ot]], axis = 1)
    # one row per (event, object) pair
    exploded_log = log.explode("event_objects")
    for event, predecessors in preset.items():
        in_preset = log["event_id"].isin(predecessors)
        in_preset_or_event = log["event_id"].isin(predecessors + [event])
        # all objects that occur in the event itself or in one of its preset events
        obs = list(set().union(*log.loc[in_preset_or_event]["event_objects"].to_list()))
        binding_sequence = log.loc[in_preset].apply(lambda y: (y["event_activity"], { ot : [o for (ot_,o) in y["event_objects"] if ot_ == ot] for ot in object_types}), axis = 1).values.tolist()
        # the preset rows are the same for every object, so they are selected once per event
        # instead of once per object
        preset_rows = exploded_log[exploded_log["event_id"].isin(predecessors)]
        context = {}
        for ob in obs:
            # activities of the preset events the object took part in, in log order
            prefix = tuple(preset_rows[preset_rows["event_objects"] == ob]["event_activity"].to_list())
            context.setdefault(ob[0], Counter())[prefix] += 1
        contexts[event] = context
        bindings[event] = binding_sequence
    return contexts, bindings

In [90]:
# object types of the log, also passed to the replay in the following cell
object_types = ocel.object_types
# contexts and binding sequences of all events
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, so contexts and
# bindings may be undefined in a fresh kernel until this cell has finished once
contexts, bindings = calculate_contexts_and_bindings(ocel)

KeyboardInterrupt: 

In [None]:
# replay the contexts and bindings on the discovered net
# NOTE(review): presumably returns the activities enabled in the model per context - confirm against the ocpa documentation
en_m =  replay_context.enabled_model_activities_multiprocessing(contexts, bindings, ocpn, object_types)

In [65]:
# reduce every process execution mapping to its first entry to be able to derive the case
mapping_dict = {event_index: ocel.process_execution_mappings[event_index][0] for event_index in ocel.process_execution_mappings}
# new column in the log that holds the process execution (case) number of every event
ocel.log.log['event_execution'] = ocel.log.log.index.map(mapping_dict)
# targets: transition name -> names of its directly prior places
targets = {}
# sources: transition name -> names of its directly following places
sources = {}
for arc in tqdm(ocpn.arcs, desc="Check the arcs"):
    # arc that ends in a transition: its source is a prior place of that transition
    if arc.target in ocpn.transitions:
        targets.setdefault(arc.target.name, []).append(arc.source.name)
    # arc that starts in a transition: its target is a following place of that transition
    if arc.source in ocpn.transitions:
        sources.setdefault(arc.source.name, []).append(arc.target.name)
# directly preceding transitions: a transition precedes another one as soon as
# one of its following places is a prior place of the other transition
preceding_activities = {
    transition: [
        candidate
        for candidate, following_places in sources.items()
        if any(place in following_places for place in prior_places)
    ]
    for transition, prior_places in targets.items()
}
# directly succeeding transitions, derived the same way in the other direction
succeeding_activities = {
    transition: [
        candidate
        for candidate, prior_places in targets.items()
        if any(place in prior_places for place in following_places)
    ]
    for transition, following_places in sources.items()
}
# names of all silent transitions of the net
silent_transitions = [transition.name for transition in ocpn.transitions if transition.silent]
# copy of the succeeding activities in which every silent successor is replaced
# by the direct successors of that silent transition
succeeding_activities_updated = {}
for transition, followers in succeeding_activities.items():
    resolved_followers = []
    for follower in followers:
        if follower in silent_transitions:
            # silent successor: take over its own direct successors instead
            resolved_followers.extend(succeeding_activities[follower])
        else:
            # visible successor: keep it unchanged
            resolved_followers.append(follower)
    succeeding_activities_updated[transition] = resolved_followers
# all (also indirectly) preceding events per activity, collected with a depth-first search
preceding_events_dict = {}
for activity in preceding_activities:
    # fresh search state for every start activity
    visited = set()
    preceding_events = []
    dfs(preceding_activities, visited, activity, preceding_events)
    # the last element is the start activity itself, so it is dropped before the order is reversed
    preceding_events_dict[activity] = preceding_events[:-1][::-1]

Check the arcs: 100%|██████████| 130/130 [00:00<00:00, 43113.74it/s]


In [74]:
succeeding_activities_updated

{'incidentskip_3': ['incidenttau_2', 'incidentskip_11', 'incidenttauSplit_12'],
 'incidentskip_6': ['New Incident'],
 'incidentskip_51': ['Closed Incident',
  'incidentskip_27',
  'incidenttauSplit_15',
  'incidentinit_loop_28',
  'incidentskip_14'],
 'incidentskip_39': ['Resolved'],
 'customertau_1': ['Closed Incident', 'customertauJoin_4', 'New Incident'],
 'incidentskip_21': ['incidenttauJoin_13'],
 'incidenttauJoin_32': ['Resolved'],
 'customerskip_13': [],
 'Awaiting User Info': ['Awaiting User Info', 'incidenttauJoin_16'],
 'incidentinit_loop_35': ['Awaiting Problem'],
 'incidentinit_loop_40': ['Awaiting Evidence'],
 'incidentskip_7': ['incidenttau_2', 'incidentskip_11', 'incidenttauSplit_12'],
 'Resolved': ['incidenttauSplit_31', 'incidentskip_30', 'incidenttauJoin_13'],
 'incidentinit_loop_23': ['Active'],
 'customerskip_7': ['Closed Incident'],
 'Awaiting Evidence': ['Awaiting Evidence', 'incidenttauJoin_32'],
 'incidenttauJoin_16': ['Closed Incident'],
 'incidenttauJoin_13': 

In [81]:
# create an empty dictionary to store all (also indirectly) succeeding activities of an activity
succeeding_events_dict = {}
for activity in succeeding_activities_updated:
    # empty set for all the visited activities
    visited = set()
    # list for all activities that are reached from the current activity
    succeeding_activities_dfs = []
    dfs(succeeding_activities_updated, visited, activity, succeeding_activities_dfs)
    # we need to remove the last element from the list because it corresponds to the activity itself
    succeeding_events_dict[activity] = succeeding_activities_dfs[:-1][::-1]

In [82]:
# delete all silent transitions from succeeding_events_dict (dict where all succeeding events found by the DFS are stored)
succeeding_events_full = filter_silent_transitions(succeeding_events_dict, silent_transitions)

In [83]:
succeeding_events_full

{'Awaiting User Info': ['Closed Incident',
  'Awaiting Vendor',
  'Awaiting Problem',
  'Awaiting Evidence',
  'Resolved',
  'Active'],
 'Resolved': ['Closed Incident',
  'Active',
  'Awaiting User Info',
  'Awaiting Vendor',
  'Awaiting Problem',
  'Awaiting Evidence'],
 'Awaiting Evidence': ['Resolved',
  'Closed Incident',
  'Active',
  'Awaiting User Info',
  'Awaiting Vendor',
  'Awaiting Problem'],
 'New Incident': ['Closed Incident',
  'Awaiting Vendor',
  'Awaiting Problem',
  'Awaiting Evidence',
  'Resolved',
  'Active',
  'Awaiting User Info'],
 'Awaiting Problem': ['Awaiting Evidence',
  'Resolved',
  'Closed Incident',
  'Active',
  'Awaiting User Info',
  'Awaiting Vendor'],
 'Awaiting Vendor': ['Resolved',
  'Closed Incident',
  'Active',
  'Awaiting User Info',
  'Awaiting Problem',
  'Awaiting Evidence'],
 'Active': ['Closed Incident',
  'Awaiting Vendor',
  'Awaiting Problem',
  'Awaiting Evidence',
  'Resolved',
  'Awaiting User Info'],
 'Closed Incident': ['Awaiting

In [70]:
# delete all silent transitions from preceding_events_dict (dict where all preceding events found by the DFS are stored)
filtered_preceeding_events_full = filter_silent_transitions(preceding_events_dict, silent_transitions)
# delete all silent transitions from preceding_activities (dict where only the directly preceding events are stored)
filtered_preceeding_events = filter_silent_transitions(preceding_activities, silent_transitions)
# delete all silent transitions from succeeding_activities_updated (dict where only the directly succeeding events are stored)
filtered_succeeding_activities_updated = filter_silent_transitions(succeeding_activities_updated,silent_transitions)

In [68]:
preceding_activities

{'incidenttauJoin_16': ['incidentskip_21',
  'incidentskip_26',
  'incidentskip_22',
  'incidentskip_17'],
 'Resolved': ['incidenttauJoin_32', 'incidentskip_30'],
 'Awaiting User Info': ['incidentskip_20', 'incidentinit_loop_18'],
 'incidenttauSplit_31': ['incidentinit_loop_28', 'incidentskip_49'],
 'incidentskip_3': ['incidenttau_1'],
 'incidentskip_51': ['Closed Incident'],
 'incidenttauJoin_32': ['incidentskip_39',
  'incidentskip_33',
  'incidentskip_44',
  'incidentskip_43',
  'incidentskip_48'],
 'incidentskip_42': ['Awaiting Evidence'],
 'incidentskip_7': ['New Incident'],
 'Awaiting Vendor': ['incidentskip_47', 'incidentinit_loop_45'],
 'customertauJoin_4': ['customerskip_13', 'Closed Incident', 'customerskip_9'],
 'incidentinit_loop_40': ['incidentskip_38', 'incidentskip_34'],
 'incidentskip_47': ['Awaiting Vendor'],
 'incidentskip_48': ['Awaiting Vendor'],
 'incidentskip_38': ['Awaiting Problem'],
 'incidentskip_11': ['incidentskip_51', 'incidentinit_loop_9'],
 'incidentskip_

In [66]:
preceding_events_dict

{'incidenttauJoin_16': ['incidentskip_17',
  'incidentskip_22',
  'incidentskip_26',
  'Active',
  'incidentskip_25',
  'incidentinit_loop_23',
  'incidentskip_21',
  'Awaiting User Info',
  'incidentinit_loop_18',
  'incidenttauSplit_15',
  'incidenttauSplit_12',
  'incidentskip_51',
  'Closed Incident',
  'incidentskip_11',
  'incidentinit_loop_9',
  'incidentskip_7',
  'New Incident',
  'incidentinit_loop_4',
  'customerskip_12',
  'customerinit_loop_10',
  'incidentskip_6',
  'incidentskip_3',
  'incidenttau_1',
  'incidenttauJoin_13',
  'incidentskip_27',
  'incidentskip_50',
  'Resolved',
  'incidentskip_30',
  'incidenttauJoin_32',
  'incidentskip_48',
  'Awaiting Vendor',
  'incidentinit_loop_45',
  'incidentskip_47',
  'incidentskip_43',
  'Awaiting Evidence',
  'incidentskip_42',
  'incidentinit_loop_40',
  'incidentskip_44',
  'incidentskip_33',
  'incidentskip_39',
  'incidentskip_34',
  'incidentskip_38',
  'Awaiting Problem',
  'incidentskip_37',
  'incidentinit_loop_35',

In [67]:
succeeding_activities_updated

{'incidentskip_3': ['incidenttau_2', 'incidentskip_11', 'incidenttauSplit_12'],
 'incidentskip_6': ['New Incident'],
 'incidentskip_51': ['Closed Incident',
  'incidentskip_27',
  'incidenttauSplit_15',
  'incidentinit_loop_28',
  'incidentskip_14'],
 'incidentskip_39': ['Resolved'],
 'customertau_1': ['Closed Incident', 'customertauJoin_4', 'New Incident'],
 'incidentskip_21': ['incidenttauJoin_13'],
 'incidenttauJoin_32': ['Resolved'],
 'customerskip_13': [],
 'Awaiting User Info': ['Awaiting User Info', 'incidenttauJoin_16'],
 'incidentinit_loop_35': ['Awaiting Problem'],
 'incidentinit_loop_40': ['Awaiting Evidence'],
 'incidentskip_7': ['incidenttau_2', 'incidentskip_11', 'incidenttauSplit_12'],
 'Resolved': ['incidenttauSplit_31', 'incidentskip_30', 'incidenttauJoin_13'],
 'incidentinit_loop_23': ['Active'],
 'customerskip_7': ['Closed Incident'],
 'Awaiting Evidence': ['Awaiting Evidence', 'incidenttauJoin_32'],
 'incidenttauJoin_16': ['Closed Incident'],
 'incidenttauJoin_13': 

In [71]:
filtered_preceeding_events_full

{'Resolved': ['Awaiting Vendor',
  'Awaiting Evidence',
  'Awaiting Problem',
  'Closed Incident',
  'New Incident',
  'Active',
  'Awaiting User Info'],
 'Awaiting User Info': ['Closed Incident',
  'New Incident',
  'Resolved',
  'Awaiting Vendor',
  'Awaiting Evidence',
  'Awaiting Problem',
  'Active'],
 'Awaiting Vendor': ['Closed Incident',
  'New Incident',
  'Resolved',
  'Awaiting Evidence',
  'Awaiting Problem',
  'Active',
  'Awaiting User Info'],
 'Active': ['Closed Incident',
  'New Incident',
  'Resolved',
  'Awaiting Vendor',
  'Awaiting Evidence',
  'Awaiting Problem',
  'Awaiting User Info'],
 'Closed Incident': ['Resolved',
  'Awaiting Vendor',
  'Awaiting Evidence',
  'Awaiting Problem',
  'Active',
  'Awaiting User Info',
  'New Incident'],
 'Awaiting Problem': ['Closed Incident',
  'New Incident',
  'Resolved',
  'Awaiting Vendor',
  'Awaiting Evidence',
  'Active',
  'Awaiting User Info'],
 'New Incident': [],
 'Awaiting Evidence': ['Awaiting Problem',
  'Closed In

In [72]:
filtered_preceeding_events

{'Resolved': [],
 'Awaiting User Info': [],
 'Awaiting Vendor': [],
 'Active': [],
 'Closed Incident': [],
 'Awaiting Problem': [],
 'New Incident': [],
 'Awaiting Evidence': []}

In [73]:
filtered_succeeding_activities_updated

{'Awaiting User Info': ['Awaiting User Info'],
 'Resolved': [],
 'Awaiting Evidence': ['Awaiting Evidence'],
 'New Incident': ['New Incident', 'New Incident'],
 'Awaiting Problem': ['Awaiting Problem'],
 'Awaiting Vendor': ['Awaiting Vendor'],
 'Active': ['Active'],
 'Closed Incident': ['Closed Incident']}

In [60]:
def _has_exact_numeric_suffix(name, number):
    """Return True if name ends with number and the suffix is not the tail of a longer number."""
    suffix = str(number)
    if not name.endswith(suffix):
        return False
    # the character in front of the suffix must not be a digit, otherwise e.g. 'act_12' would match 2
    return len(name) == len(suffix) or not name[-len(suffix) - 1].isdigit()


def _derive_activity_relations(ocpn):
    """Derive the predecessor/successor relations between the visible transitions of the net.
    Returns the tuple (all preceding events, directly preceding events, directly succeeding
    events), each as a dictionary from which the silent transitions have been filtered out."""
    # targets: transition name -> names of its directly prior places
    targets = {}
    # sources: transition name -> names of its directly following places
    sources = {}
    for arc in tqdm(ocpn.arcs, desc="Check the arcs"):
        if arc.target in ocpn.transitions:
            targets.setdefault(arc.target.name, []).append(arc.source.name)
        if arc.source in ocpn.transitions:
            sources.setdefault(arc.source.name, []).append(arc.target.name)
    # a transition directly precedes another one if one of its following places is a prior place of the other one
    preceding_activities = {
        transition: [
            candidate
            for candidate, following_places in sources.items()
            if any(place in following_places for place in prior_places)
        ]
        for transition, prior_places in targets.items()
    }
    succeeding_activities = {
        transition: [
            candidate
            for candidate, prior_places in targets.items()
            if any(place in prior_places for place in following_places)
        ]
        for transition, following_places in sources.items()
    }
    # store the name of all silent transitions in the net
    silent_transitions = [x.name for x in ocpn.transitions if x.silent]
    # replace every silent successor by the direct successors of that silent transition
    # NOTE(review): the replacement is only one level deep; silent successors of a silent
    # transition are removed by the filter below without being resolved any further, so
    # visible activities behind a chain of silent transitions are not reachable - confirm
    # whether a transitive resolution is intended
    succeeding_activities_updated = {}
    for transition, followers in succeeding_activities.items():
        resolved_followers = []
        for follower in followers:
            if follower in silent_transitions:
                # a silent transition without following places has no successors
                resolved_followers.extend(succeeding_activities.get(follower, []))
            else:
                resolved_followers.append(follower)
        succeeding_activities_updated[transition] = resolved_followers
    # use a depth-first search to collect all (also indirectly) preceding events of every activity
    preceding_events_dict = {}
    for activity in preceding_activities:
        visited = set()
        preceding_events = []
        dfs(preceding_activities, visited, activity, preceding_events)
        # we need to remove the last element from the list because it corresponds to the activity itself
        preceding_events_dict[activity] = preceding_events[:-1][::-1]
    return (
        filter_silent_transitions(preceding_events_dict, silent_transitions),
        filter_silent_transitions(preceding_activities, silent_transitions),
        filter_silent_transitions(succeeding_activities_updated, silent_transitions),
    )


def sample_traces(ocel, ocpn, amount, length = None, save_path = None):
    """
    Function to generate a sample of traces from an object-centric petri net.
    Side effect: the column 'event_execution' (process execution of every event) is added
    to ocel.log.log.
    :param ocel: given OCEL-log, type: OCEL-Log
    :param ocpn: given object-centric petri net, type: ObjectCentricPetriNet
    :param amount: amount of traces to be generated, type: int
    :param length: maximum length of the traces to be generated, if not given, gets generated as double the average length in the log, type: int
    :param save_path: if given, the traces are written to this file, one trace per line with the activities separated by blanks, type: string
    :return: list of sampled traces in lists, type: list
    """
    # we create another dictionary that only contains the value inside the list to be able to derive the case
    mapping_dict = {key: ocel.process_execution_mappings[key][0] for key in ocel.process_execution_mappings}
    # we generate a new column in the class (log) that contains the process execution (case) number via the generated dictionary
    ocel.log.log['event_execution'] = ocel.log.log.index.map(mapping_dict)
    (filtered_preceeding_events_full,
     filtered_preceeding_events,
     filtered_succeeding_activities_updated) = _derive_activity_relations(ocpn)
    if length is None:
        # number of activities per process execution in the original log
        execution_sizes = ocel.log.log.groupby('event_execution')['event_activity'].count()
        # leave out the shortest and the longest process execution to filter out outliers that distort the length
        trimmed_sizes = execution_sizes[~execution_sizes.index.isin([execution_sizes.idxmin(), execution_sizes.idxmax()])]
        if trimmed_sizes.empty:
            # with at most two process executions nothing is left after trimming, so use all of them
            trimmed_sizes = execution_sizes
        # the maximum length of a trace is the double of the average trace length in the log
        limit_length = int(np.round(2 * trimmed_sizes.mean()))
    else:
        limit_length = length
    #define an empty list for the event log
    event_log_sampled = []
    # store the name of all non-silent transitions in the log to check for variant model
    non_silent_transitions = [x.name for x in ocpn.transitions if not x.silent]
    # activities without any succeeding activity need to be executed before the process is finished
    all_end_activities = [key for key, value in filtered_succeeding_activities_updated.items() if not value]
    # activities without any preceding activity are enabled from the start
    start_activities = [key for key, value in filtered_preceeding_events_full.items() if not value]
    # if all succeeding events equal all preceeding events, we have a flower model and almost everything is enabled all the time
    is_flower_model = filtered_preceeding_events == filtered_succeeding_activities_updated
    # if the first non-silent transition ends with a number, we have a variant model
    is_variant_model = (not is_flower_model
                        and bool(non_silent_transitions)
                        and non_silent_transitions[0][-1:].isdigit())
    if is_variant_model:
        # accessing the attribute generates the variants (and the column 'event_variant' read below)
        ocel.variants
        # get the amount of variants in the log
        amount_variants = len(np.unique(ocel.log.log['event_variant']))
    #sample the desired amount of traces
    for _ in tqdm(range(amount), desc="Generate the traces"):
        end_activities = list(all_end_activities)
        if is_flower_model:
            enabled = list(np.unique(ocel.log.log.event_activity))
        elif is_variant_model:
            # generate a random integer between 0 and amount of variants -1 to choose the path we are using
            trace_number = random.randint(0, amount_variants-1)
            # only keep the end and start activities that belong to the chosen variant
            end_activities = [x for x in end_activities if _has_exact_numeric_suffix(x, trace_number)]
            enabled = [x for x in start_activities if _has_exact_numeric_suffix(x, trace_number)]
        else:
            enabled = list(start_activities)
        # initialise a list of already executed activities in this trace
        trace = []
        for _step in range(limit_length):
            # without an enabled activity the trace cannot be continued; re-evaluating the
            # last executed activity would not enable anything new, so we stop here
            if not enabled:
                break
            # get a random activity from the enabled activities, add it to the trace and disable it
            executed_activity = enabled[random.randint(0, len(enabled)-1)]
            trace.append(executed_activity)
            enabled.remove(executed_activity)
            # check every direct successor of the executed activity
            for candidate in filtered_succeeding_activities_updated.get(executed_activity, []):
                required = filtered_preceeding_events[candidate]
                # an event with less than two directly preceding activities is enabled right away
                if len(required) < 2:
                    enabled.append(candidate)
                # if all succeeding events equal all preceeding events, we have a flower model and almost everything is enabled all the time
                elif required == filtered_succeeding_activities_updated.get(candidate, []):
                    enabled.append(candidate)
                # otherwise all the needed activities must already have been performed in this trace
                elif all(elem in trace for elem in required):
                    enabled.append(candidate)
            #check if activity is an end activity
            if executed_activity in end_activities:
                #if yes, delete all activities that precede it from the enabled list, since a loop may be possible
                already_passed = filtered_preceeding_events_full[executed_activity]
                enabled = [x for x in enabled if x not in already_passed]
            # delete all duplicates from the enabled list
            enabled = list(set(enabled))
            #check if all end activities have been performed and if end_activities is non empty
            if end_activities and all(x in trace for x in end_activities):
                break
        event_log_sampled.append(trace)
    if save_path is not None:
        with open(save_path, "w", encoding="utf-8") as file:
            for sentence in event_log_sampled:
                line = " ".join(sentence) + "\n"
                file.write(line)
    return event_log_sampled

In [23]:
filtered_preceeding_events_full

{'incidentskip_7': ['New Incident',
  'incidentinit_loop_4',
  'incidenttau_1',
  'customerinit_loop_10',
  'customertau_1',
  'incidentskip_6',
  'customerskip_12'],
 'incidenttauSplit_15': ['incidenttauSplit_12',
  'incidentskip_51',
  'Closed Incident',
  'incidenttauJoin_13',
  'incidentskip_27',
  'incidenttauJoin_16',
  'incidentskip_26',
  'Active',
  'incidentskip_25',
  'incidentinit_loop_23',
  'incidentskip_17',
  'incidentskip_22',
  'incidentskip_21',
  'Awaiting User Info',
  'incidentskip_20',
  'incidentinit_loop_18',
  'incidentskip_14',
  'incidentskip_50',
  'Resolved',
  'incidenttauJoin_32',
  'incidentskip_44',
  'incidentskip_43',
  'Awaiting Evidence',
  'incidentskip_42',
  'incidentinit_loop_40',
  'incidentskip_33',
  'incidentskip_48',
  'Awaiting Vendor',
  'incidentinit_loop_45',
  'incidentskip_47',
  'incidentskip_39',
  'incidentskip_38',
  'Awaiting Problem',
  'incidentskip_37',
  'incidentinit_loop_35',
  'incidentskip_34',
  'incidenttauSplit_31',
 

In [31]:
filtered_preceeding_events_full['New Incident']

['incidentinit_loop_4',
 'incidenttau_1',
 'customerinit_loop_10',
 'customertau_1',
 'incidentskip_6',
 'customerskip_12']

In [24]:
filtered_preceeding_events

{'incidentskip_7': ['New Incident'],
 'incidenttauSplit_15': ['incidenttauSplit_12'],
 'incidentskip_22': ['incidenttauSplit_15'],
 'New Incident': ['customerskip_12',
  'incidentskip_6',
  'customerinit_loop_10',
  'incidentinit_loop_4'],
 'incidentskip_25': ['Active'],
 'incidentinit_loop_40': ['incidentskip_34', 'incidentskip_38'],
 'Awaiting Problem': ['incidentinit_loop_35', 'incidentskip_37'],
 'incidentskip_33': ['incidenttauSplit_31'],
 'Resolved': ['incidentskip_30', 'incidenttauJoin_32'],
 'incidentskip_38': ['Awaiting Problem'],
 'incidentinit_loop_45': ['incidenttauSplit_31'],
 'incidentskip_49': ['Resolved'],
 'incidenttauJoin_16': ['incidentskip_21',
  'incidentskip_22',
  'incidentskip_17',
  'incidentskip_26'],
 'incidentskip_17': ['incidenttauSplit_15'],
 'incidenttau_2': ['incidentskip_8', 'incidentskip_52'],
 'incidenttauJoin_32': ['incidentskip_39',
  'incidentskip_48',
  'incidentskip_33',
  'incidentskip_43',
  'incidentskip_44'],
 'incidentskip_26': ['Active'],
 

In [25]:
filtered_succeeding_activities_updated

{'incidenttau_2': [],
 'incidentskip_3': ['incidenttau_2', 'incidentskip_11', 'incidenttauSplit_12'],
 'Closed Incident': ['Closed Incident',
  'incidentskip_11',
  'incidenttauSplit_12',
  'incidenttau_2'],
 'incidenttauSplit_15': ['incidenttauJoin_16',
  'incidenttauJoin_16',
  'Active',
  'Awaiting User Info'],
 'incidentinit_loop_18': ['Awaiting User Info'],
 'customerskip_7': ['Closed Incident'],
 'incidentskip_47': ['Awaiting Vendor'],
 'incidentskip_50': ['Closed Incident'],
 'incidentskip_7': ['incidenttau_2', 'incidentskip_11', 'incidenttauSplit_12'],
 'customerskip_12': ['New Incident'],
 'incidentskip_39': ['Resolved'],
 'incidenttau_1': ['incidentskip_8', 'incidentinit_loop_9', 'New Incident'],
 'incidentskip_48': ['Resolved'],
 'Awaiting Vendor': ['Awaiting Vendor', 'incidenttauJoin_32'],
 'incidenttauSplit_31': ['incidenttauJoin_32',
  'Awaiting Vendor',
  'Awaiting Problem',
  'incidenttauJoin_32',
  'incidentinit_loop_40',
  'incidentskip_39'],
 'incidentskip_33': ['Res

In [63]:
train_log = sample_traces(ocel, ocpn, 1)

Check the arcs: 100%|██████████| 130/130 [00:00<00:00, 55621.70it/s]
Generate the traces: 100%|██████████| 1/1 [00:00<00:00, 996.27it/s]


In [64]:
train_log

[['New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident',
  'New Incident']]