# Negative Weighted Events

In this notebook, we adapt the negative-events-based generalization measure (without weighting) to object-centric event logs and object-centric Petri nets.

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
from ocpa.objects.log.importer.ocel import factory as ocel_import_factory
from ocpa.algo.discovery.ocpn import algorithm as ocpn_discovery_factory
from src.utils import get_happy_path_log, create_flower_model, generate_variant_model
from ocpa.objects.log.importer.csv import factory as ocel_import_factory_csv
#from models.negative_events_measure_without_weighting import negative_events_without_weighting
import pickle

In [6]:
import numpy as np
from tqdm import tqdm

#function to filter out the silent transitions defined by a list from a given dictionary
def filter_silent_transitions(dic,silent_transitions):
    """
    Drop silent transitions from an activity dictionary, both as keys and inside the value lists.
    :param dic: dictionary mapping an activity to a list of related activities, type: dictionary
    :param silent_transitions: names of the silent transitions to remove, type: list
    :return updated_dictionary: new dictionary without silent keys and without silent entries in the values, type: dictionary
    """
    # Keys keep their original order; every kept key gets a freshly built list.
    return {
        name: [entry for entry in linked if entry not in silent_transitions]
        for name, linked in dic.items()
        if name not in silent_transitions
    }

#iterative implementation of a depth-first search (DFS) algorithm
def dfs(graph, visited, activity, preceding_events):
    """
    Function to perform a depth-first search (DFS) algorithm on the activity graph.
    The nodes reachable from ``activity`` are appended to ``preceding_events`` in post-order,
    so ``activity`` itself is always the last element that is appended.
    The traversal uses an explicit stack instead of recursion, so long chains of activities
    cannot hit Python's recursion limit. Nodes without an entry in ``graph`` are treated as
    having no neighbours.
    :param graph: activity graph (node -> list of neighbouring nodes), type: dictionary
    :param visited: set of already visited nodes, updated in place, type: set
    :param activity: activity at which the search starts, type: string
    :param preceding_events: list to store the reached nodes, updated in place, type: list
    """
    visited.add(activity)
    # each stack entry holds a node and an iterator over the neighbours that still need to be examined
    stack = [(activity, iter(graph.get(activity, ())))]
    while stack:
        node, neighbours = stack[-1]
        for neighbour in neighbours:
            if neighbour not in visited:
                # descend into the first unvisited neighbour; the iterator remembers where to resume
                visited.add(neighbour)
                stack.append((neighbour, iter(graph.get(neighbour, ()))))
                break
        else:
            # all neighbours handled: emit the node (post-order) and go back to its parent
            stack.pop()
            preceding_events.append(node)

def negative_events_without_weighting(ocel,ocpn):
    """
    Function to calculate the negative events measure without weighting based on the used places inside an object-centric petri-net.
    Every process execution of the log is replayed in timestamp order. For each event, the activities that
    are currently enabled (other than the executed one) are counted as allowed generalizations (AG), and the
    other log activities that are not enabled are counted as disallowed generalizations (DG).
    Side effect: a column 'event_execution' with the process execution of each event is added to ocel.log.log.
    :param ocel: object-centric event log for which the measure should be calculated, type: ocel-log
    :param ocpn: corresponding object-centric petri-net, type: object-centric petri-net
    :return generalization: AG / (AG + DG) rounded to 4 digits, or 0.0 if nothing was counted, type: float
    """
    #since the process execution mappings have lists of length one,
    #we create another dictionary that only contains the value inside the list to be able to derive the case
    execution_mappings = ocel.process_execution_mappings
    mapping_dict = {key: execution_mappings[key][0] for key in execution_mappings}
    #we generate a new column in the class (log) that contains the process execution (case) number via the generated dictionary
    ocel.log.log['event_execution'] = ocel.log.log.index.map(mapping_dict)
    #generate a list of unique events in the event log
    events = np.unique(ocel.log.log.event_activity)
    # dictionary to store each transition as key and a list of the sources of its incoming arcs as value
    targets = {}
    # dictionary to store each transition as key and a list of the targets of its outgoing arcs as value
    sources = {}
    for arc in tqdm(ocpn.arcs, desc="Check the arcs"):
        # arc ends in a transition: remember the arc source as a directly prior place of that transition
        if arc.target in ocpn.transitions:
            targets.setdefault(arc.target.name, []).append(arc.source.name)
        # arc starts in a transition: remember the arc target as a directly following place of that transition
        if arc.source in ocpn.transitions:
            sources.setdefault(arc.source.name, []).append(arc.target.name)
    #a transition directly precedes another one if one of its following places is a prior place of the other one
    preceding_activities = {
        target_key: [
            source_key
            for source_key, source_value in sources.items()
            if any(element in source_value for element in target_value)
        ]
        for target_key, target_value in targets.items()
    }
    #the same relation seen from the other side: the directly succeeding transitions of each transition
    succeeding_activities = {
        source_key: [
            target_key
            for target_key, target_value in targets.items()
            if any(element in target_value for element in source_value)
        ]
        for source_key, source_value in sources.items()
    }
    #store the name of all silent transitions in the model
    silent_transitions = [x.name for x in ocpn.transitions if x.silent]
    #replace each silent successor by its own direct successors
    #NOTE(review): only one level is resolved; a silent transition directly followed by another silent one is dropped by the filter below - confirm this is intended
    succeeding_activities_updated = {}
    for key, values in succeeding_activities.items():
        new_values = []
        for value in values:
            if value in silent_transitions:
                # .get: a silent transition without outgoing arcs has no entry in succeeding_activities
                new_values.extend(succeeding_activities.get(value, []))
            else:
                new_values.append(value)
        succeeding_activities_updated[key] = new_values
    #use a depth-first search (DFS) to collect all (direct and indirect) preceding activities of each activity
    preceding_events_dict = {}
    for activity in preceding_activities:
        visited = set()
        preceding_events = []
        dfs(preceding_activities, visited, activity, preceding_events)
        #the last element is the activity itself, so it is removed before reversing the order
        preceding_events_dict[activity] = preceding_events[:-1][::-1]
    #delete all silent transitions from the dict of all (direct and indirect) preceding activities
    filtered_preceeding_events_full = filter_silent_transitions(preceding_events_dict,silent_transitions)
    #delete all silent transitions from the dict of directly preceding activities
    filtered_preceeding_events = filter_silent_transitions(preceding_activities,silent_transitions)
    #delete all silent transitions from the dict of directly succeeding activities
    filtered_succeeding_activities_updated = filter_silent_transitions(succeeding_activities_updated,silent_transitions)
    #activities without any preceding activity are enabled at every point of the replay (loop invariant)
    always_enabled = [key for key, value in filtered_preceeding_events_full.items() if not value]
    #generate a grouped df such that we can iterate through the log case by case (sort by timestamp to ensure the correct process sequence)
    grouped_df = ocel.log.log.sort_values('event_timestamp').groupby('event_execution')
    DG = 0 #Disallowed Generalization initialisation
    AG = 0 #Allowed Generalization initialisation
    #replay every process execution (the former early exit after the first execution was a debugging leftover)
    for _, group_df in tqdm(grouped_df, total=len(grouped_df),desc="Calculate Generalization for all process executions"):
        #activities that are currently enabled, starting from all activities that do not have any preceding activity
        enabled = set(always_enabled)
        #activities that have already been executed in this process execution
        executed = set()
        for current_activity in group_df['event_activity']:
            #the executed activity is not a negative event for itself
            #it may happen that an activity is not present in the model but nevertheless executed in the log
            enabled.discard(current_activity)
            #negative events (all other log activities) that can not be executed in the process model at the moment
            disallowed = sum(
                1 for candidate in events
                if candidate != current_activity and candidate not in enabled
            )
            executed.add(current_activity)
            #update the values of allowed and disallowed generalizations based on the paper logic
            AG += len(enabled)
            DG += disallowed
            #it may happen that activities in the log are not in the process model, hence the default
            for candidate in filtered_succeeding_activities_updated.get(current_activity, []):
                required = filtered_preceeding_events[candidate]
                #a successor with fewer than two direct predecessors is enabled right away,
                #otherwise all of its direct predecessors must already have been executed in this process execution
                if len(required) < 2 or all(elem in executed for elem in required):
                    enabled.add(candidate)
            #activities without any preceding activity stay enabled in our process model
            enabled.update(always_enabled)
    #calculate the generalization based on the paper
    total = AG + DG
    if total == 0:
        #nothing was counted (e.g. empty log), avoid a division by zero
        return 0.0
    generalization = AG / total
    return np.round(generalization,4)

# P2P Log

### Standard Petri Net

As a first step, we load the OCEL log into the notebook and discover the object-centric Petri net.

In [7]:
filename = "../src/data/jsonocel/p2p-normal.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})

In [17]:
# Object types that are passed to create_flower_model together with the log file name.
ots = ["PURCHORD","INVOICE","PURCHREQ","MATERIAL","GDSRCPT"]
# NOTE(review): this rebinds `ocpn`, which the previous cell set to the discovered net. When the notebook is
# run top to bottom, the "Standard Petri Net" measure call further down (which passes `ocpn`) receives this
# flower model instead - confirm whether that is intended or whether a separate variable name should be used.
ocpn = create_flower_model(filename,ots)

In [18]:
# Step-by-step version of the first half of negative_events_without_weighting, kept at top level so that
# the intermediate dictionaries can be inspected in the following cells.
# Each event id maps to a one-element list of process executions; unwrap it to get the case of every event.
execution_mappings = ocel.process_execution_mappings
mapping_dict = {event_id: execution_mappings[event_id][0] for event_id in execution_mappings}
# Attach the process execution (case) number to every event of the log as a new column.
ocel.log.log['event_execution'] = ocel.log.log.index.map(mapping_dict)
# Distinct activity names occurring in the log.
events = np.unique(ocel.log.log.event_activity)
# targets: transition name -> names of the sources of its incoming arcs (its directly prior places)
targets = {}
# sources: transition name -> names of the targets of its outgoing arcs (its directly following places)
sources = {}
for arc in tqdm(ocpn.arcs, desc="Check the arcs"):
    # arc ends in a transition
    if arc.target in ocpn.transitions:
        targets.setdefault(arc.target.name, []).append(arc.source.name)
    # arc starts in a transition
    if arc.source in ocpn.transitions:
        sources.setdefault(arc.source.name, []).append(arc.target.name)
# A transition directly precedes another one when one of its following places is a prior place of the other.
preceding_activities = {
    transition: [
        producer
        for producer, produced_places in sources.items()
        if any(place in produced_places for place in input_places)
    ]
    for transition, input_places in targets.items()
}
# The mirrored relation: the directly succeeding transitions of every transition.
succeeding_activities = {
    transition: [
        consumer
        for consumer, consumed_places in targets.items()
        if any(place in consumed_places for place in output_places)
    ]
    for transition, output_places in sources.items()
}
# Names of all silent transitions of the model.
silent_transitions = [transition.name for transition in ocpn.transitions if transition.silent]
# Replace every silent successor by the direct successors of that silent transition.
succeeding_activities_updated = {}
for transition, successors in succeeding_activities.items():
    expanded = []
    for successor in successors:
        if successor in silent_transitions:
            expanded.extend(succeeding_activities[successor])
        else:
            expanded.append(successor)
    succeeding_activities_updated[transition] = expanded
# Collect all (direct and indirect) preceding activities of every activity with a depth-first search.
preceding_events_dict = {}
for start in preceding_activities:
    reached = set()
    post_order = []
    dfs(preceding_activities, reached, start, post_order)
    # the start activity itself is appended last by dfs, so it is cut off before reversing
    preceding_events_dict[start] = list(reversed(post_order[:-1]))
# Remove the silent transitions from all three relations.
filtered_preceeding_events_full = filter_silent_transitions(preceding_events_dict,silent_transitions)
filtered_preceeding_events = filter_silent_transitions(preceding_activities,silent_transitions)
filtered_succeeding_activities_updated = filter_silent_transitions(succeeding_activities_updated,silent_transitions)

Check the arcs: 100%|██████████| 38/38 [00:00<?, ?it/s]


In [19]:
filtered_preceeding_events_full

{'Create Purchase Order': ['Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt',
  'Receive Invoice',
  'Clear Invoice'],
 'Goods Issue': ['Create Purchase Order',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt',
  'Receive Invoice',
  'Clear Invoice'],
 'Plan Goods Issue': ['Create Purchase Order',
  'Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt',
  'Receive Invoice',
  'Clear Invoice'],
 'Receive Goods': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt',
  'Receive Invoice',
  'Clear Invoice'],
 'Verify Material': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Create Purchase Requisition',
  'Issue Goods Receipt',
  'Receive Invoice',
  'Cl

In [20]:
filtered_preceeding_events

{'Create Purchase Order': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Receive Invoice',
  'Clear Invoice',
  'Issue Goods Receipt'],
 'Goods Issue': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt'],
 'Plan Goods Issue': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt'],
 'Receive Goods': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Receive Invoice',
  'Clear Invoice',
  'Issue Goods Receipt'],
 'Verify Material': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt'],

In [21]:
filtered_succeeding_activities_updated

{'Create Purchase Order': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Receive Invoice',
  'Clear Invoice',
  'Issue Goods Receipt'],
 'Goods Issue': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt'],
 'Plan Goods Issue': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt'],
 'Receive Goods': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Receive Invoice',
  'Clear Invoice',
  'Issue Goods Receipt'],
 'Verify Material': ['Create Purchase Order',
  'Goods Issue',
  'Plan Goods Issue',
  'Receive Goods',
  'Verify Material',
  'Create Purchase Requisition',
  'Issue Goods Receipt'],

In [53]:
value = negative_events_without_weighting (ocel, ocpn)
value

Check the arcs: 100%|██████████| 40/40 [00:00<?, ?it/s]
Calculate Generalization for all process executions: 100%|██████████| 80/80 [00:00<00:00, 831.46it/s]


0.1845

### Happy Path Petri Net

In [54]:
happy_path__ocel = get_happy_path_log(filename)

In [55]:
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})

In [56]:
value = negative_events_without_weighting (ocel, happy_path_ocpn)
value

Check the arcs: 100%|██████████| 38/38 [00:00<?, ?it/s]
Calculate Generalization for all process executions: 100%|██████████| 80/80 [00:00<00:00, 788.21it/s]


0.1958

### Flower Model Petri Net

In [57]:
ots = ["PURCHORD","INVOICE","PURCHREQ","MATERIAL","GDSRCPT"]

In [58]:
flower_ocpn = create_flower_model(filename,ots)

In [59]:
value = negative_events_without_weighting (ocel, flower_ocpn)
value

Check the arcs: 100%|██████████| 38/38 [00:00<?, ?it/s]
Calculate Generalization for all process executions: 100%|██████████| 80/80 [00:00<00:00, 806.14it/s]


0.0417

### Variant Model Petri Net

We import the previously generated variant log for the measure computation, while the variant model itself is generated from the original log.

In [60]:
filename_variant = "../src/data/csv/p2p_variant_log.csv" 
object_types = ["PURCHORD","INVOICE","PURCHREQ","MATERIAL","GDSRCPT"]
parameters = {"obj_names": object_types,
              "val_names": [],
              "act_name": "event_activity",
              "time_name": "event_timestamp",
              "sep": ","}
ocel_variant = ocel_import_factory_csv.apply(file_path=filename_variant, parameters=parameters)

In [61]:
filename = "../src/data/jsonocel/p2p-normal.jsonocel"
ots = ["PURCHORD","INVOICE","PURCHREQ","MATERIAL","GDSRCPT"]
ocel = ocel_import_factory.apply(filename)
variant_ocpn = generate_variant_model(ocel,save_path_logs='../src/data/csv/p2p_variants/p2p_variant',object_types = ots ,save_path_visuals=f"../reports/figures/p2p_variant_total.svg" )

Generating Variant Models: 100%|██████████| 20/20 [00:02<00:00,  9.39it/s]
Processing Variant Nets: 100%|██████████| 20/20 [00:00<00:00, 11168.43it/s]

#########Start generating Object-Centric Petri Net#########
#########Finished generating Object-Centric Petri Net#########





In [62]:
# NOTE(review): the markdown of this section says the variant log is used for the measure computation, but this
# call passes `ocel` (the original log re-imported in the previous cell) and `ocel_variant` is not used here -
# confirm which log is intended.
value = negative_events_without_weighting (ocel, variant_ocpn)
value

Check the arcs: 100%|██████████| 760/760 [00:00<00:00, 15714.04it/s]
Calculate Generalization for all process executions: 100%|██████████| 80/80 [00:00<00:00, 638.20it/s]


0.7143

# BPI-Challenge 2017 Log

### Standard Petri Net

As a first step, we load the OCEL log into the notebook and discover the object-centric Petri net.

In [63]:
filename = "../src/data/jsonocel/BPI2017-Final.jsonocel"
ocel = ocel_import_factory.apply(filename)
ocpn = ocpn_discovery_factory.apply(ocel, parameters={"debug": False})

In [65]:
value = negative_events_without_weighting (ocel, ocpn)
value

Check the arcs: 100%|██████████| 120/120 [00:00<00:00, 72628.64it/s]
Calculate Generalization for all process executions: 100%|██████████| 31509/31509 [01:46<00:00, 296.80it/s]


0.3569

### Happy Path Petri Net

In [66]:
happy_path__ocel = get_happy_path_log(filename)

In [67]:
happy_path_ocpn = ocpn_discovery_factory.apply(happy_path__ocel, parameters={"debug": False})

In [68]:
value = negative_events_without_weighting (ocel, happy_path_ocpn)
value

Check the arcs: 100%|██████████| 26/26 [00:00<?, ?it/s]
Calculate Generalization for all process executions: 100%|██████████| 31509/31509 [01:14<00:00, 423.16it/s]


0.1077

### Flower Model Petri Net

In [69]:
ots = ["application","offer"]

In [70]:
flower_ocpn = create_flower_model(filename,ots)

In [71]:
value = negative_events_without_weighting (ocel, flower_ocpn)
value

Check the arcs: 100%|██████████| 56/56 [00:00<00:00, 61166.93it/s]
Calculate Generalization for all process executions: 100%|██████████| 31509/31509 [02:01<00:00, 260.33it/s]


0.0

### Variant Model Petri Net

We import the previously generated variant log for the measure computation, while the variant model itself is generated from the original log.

In [72]:
filename_variant = "../src/data/csv/bpi2017_variant_log.csv" 
object_types = ["application","offer"]
parameters = {"obj_names": object_types,
              "val_names": [],
              "act_name": "event_activity",
              "time_name": "event_timestamp",
              "sep": ","}
ocel_variant = ocel_import_factory_csv.apply(file_path=filename_variant, parameters=parameters)

We load the pickle file containing the variant model of the BPI Challenge 2017 log, which was generated in the process models notebook.

In [75]:
# Load the pickled variant model from disk.
# NOTE: pickle.load can execute arbitrary code contained in the file, so only load pickle files that were
# created by a trusted source.
with open("../src/data/csv/bpi_variant_ocpn.pickle", "rb") as file:
    variant_ocpn = pickle.load(file)

# Print the loaded object as a quick check that loading worked.
print(variant_ocpn)

<ocpa.objects.oc_petri_net.obj.ObjectCentricPetriNet object at 0x000001EDFF0D7850>


In [76]:
# NOTE(review): the markdown of this section says the variant log is used for the measure computation, but this
# call passes `ocel` and `ocel_variant` is not used here - confirm which log is intended.
# The recorded run of this cell was interrupted manually while the arcs of the variant model were processed.
value = negative_events_without_weighting (ocel, variant_ocpn)
value

Check the arcs:   0%|          | 935/214724 [00:23<1:28:30, 40.26it/s]


KeyboardInterrupt: 