# Imports

In [None]:
%pip install pandas==2.0.3
%pip install tqdm==4.66.1
%pip install pm4py==2.7.8.2
%pip install pygraphviz==1.11

In [1]:
import json
import pandas as pd
import pm4py
import networkx as nx
from networkx.algorithms import isomorphism
from enum import Enum, auto
from IPython.display import display, HTML
from typing import Tuple, Dict

from pm4py.objects.bpmn.obj import BPMN

# Process Mining

In [16]:
class ProcessingMode(Enum):
    """Processing modes for which preprocessed log files are loaded and mined.

    The member *name* is what the rest of the notebook uses: it is embedded in
    the log file name and used as the top-level key of the per-mode dicts.
    """

    # Currently the only mode; the value is assigned automatically.
    MODEL = auto()

def discover_bpmn_graph(log_data, graph_prefix):
    """Discover a BPMN graph from log records.

    The records are loaded into a DataFrame, all assigned to the single case
    ``'01'``, mined into a Petri net with pm4py's heuristics miner and then
    converted to BPMN. After conversion, a coloured HTML message reports
    whether every ``logging_statement_id`` found in the logs appears as the
    name of a BPMN task.

    :param log_data: Records accepted by ``pd.DataFrame``; each must provide
        ``timestamp`` and ``logging_statement_id``.
    :param graph_prefix: Unused; kept so existing callers keep working.
    :return: The discovered BPMN graph.
    """
    logs_df = pd.DataFrame(log_data)
    logs_df['timestamp'] = pd.to_datetime(logs_df['timestamp'])
    logs_df['logging_statement_id'] = logs_df['logging_statement_id'].astype(str)
    # Every record is put into the same case, so the log is mined as one trace.
    logs_df['case_id'] = '01'

    params = {
        'dependency_threshold': 0.45,
        'and_threshold': 0.65,
        'loop_two_threshold': 0.5,
        'activity_key': 'logging_statement_id',
        'case_id_key': 'case_id',
        'timestamp_key': 'timestamp'
    }
    net, im, fm = pm4py.discover_petri_net_heuristics(logs_df, **params)

    # Convert once; the same object is inspected below and returned.
    bpmn_graph = pm4py.convert_to_bpmn(net, im, fm)

    # Compare the ids seen in the logs with the names of the BPMN tasks.
    unique_logging_statement_ids = set(logs_df['logging_statement_id'].unique())
    task_labels = {
        node.get_name()
        for node in bpmn_graph.get_nodes()
        if isinstance(node, BPMN.Task)
    }
    missing_ids = unique_logging_statement_ids - task_labels

    if missing_ids:
        # Sorted so the report is stable between runs.
        missing_ids_str = '<br>'.join(sorted(missing_ids))
        display(HTML(f"<span style='color: red;'>Missing logging_statement_ids in BPMN graph:<br>{missing_ids_str}</span>"))
    else:
        display(HTML("<span style='color: green;'>All logging_statement_ids accounted for</span>"))

    return bpmn_graph

def load_logs(base_dir, mode):
    """Read and parse ``processed_logs_mode_<mode.name>.json`` from ``base_dir``.

    :param base_dir: Directory that contains the log file.
    :param mode: Object whose ``name`` attribute selects the file.
    :return: The parsed JSON content.
    """
    log_path = f'{base_dir}/processed_logs_mode_{mode.name}.json'
    with open(log_path) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def get_unique_services_and_subprocesses(logs_A, logs_B):
    """Collect the services, and per service the subprocesses, seen in either log set.

    :param logs_A: Mapping of service name -> mapping keyed by subprocess name.
    :param logs_B: Same structure as ``logs_A``.
    :return: ``(services, subprocesses)``: the set of all service names and a
        dict mapping each of them to the set of its subprocess names.
    """
    all_services = set(logs_A.keys()) | set(logs_B.keys())
    subprocesses_by_service = {
        service: set(logs_A.get(service, {}).keys()) | set(logs_B.get(service, {}).keys())
        for service in all_services
    }
    return all_services, subprocesses_by_service

def create_empty_bpmn():
    """Create a BPMN graph named 'Empty BPMN' to which nothing is added.

    Used as a placeholder when no logs exist for a service/subprocess, so the
    caller still has a BPMN object to store. No start event, task or flow is
    added here.
    """
    return BPMN(name='Empty BPMN')

def process_graphs_and_save(logs, graph_name, all_process_graphs, unique_services, unique_subprocesses):
    """Discover one BPMN graph per (mode, service, subprocess) and store it.

    Results are written into ``all_process_graphs`` under
    ``[graph_name][mode.name][service][subprocess]['bpmn_graph']``. A
    combination with no logs gets an empty BPMN graph instead of a mined one,
    so both log sets end up with the same keys.

    :param logs: Logs keyed by mode name, then service, then subprocess; each
        leaf is read through its ``'logs'`` entry.
    :param graph_name: Top-level key under which the graphs are stored.
    :param all_process_graphs: Dict that is filled in place.
    :param unique_services: Service names per ``ProcessingMode`` member.
    :param unique_subprocesses: Per ``ProcessingMode`` member, a mapping of
        service name to its subprocess names.
    """
    for mode in ProcessingMode:
        mode_bucket = all_process_graphs.setdefault(graph_name, {}).setdefault(mode.name, {})

        for service in unique_services[mode]:
            for subprocess in unique_subprocesses[mode].get(service, {}):
                subprocess_entry = logs[mode.name].get(service, {}).get(subprocess, {})
                target_logs = subprocess_entry.get('logs', [])

                if target_logs:
                    # The prefix is the last whitespace-separated word of the graph name.
                    graph_prefix = f'{graph_name.split()[-1]}'
                    bpmn_graph = discover_bpmn_graph(target_logs, graph_prefix)
                else:
                    print(f"No logs found for {service} -> {subprocess} in {mode.name}. Creating an empty BPMN graph.")
                    bpmn_graph = create_empty_bpmn()

                # Store the graph for this service/subprocess pair.
                mode_bucket.setdefault(service, {})[subprocess] = {
                    'bpmn_graph': bpmn_graph,
                }

# Load the preprocessed logs of the expected (A) and the faulty (B) run, keyed by mode name
logs_A = {mode.name: load_logs('../Data/Reproduced Experiment/Preprocessed/Logs/Part B/Expected', mode) for mode in ProcessingMode}
logs_B = {mode.name: load_logs('../Data/Reproduced Experiment/Preprocessed/Logs/Part B/Faulty', mode) for mode in ProcessingMode}

# Get unique services and subprocesses for each mode
# (note: these two dicts are keyed by the ProcessingMode member itself, not by its name)
unique_services = {}
unique_subprocesses = {}
for mode in ProcessingMode:
    unique_services[mode], unique_subprocesses[mode] = get_unique_services_and_subprocesses(logs_A[mode.name], logs_B[mode.name])

# Initialize the all_process_graphs dictionary (filled in place by process_graphs_and_save)
all_process_graphs = {}

# Process graphs and save them; services/subprocesses missing from one log set get an empty BPMN graph
process_graphs_and_save(logs_A, 'Expected', all_process_graphs, unique_services, unique_subprocesses)
process_graphs_and_save(logs_B, 'Faulty', all_process_graphs, unique_services, unique_subprocesses)

display(HTML(f"<b><span style='color: blue;'>&emsp;&emsp; --> DONE</span></b>"))


# Gateway Identification Using Path-Based Signature

In [17]:
import copy

def extract_gateways(bpmn_graph):
    """Group the gateways of a BPMN graph by a signature built from their outgoing paths.

    For every gateway, all paths that start at it and stop at the first Event
    or Task are collected. A path is the sequence of labels met on the way:
    ``"<GatewayType>_Source"`` for the starting gateway, the class name for
    every further gateway and the node name for the terminating Event/Task.
    A gateway already on the current path is not entered again.

    The sorted paths of a gateway, each joined with ``'; '`` and dumped as a
    JSON list, form its signature. Gateways with equal signatures are grouped,
    which lets gateways of two different models be matched without relying on
    their ids.

    :param bpmn_graph: BPMN graph whose gateways are analysed.
    :return: Dict mapping signature (JSON string) -> list of gateway ids,
        with keys in sorted order.
    """
    terminal_types = (BPMN.Event, BPMN.Task)
    walkable_types = (BPMN.Gateway,) + terminal_types

    def collect_paths(node, trail, gateways_on_trail, found_paths):
        # An Event or Task ends the path: record a snapshot of the trail.
        if isinstance(node, terminal_types):
            found_paths.append(tuple(trail))
            return

        for arc in node.get_out_arcs():
            successor = arc.get_target()
            if not isinstance(successor, walkable_types) or successor in gateways_on_trail:
                continue

            is_gateway = isinstance(successor, BPMN.Gateway)
            # Gateways are labelled by type, everything else by name.
            label = type(successor).__name__ if is_gateway else successor.get_name()

            trail.append(label)
            if is_gateway:
                gateways_on_trail.add(successor)

            collect_paths(successor, trail, gateways_on_trail, found_paths)

            # Undo the step before trying the next outgoing arc.
            trail.pop()
            if is_gateway:
                gateways_on_trail.remove(successor)

    # One traversal per gateway, each with its own result list.
    paths_by_gateway = {}
    for node in bpmn_graph.get_nodes():
        if not isinstance(node, BPMN.Gateway):
            continue
        gateway_paths = []
        paths_by_gateway[node.id] = gateway_paths
        collect_paths(node, [f"{type(node).__name__}_Source"], set(), gateway_paths)

    # Bucket the gateway ids by their signature.
    gateways_by_signature = {}
    for gateway_id, paths in paths_by_gateway.items():
        signature = json.dumps(['; '.join(path) for path in sorted(paths)])
        gateways_by_signature.setdefault(signature, []).append(gateway_id)

    return {signature: gateways_by_signature[signature] for signature in sorted(gateways_by_signature)}
        
def rebuild_bpmn_graph(bpmn_graph, gateway_mapping):
    """Return a copy of ``bpmn_graph`` whose nodes are re-identified.

    Each node is recreated with the same class and process. Its new id and
    name are both set to the value ``gateway_mapping`` holds for the node's
    id, or to the node's own name when the id is not mapped. Flows are
    recreated between the new nodes with their original id, name and process.
    Nodes are added in name order and flows in (source name, target name) order.

    :param bpmn_graph: Graph to copy.
    :param gateway_mapping: Dict of original node id -> new identifier.
    :return: The newly built BPMN graph.
    """
    rebuilt = BPMN(name=bpmn_graph.get_name())
    replacement_for = {}

    for original in sorted(bpmn_graph.get_nodes(), key=lambda n: n.get_name()):
        identifier = gateway_mapping.get(original.get_id(), original.get_name())
        replacement = type(original)(id=identifier, name=identifier, process=original.get_process())
        rebuilt.add_node(replacement)
        replacement_for[original] = replacement

    def endpoint_names(flow):
        return (flow.get_source().get_name(), flow.get_target().get_name())

    for flow in sorted(bpmn_graph.get_flows(), key=endpoint_names):
        rebuilt.add_flow(
            type(flow)(
                source=replacement_for[flow.get_source()],
                target=replacement_for[flow.get_target()],
                id=flow.get_id(),
                name=flow.get_name(),
                process=flow.get_process(),
            )
        )

    return rebuilt

def handle_gateways_for_model(gateway_paths, other_gateway_paths, gateway_counter, name_mappings, model_label):
    """Assign new names to the gateways of one model, pairing them with the other model.

    For every path signature, gateways are paired position by position with
    the other model's gateways of the same signature; both members of a pair
    receive the same ``Gateway_<n>_Shared`` name. Gateways left without a
    partner receive ``Gateway_<n>_<model_label>``.

    :param gateway_paths: Signature -> list of gateway ids for this model.
    :param other_gateway_paths: The same mapping for the other model.
    :param gateway_counter: Running counters, updated in place: an int under
        ``"Shared"`` and a dict under ``model_label``.
    :param name_mappings: Dict of gateway id -> new name, updated in place.
    :param model_label: Label used for this model's unpaired gateways.
    """
    for path_structure, gateways in gateway_paths.items():
        counterparts = other_gateway_paths.get(path_structure, [])
        shared_total = min(len(gateways), len(counterparts))

        # zip stops at the shorter list, i.e. after exactly shared_total pairs.
        for own_gateway, other_gateway in zip(gateways, counterparts):
            shared_name = f"Gateway_{gateway_counter['Shared']}_Shared"
            name_mappings[own_gateway] = shared_name
            name_mappings[other_gateway] = shared_name
            gateway_counter["Shared"] += 1

        # Everything past the shared prefix exists only in this model.
        for leftover in gateways[shared_total:]:
            per_model_counts = gateway_counter[model_label]
            counter_key = type(leftover).__name__
            index = per_model_counts.get(counter_key, 0)
            name_mappings[leftover] = f"Gateway_{index}_{model_label}"
            per_model_counts[counter_key] = index + 1
            
def verify_gateway_mappings(bpmn_model_a, bpmn_model_b, name_mappings):
    """Print, for each model, whether all of its gateway ids occur in ``name_mappings``.

    :param bpmn_model_a: BPMN graph reported as "Expected".
    :param bpmn_model_b: BPMN graph reported as "Faulty".
    :param name_mappings: Dict keyed by gateway id.
    """
    def gateway_ids(model):
        return {node.id for node in model.get_nodes() if isinstance(node, BPMN.Gateway)}

    # Gateway ids of each model that have no entry in the mapping.
    unmapped_a = gateway_ids(bpmn_model_a).difference(name_mappings.keys())
    unmapped_b = gateway_ids(bpmn_model_b).difference(name_mappings.keys())

    if not unmapped_a:
        print("All gateways from Expected are accounted for in name mappings.")
    else:
        print(f"Missing Gateways from Expected in name mappings: {unmapped_a}")

    if not unmapped_b:
        print("All gateways from Faulty are accounted for in name mappings.")
    else:
        print(f"Missing Gateways from Faulty in name mappings: {unmapped_b}")
                        
def handle_gateways_for_pairs(model_a, model_b):
    """Rename gateways consistently in every pair of corresponding BPMN graphs.

    Walks every mode/service/subprocess present in ``model_a``, matches the
    gateways of the two graphs by path signature, prints a mapping check and
    replaces both stored graphs with rebuilt copies that use the new names.
    Counters and mappings start fresh for each pair.

    :param model_a: Graphs keyed by mode name, service and subprocess; each
        leaf holds the graph under ``'bpmn_graph'``. Updated in place.
    :param model_b: Same structure; must contain every key of ``model_a``.
        Updated in place.
    """
    for mode in ProcessingMode:
        for service in model_a[mode.name]:
            for subprocess in model_a[mode.name][service]:
                counters = {"A": {}, "B": {}, "Shared": 0}
                renames = {}

                slot_a = model_a[mode.name][service][subprocess]
                slot_b = model_b[mode.name][service][subprocess]
                graph_a = slot_a['bpmn_graph']
                graph_b = slot_b['bpmn_graph']

                signatures_a = extract_gateways(graph_a)
                signatures_b = extract_gateways(graph_b)

                # Each model is processed against the other one.
                handle_gateways_for_model(signatures_a, signatures_b, counters, renames, "A")
                handle_gateways_for_model(signatures_b, signatures_a, counters, renames, "B")
                verify_gateway_mappings(graph_a, graph_b, renames)

                slot_a['bpmn_graph'] = rebuild_bpmn_graph(graph_a, renames)
                slot_b['bpmn_graph'] = rebuild_bpmn_graph(graph_b, renames)

        
# Work on a deep copy so the gateway renaming below leaves all_process_graphs untouched.
all_models = copy.deepcopy(all_process_graphs)

# Rename gateways in each Expected/Faulty pair; the stored graphs in all_models are replaced in place.
handle_gateways_for_pairs(all_models['Expected'], all_models['Faulty'])
# Disabled; validate_hidden_transitions is not defined in the visible part of this notebook.
# validate_hidden_transitions(all_models['Model A'], all_models['Faulty'])

All gateways from Expected are accounted for in name mappings.
All gateways from Faulty are accounted for in name mappings.
All gateways from Expected are accounted for in name mappings.
All gateways from Faulty are accounted for in name mappings.


# A*

In [18]:
import heapq

def heuristic(graph: nx.DiGraph, source: str, target: str) -> float:
    """Return the estimated remaining cost from ``source`` to ``target``.

    Always 0: the graph has no grid/coordinates to estimate a distance from.
    With this estimate the search below orders nodes by path cost alone.
    """
    return 0


def a_star_search(graph: nx.DiGraph, start: str, goal: str, logs: list, require_loop=False):
    """Find the cheapest path from ``start`` to ``goal`` that avoids other logged nodes.

    Nodes whose id occurs as a ``logging_statement_id`` in ``logs`` are never
    entered, except for ``goal`` itself, so a path only runs through
    unlogged nodes. Edge cost is the edge's ``'weight'`` attribute, or 1 when
    absent.

    Every node is expanded at most once, so the search terminates even when
    unlogged nodes form a cycle and ``goal`` is unreachable.

    :param graph: Directed graph; ``graph[node]`` must map each successor to
        its edge-attribute dict (as ``nx.DiGraph`` does).
    :param start: Start node id.
    :param goal: Target node id.
    :param logs: List of dicts with a ``logging_statement_id`` entry.
    :param require_loop: When true, the returned path must contain at least
        one edge, so for ``start == goal`` a real cycle back to the node is
        searched instead of returning ``[start]``.
    :return: List of node ids from ``start`` to ``goal`` (both included), or
        ``None`` when no such path exists.
    """
    # Set for O(1) membership tests; the goal must stay reachable.
    blocked = {log['logging_statement_id'] for log in logs}
    blocked.discard(goal)

    # Heap entries: (f_score, g_score, node, path leading to node).
    open_list = [(0, 0, start, [])]
    best_cost = {}    # cheapest cost with which a node was queued so far
    expanded = set()  # nodes whose successors were already generated

    while open_list:
        _, cost, current, path = heapq.heappop(open_list)

        # An empty path means we are still at the start; that is only a valid
        # answer when no loop is required.
        if current == goal and (path or not require_loop):
            return path + [current]

        if current in expanded:
            continue
        expanded.add(current)

        for neighbor, edge_data in graph[current].items():
            if neighbor in blocked:
                continue
            # The goal may be re-entered after expansion: that is how a cycle
            # back to the start is closed when start == goal.
            if neighbor in expanded and neighbor != goal:
                continue

            tentative = cost + edge_data.get('weight', 1)
            if tentative >= best_cost.get(neighbor, float('inf')):
                continue

            best_cost[neighbor] = tentative
            f_score = tentative + heuristic(graph, neighbor, goal)
            heapq.heappush(open_list, (f_score, tentative, neighbor, path + [current]))

    return None  # goal not reachable



In [19]:
from collections import defaultdict

def initialize_graph_attributes(graph: nx.DiGraph, model: str):
    """Make sure every node and edge of ``graph`` carries a ``'count'`` dict.

    Missing ``'count'`` attributes are created as ``{'A': 0, 'B': 0}``. An
    existing one is only extended: when ``model`` is ``'A'`` or ``'B'`` and
    has no entry yet, that entry is added with value 0.

    :param graph: Graph whose node/edge attributes are updated in place.
    :param model: Model label the counts will be recorded for.
    """
    is_known_model = model in ('A', 'B')

    def ensure_count(attributes):
        counts = attributes.setdefault('count', {'A': 0, 'B': 0})
        if is_known_model and model not in counts:
            counts[model] = 0

    for node in graph.nodes():
        ensure_count(graph.nodes[node])

    for source, target in graph.edges():
        ensure_count(graph[source][target])

def update_path_attributes(graph, path, model):
    """Count one traversal of ``path`` for ``model``.

    Every edge between consecutive path nodes and every inner node of the
    path (first and last excluded) gets its ``'count'`` entry for ``model``
    increased by one. A missing ``'count'`` attribute is created as
    ``{'A': 0, 'B': 0}`` first. Nothing is incremented unless ``model`` is
    ``'A'`` or ``'B'``.

    :param graph: Graph whose attributes are updated in place.
    :param path: Sequence of node ids.
    :param model: Model label to count for.
    """
    counted = model in ('A', 'B')

    def bump(attributes):
        if 'count' not in attributes:
            attributes['count'] = {'A': 0, 'B': 0}
        if counted:
            attributes['count'][model] += 1

    for step in zip(path, path[1:]):
        bump(graph.edges[step])

    # The end points are excluded here; only the nodes in between are counted.
    for inner_node in path[1:-1]:
        bump(graph.nodes[inner_node])


            
def update_graph_from_logs(graph: nx.DiGraph, logs: list, model: str):
    """Replay ``logs`` on ``graph`` and record traversal counts for ``model``.

    For each pair of consecutive log entries, a path between their nodes is
    searched (a loop is required when both entries name the same node) and,
    if found, counted on its edges and inner nodes. Pairs with a node missing
    from the graph, or without a path, are skipped silently. Finally the count
    of every logged node present in the graph is set to the number of times
    it occurs in ``logs``.

    :param graph: Graph updated in place.
    :param logs: List of dicts with a ``logging_statement_id`` entry.
    :param model: Model label; counts are only written for ``'A'`` or ``'B'``.
    """
    initialize_graph_attributes(graph, model)

    # How often each logging statement occurs.
    occurrences = defaultdict(int)
    for entry in logs:
        occurrences[entry['logging_statement_id']] += 1

    for earlier, later in zip(logs, logs[1:]):
        start_node = earlier['logging_statement_id']
        end_node = later['logging_statement_id']

        # Statements the graph does not know cannot be connected.
        if start_node not in graph or end_node not in graph:
            continue

        # A repeated statement must be explained by a cycle in the graph.
        path = a_star_search(graph, start_node, end_node, logs, start_node == end_node)
        if path:
            update_path_attributes(graph, path, model)

    # Logged nodes take their count directly from the logs.
    if model in ['A', 'B']:
        for node, count in occurrences.items():
            if node in graph:
                graph.nodes[node]['count'][model] = count

In [12]:
import networkx as nx

def plot_graph_with_missing_path(graph, start_node, end_node, title="Graph with Missing Path"):
    """
    Visualize the graph highlighting the start and end nodes where path is missing.

    The graph is laid out with Graphviz ``dot``, written to ``"<title>.png"``
    in the working directory and shown with matplotlib. Nodes are styled by
    their ``'type'`` attribute (``'place'`` / ``'transition'``); edges with a
    truthy ``'isBridge'`` attribute are drawn dashed in purple.

    :param graph: NetworkX graph
    :param start_node: Start node ID (outlined red, or purple when it equals ``end_node``)
    :param end_node: End node ID (outlined blue)
    :param title: Title for the plot; also used as the image file name
    """
    # Imported here so the function does not depend on another cell having
    # been executed first.
    import matplotlib.pyplot as plt
    from networkx.drawing.nx_agraph import to_agraph

    A = to_agraph(graph)
    is_self_loop = start_node == end_node

    for node in A.nodes():
        node_name = node.name
        node_type = graph.nodes[node_name].get('type', None)

        if node_type == 'place':
            node.attr['shape'] = 'circle'
            node.attr['label'] = ''
            node.attr['width'] = 0.6
            node.attr['height'] = 0.6

            if node_name.startswith("sink0"):
                node.attr['fillcolor'] = 'orange'
                node.attr['style'] = 'filled'
            elif node_name.startswith("source0"):
                node.attr['fillcolor'] = 'green'
                node.attr['style'] = 'filled'

        elif node_type == 'transition':
            node.attr['shape'] = 'rectangle'

            if node_name.startswith("hid_"):
                # Names starting with "hid_" are drawn as black boxes with a white label.
                node.attr['style'] = 'filled'
                node.attr['fillcolor'] = 'black'
                node.attr['penwidth'] = 5
                node.attr['fontcolor'] = 'white'

        # Outline the end points of the missing path with a thick stroke.
        if node_name == start_node:
            node.attr['color'] = 'purple' if is_self_loop else 'red'
            node.attr['penwidth'] = 7
        elif node_name == end_node:
            node.attr['color'] = 'blue'
            node.attr['penwidth'] = 7

    for edge in A.edges():
        edge_attr = graph.get_edge_data(edge[0], edge[1])
        if edge_attr.get('isBridge'):
            edge.attr['style'] = 'dashed'
            edge.attr['color'] = 'purple'

    A.layout(prog='dot')

    plt.figure(figsize=(20, 10))
    plt.axis('off')
    plt.title(title)
    A.draw(f"{title}.png")
    img = plt.imread(f"{title}.png")
    plt.imshow(img)
    plt.show()


# Graph Construction

In [20]:
import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import to_agraph

def plot_graph(graph, G1, G2, title):
    """
    Visualize a graph and highlight what is unique to each of two compared graphs.

    Nodes are styled by their ``'type'`` attribute (start/end event, task,
    exclusive/parallel gateway). Nodes and edges that exist only in ``G1`` are
    coloured light blue, those only in ``G2`` red; edges with a truthy
    ``'isBridge'`` attribute are drawn dashed in purple. The image is written
    to ``Graphs/<title>.png`` (the directory is created when missing) and
    shown with matplotlib.

    :param graph: NetworkX graph to draw
    :param G1: First graph of the comparison
    :param G2: Second graph of the comparison
    :param title: Title for the plot; also used as the image file name
    """
    import os  # local import: os is only imported by a later cell

    # Identify unique nodes and edges in both graphs
    unique_nodes_G1 = set(G1.nodes()) - set(G2.nodes())
    unique_nodes_G2 = set(G2.nodes()) - set(G1.nodes())

    unique_edges_G1 = set(G1.edges()) - set(G2.edges())
    unique_edges_G2 = set(G2.edges()) - set(G1.edges())

    # Graphviz attributes per node type; events and gateways share the
    # small filled marker look.
    marker = {'width': 0.6, 'height': 0.6, 'style': 'filled'}
    node_styles = {
        'StartEvent': {'shape': 'circle', 'label': '', 'fillcolor': 'green', **marker},
        'NormalEndEvent': {'shape': 'circle', 'label': '', 'fillcolor': 'orange', **marker},
        'Task': {'shape': 'rectangle'},
        'ExclusiveGateway': {'shape': 'diamond', 'label': 'X', **marker},
        'ParallelGateway': {'shape': 'diamond', 'label': '+', **marker},
    }

    A = to_agraph(graph)

    for node in A.nodes():
        node_name = node.name
        node_type = graph.nodes[node_name].get('type', None)

        style = node_styles.get(node_type)
        if style is None:
            print(f"Could not handle node of type: {node_type}")
        else:
            for attribute, value in style.items():
                node.attr[attribute] = value

        if node_name in unique_nodes_G1:  # Unique to G1
            node.attr['color'] = 'lightblue'
            node.attr['style'] = 'filled'
        elif node_name in unique_nodes_G2:  # Unique to G2
            node.attr['color'] = 'red'
            node.attr['style'] = 'filled'

    for edge in A.edges():
        edge_attr = graph.get_edge_data(edge[0], edge[1])
        if edge_attr.get('isBridge'):
            edge.attr['style'] = 'dashed'
            edge.attr['color'] = 'purple'

        if (edge[0], edge[1]) in unique_edges_G1:  # Unique to G1
            edge.attr['color'] = 'lightblue'
            edge.attr['penwidth'] = 5
        elif (edge[0], edge[1]) in unique_edges_G2:  # Unique to G2
            edge.attr['color'] = 'red'
            edge.attr['penwidth'] = 5

    A.layout(prog='dot')

    # Drawing fails if the target directory is missing, so create it first.
    image_path = f"Graphs/{title}.png"
    os.makedirs(os.path.dirname(image_path), exist_ok=True)

    plt.figure(figsize=(20, 10))
    plt.axis('off')
    plt.title(title)
    A.draw(image_path)
    img = plt.imread(image_path)
    plt.imshow(img)
    plt.show()

In [21]:
import os
from networkx.drawing.nx_agraph import to_agraph, write_dot
from pm4py.objects.petri_net.obj import PetriNet
from pm4py.objects.petri_net.utils.networkx_graph import create_networkx_directed_graph
from pm4py.objects.petri_net.utils import petri_utils

def bpmn_to_networkx(bpmn_model):
    """
    Build a NetworkX directed graph from a BPMN model, using node names as graph nodes.

    Each BPMN node becomes a graph node keyed by its name, with ``label`` set
    to the name and ``type`` set to the node's class name. A flow becomes an
    edge only when both of its end points have a truthy name.
    """
    digraph = nx.DiGraph()

    for bpmn_node in bpmn_model.get_nodes():
        digraph.add_node(bpmn_node.name, label=bpmn_node.name, type=type(bpmn_node).__name__)

    for flow in bpmn_model.get_flows():
        source_name = getattr(flow.source, 'name', None)
        target_name = getattr(flow.target, 'name', None)
        # Skip flows with a missing or empty end-point name.
        if not (source_name and target_name):
            continue
        digraph.add_edge(source_name, target_name)

    return digraph

def annotate_union_graph(G1, G2, G_union):
    """
    Mark every node and edge of the union graph with the graphs it occurs in.

    Each node and edge of ``G_union`` gets an ``'in_graph'`` attribute of the
    form ``{'A': bool, 'B': bool}``: ``'A'`` tells whether it is in ``G1``,
    ``'B'`` whether it is in ``G2``. ``G_union`` is modified in place.
    """
    def presence(item, members_a, members_b):
        return {
            'A': item in members_a,
            'B': item in members_b,
        }

    for node in G_union.nodes():
        G_union.nodes[node]['in_graph'] = presence(node, G1.nodes(), G2.nodes())

    for source, target, _attributes in G_union.edges(data=True):
        edge_key = (source, target)
        G_union.edges[edge_key]['in_graph'] = presence(edge_key, G1.edges(), G2.edges())
        
def save_and_plot_all_models(all_models, raw_log_data_A, raw_log_data_B):
    """
    Build the Expected/Faulty comparison data for every subprocess and save it as JSON.

    For each mode, service and subprocess of the 'Expected' models that also
    has a 'Faulty' counterpart, both BPMN graphs are converted to NetworkX
    graphs, annotated with traversal counts from their logs, merged into a
    union graph, annotated with graph membership and serialized to DOT text.
    The DOT text and the logs of both runs are stored per subprocess. After a
    mode is processed its data is written with ``save_json``.

    :param all_models: Dict with 'Expected' and 'Faulty' entries, each keyed
        by mode name, service and subprocess; leaves hold ``'bpmn_graph'``.
    :param raw_log_data_A: Logs of the expected run, keyed like the models.
    :param raw_log_data_B: Logs of the faulty run, keyed like the models.
    """
    json_data = {}

    # Extract models A and B
    model_a_data = all_models.get('Expected', {})
    model_b_data = all_models.get('Faulty', {})

    for mode_name, services in model_a_data.items():
        # Collected per mode, so one mode's services (and the black-box
        # entries added below) do not leak into the next mode's list.
        service_names = []

        json_data[mode_name] = {
            "viewType": "Contrast",
            "graphType": "BPMN",
            "data": {}
        }

        for service_name, subprocesses in services.items():
            service_names.append(service_name)

            json_data[mode_name]["data"][service_name] = {}

            for subprocess_name, model_data in subprocesses.items():
                # Fetch the BPMN graphs of the Expected and the Faulty run
                bpmn_model_A = model_data.get('bpmn_graph', None)
                bpmn_model_B = model_b_data.get(mode_name, {}).get(service_name, {}).get(subprocess_name, {}).get('bpmn_graph', None)
                if bpmn_model_A is None or bpmn_model_B is None:
                    continue

                G_A = bpmn_to_networkx(bpmn_model_A)
                G_B = bpmn_to_networkx(bpmn_model_B)

                # Fetch corresponding logs
                log_data_A = raw_log_data_A.get(mode_name, {}).get(service_name, {}).get(subprocess_name, {})
                log_data_B = raw_log_data_B.get(mode_name, {}).get(service_name, {}).get(subprocess_name, {})

                # Update graphs based on logs
                update_graph_from_logs(G_A, log_data_A.get('logs', []), "A")
                update_graph_from_logs(G_B, log_data_B.get('logs', []), "B")

                # Create union graph and annotate
                G_union = nx.compose(G_A, G_B)
                annotate_union_graph(G_A, G_B, G_union)

                # Convert node attributes to strings
                for node, attr in G_union.nodes(data=True):
                    updated_attr = {k: str(v) for k, v in attr.items()}
                    G_union.nodes[node].update(updated_attr)

                # Convert edge attributes to strings
                for u, v, attr in G_union.edges(data=True):
                    updated_attr = {k: str(v) for k, v in attr.items()}
                    G_union[u][v].update(updated_attr)

                # Serialize through a temporary .dot file; remove it even if
                # writing or reading fails.
                temp_dot_file = "temp_union_graph.dot"
                try:
                    write_dot(G_union, temp_dot_file)
                    with open(temp_dot_file, 'r') as file:
                        graph_content = file.read()
                finally:
                    if os.path.exists(temp_dot_file):
                        os.remove(temp_dot_file)

                json_data[mode_name]["data"][service_name][subprocess_name] = {
                    "graphData": graph_content,
                    "logData": {
                        "A": log_data_A,
                        "B": log_data_B,
                    },
                }

        # Add Blackbox Services
        service_names.append("web-app")
        service_names.append("mail-service")

        json_data[mode_name]["services"] = service_names

        # NOTE(review): the target path does not depend on the mode, so with
        # more than one mode each save overwrites the previous one.
        save_json(json_data[mode_name], f"../Data/Reproduced Experiment/Diagnosis/Models/Part B/model_partB.json")
        
def save_json(data, file_path):
    """Save a Python dictionary to a JSON file.

    Missing parent directories of ``file_path`` are created first. A path
    without a directory part is written to the current working directory.

    :param data: JSON-serializable object to write.
    :param file_path: Destination file path.
    """
    print(f"Saving {file_path}")

    directory = os.path.dirname(file_path)
    # dirname is '' for a bare file name, and os.makedirs('') raises.
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4)
        
        
# Build the Expected/Faulty comparison data from the renamed models and the loaded logs, then write it to JSON
# (the plot_graph call inside save_and_plot_all_models is commented out, so nothing is plotted here)
save_and_plot_all_models(all_models, logs_A, logs_B)
print("DONE")

Saving ../Data/Reproduced Experiment/Diagnosis/Models/Part B/model_partB.json
DONE
