In [3]:
import pydot
import networkx as nx
from networkx.drawing.nx_pydot import read_dot
from collections import defaultdict
import regex

def sanitize_dot(dot_string):
    """Replace colons in DOT text with underscores.

    Only a colon that is neither immediately preceded nor immediately
    followed by a double quote is replaced; colons touching a quote are
    left as they are.

    Args:
        dot_string: Raw contents of a DOT file.

    Returns:
        The text with the matching colons replaced by ``_``.
    """
    # Lookbehind/lookahead keep colons that sit directly next to a quote.
    colon_away_from_quote = regex.compile(r'(?<!"):(?!")')
    return colon_away_from_quote.sub('_', dot_string)

def merge_same_name_nodes(input_file, output_file):
    """Merge same-named nodes that sit at the same level of a DOT graph.

    The DOT file is read, passed through ``sanitize_dot`` and parsed into a
    networkx graph. Each node's *level* is the shortest-path length from
    that node to the first node of the parsed graph. Among nodes with a
    level greater than 1, nodes whose name shares the same text before the
    first ``|`` are merged: the first such node is kept, the incoming and
    outgoing edges of the others are re-attached to it, and the others are
    removed. Afterwards any remaining ``:`` in node names is replaced by
    ``_``, a node style of exactly ``"dotted"`` is dropped (and replaced by
    ``filled`` when the node has a ``fillcolor``), and the graph is written
    to ``output_file``.

    Nodes that have no path to the first node have no level and are left
    unmerged.

    NOTE(review): edges are read with ``keys=True`` via ``in_edges`` /
    ``out_edges``, so the parsed graph is assumed to be a directed
    multigraph (a non-strict ``digraph``) -- confirm for your inputs.

    Args:
        input_file: Path of the DOT file to read.
        output_file: Path the merged DOT graph is written to.
    """
    with open(input_file, 'r') as f:
        dot_string = sanitize_dot(f.read())

    # Parse the sanitized text in memory; this avoids writing a scratch file
    # into the working directory. Only the first graph in the text is used.
    parsed_graphs = pydot.graph_from_dot_data(dot_string)
    G = nx.nx_pydot.from_pydot(parsed_graphs[0])

    # Group nodes by level (distance to the first node of the graph).
    nodes_by_level = defaultdict(list)
    if G.number_of_nodes() > 0:
        root = next(iter(G.nodes()))
        # One search from the root covers every node that can reach it.
        distance_to_root = dict(nx.shortest_path_length(G, target=root))
        # Walk G.nodes() (not the distance dict) so the per-level order, and
        # therefore the choice of which duplicate is kept, follows graph order.
        for node in G.nodes():
            level = distance_to_root.get(node)
            # Levels 0 and 1 (the root and its direct neighbours) are never merged.
            if level is not None and level > 1:
                nodes_by_level[level].append(node)

    for level_nodes in nodes_by_level.values():
        # Bucket this level's nodes by the part of the name before the first '|'.
        nodes_by_name = defaultdict(list)
        for node in level_nodes:
            nodes_by_name[node.split('|')[0]].append(node)

        for same_name_nodes in nodes_by_name.values():
            # Keep the first node; fold every other one into it.
            keep_node, *merge_nodes = same_name_nodes
            for merge_node in merge_nodes:
                # Snapshot the edge lists: the graph is modified while we loop.
                for u, _, _, data in list(G.in_edges(merge_node, data=True, keys=True)):
                    G.add_edge(u, keep_node, **data)
                for _, v, _, data in list(G.out_edges(merge_node, data=True, keys=True)):
                    G.add_edge(keep_node, v, **data)
                G.remove_node(merge_node)

    # Replace any colon still present in a node name, relabelling in one pass.
    renames = {node: node.replace(':', '_') for node in G.nodes() if ':' in node}
    G = nx.relabel_nodes(G, renames)

    # Drop the dotted style; fall back to a plain fill when a fill colour is set.
    for _, attrs in G.nodes(data=True):
        if attrs.get('style') == '"dotted"':
            del attrs['style']
            if 'fillcolor' in attrs:
                attrs['style'] = 'filled'

    # Save the merged graph
    nx.nx_pydot.write_dot(G, output_file)

# Merge one attack graph; both paths are specific to the author's machine.
merge_same_name_nodes(
    input_file="C:/Users/alexd/PycharmProjects/SAGE/baseline-2017AGs/baseline-2017.txt-attack-graph-for-victim-10.0.0.224-NETWORKDOSmswbtserver.dot",
    output_file="C:/Users/alexd/PycharmProjects/SAGE/try1.dot",
)


In [4]:
import os

# Merge every .dot file in a folder; both folders are specific to the author's machine.
BASELINE_2018_DIR = "C:/Users/alexd/PycharmProjects/SAGE/baseline-2018AGs"
MERGED_OUTPUT_DIR = "C:/Users/alexd/PycharmProjects/SAGE/bobeala2"

# Sorted so the processing order is reproducible (os.listdir order is arbitrary).
spdfa_attack_graphs = sorted(
    file for file in os.listdir(BASELINE_2018_DIR) if file.endswith('.dot')
)

# Create the destination up front so the first write cannot fail on a missing folder.
os.makedirs(MERGED_OUTPUT_DIR, exist_ok=True)

for file in spdfa_attack_graphs:
    merge_same_name_nodes(BASELINE_2018_DIR + "/" + file, MERGED_OUTPUT_DIR + "/" + file)