# Modify The Graph

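Load a yEd GraphML graph into pandas DataFrames, snap its node coordinates to a regular grid, and write the modified graph back to a `.graphml` file.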

In [349]:
# Import necessary packages
import math
import pandas as pd
import xml.etree.ElementTree as ET

In [350]:
# Convert .graphml to Pandas DataFrame
def convert_graphml_to_dataframe(file_path):
    """Load a yEd ``.graphml`` file into node and edge DataFrames.

    Args:
        file_path: Location of the GraphML document; handed unchanged to
            ``xml.etree.ElementTree.parse``.

    Returns:
        A ``(nodes_df, edges_df)`` tuple.

        * ``nodes_df`` has the columns ``node``, ``x`` and ``y``, one row per
          ``<node>`` that contains a ``y:Geometry`` element. Nodes without
          geometry are skipped.
        * ``edges_df`` has the columns ``source`` and ``target``, with
          self-loops (``source == target``) removed.

        Both frames always carry these columns, even when the graph has no
        nodes or no edges.

    Raises:
        TypeError: If a ``y:Geometry`` element has no ``x`` or ``y`` attribute.
        ValueError: If an ``x`` or ``y`` attribute is not a valid float.
    """
    # Prefix -> URI map so the XPath queries below can use short prefixes
    namespaces = {
        'y': 'http://www.yworks.com/xml/graphml',
        'graphml': 'http://graphml.graphdrawing.org/xmlns'
    }

    # Parse the XML file
    root = ET.parse(file_path).getroot()

    # Process nodes
    nodes_list = []
    for node in root.findall('.//graphml:node', namespaces):
        geometry = node.find('.//y:Geometry', namespaces)
        if geometry is None:
            # No layout information for this node: nothing to record
            continue
        nodes_list.append({
            'node': node.get('id'),
            'x': float(geometry.get('x')),
            'y': float(geometry.get('y'))
        })

    # Process edges
    edges_list = [
        {'source': edge.get('source'), 'target': edge.get('target')}
        for edge in root.findall('.//graphml:edge', namespaces)
    ]

    # Create DataFrames. The explicit column lists keep the schema stable for
    # empty graphs; without them an edge-less file would yield a column-less
    # frame and the self-loop filter below would raise KeyError.
    nodes_df = pd.DataFrame(nodes_list, columns=['node', 'x', 'y'])
    edges_df = pd.DataFrame(edges_list, columns=['source', 'target'])

    # Drop self-loops
    edges_df = edges_df[edges_df['source'] != edges_df['target']].reset_index(drop=True)

    return nodes_df, edges_df

In [None]:
# Convert Pandas DataFrame to .graphml file
def convert_dataframe_to_graphml(nodes_df, edges_df, file_path, directed=True):
    """Write node and edge DataFrames to a yEd-flavoured ``.graphml`` file.

    Args:
        nodes_df: DataFrame with the columns ``node`` (node id), ``x`` and
            ``y`` (coordinates written into each node's ``y:Geometry``).
        edges_df: DataFrame with the columns ``source`` and ``target``
            holding the ids of each edge's end points.
        file_path: Destination passed to ``ElementTree.write``.
        directed: When true the graph is declared ``edgedefault="directed"``,
            otherwise ``edgedefault="undirected"``.

    Returns:
        None. The document is written to ``file_path`` as UTF-8 with an XML
        declaration.

    Note:
        Edge end points are always written as ``source``/``target``
        attributes. GraphML has no ``node1``/``node2`` attributes; whether an
        edge is directed is conveyed by the graph's ``edgedefault``.
    """
    # Namespace prefixes declared on the root element
    namespaces = {
        'graphml': "http://graphml.graphdrawing.org/xmlns",
        'y': "http://www.yworks.com/xml/graphml",
        'xsi': "http://www.w3.org/2001/XMLSchema-instance",
        'yed': "http://www.yworks.com/xml/yed/3",
        'java': "http://www.yworks.com/xml/yfiles-common/1.0/java",
        'sys': "http://www.yworks.com/xml/yfiles-common/markup/primitives/2.0",
        'x': "http://www.yworks.com/xml/yfiles-common/markup/2.0"
    }

    # Register the namespaces with ElementTree (process-wide registry)
    for prefix, uri in namespaces.items():
        ET.register_namespace(prefix, uri)

    # Create the root <graphml> element. The xmlns declarations are written
    # as literal attributes because the tags below use literal prefixes
    # ('y:ShapeNode') rather than '{uri}tag' names.
    graphml_root = ET.Element('graphml', attrib={
        'xmlns': namespaces['graphml'],
        'xmlns:java': namespaces['java'],
        'xmlns:sys': namespaces['sys'],
        'xmlns:x': namespaces['x'],
        'xmlns:xsi': namespaces['xsi'],
        'xmlns:y': namespaces['y'],
        'xmlns:yed': namespaces['yed'],
        'xsi:schemaLocation': "http://graphml.graphdrawing.org/xmlns http://www.yworks.com/xml/schema/graphml/1.1/ygraphml.xsd"
    })

    # <key> declarations; d6 and d10 are referenced by the node/edge <data>
    # elements created further down.
    key_definitions = (
        {'attr.name': 'Description', 'attr.type': 'string', 'for': 'graph', 'id': 'd0'},
        {'for': 'port', 'id': 'd1', 'yfiles.type': 'portgraphics'},
        {'for': 'port', 'id': 'd2', 'yfiles.type': 'portgeometry'},
        {'for': 'port', 'id': 'd3', 'yfiles.type': 'portuserdata'},
        {'attr.name': 'url', 'attr.type': 'string', 'for': 'node', 'id': 'd4'},
        {'attr.name': 'description', 'attr.type': 'string', 'for': 'node', 'id': 'd5'},
        {'for': 'node', 'id': 'd6', 'yfiles.type': 'nodegraphics'},
        {'for': 'graphml', 'id': 'd7', 'yfiles.type': 'resources'},
        {'attr.name': 'url', 'attr.type': 'string', 'for': 'edge', 'id': 'd8'},
        {'attr.name': 'description', 'attr.type': 'string', 'for': 'edge', 'id': 'd9'},
        {'for': 'edge', 'id': 'd10', 'yfiles.type': 'edgegraphics'},
    )
    for key_attrib in key_definitions:
        ET.SubElement(graphml_root, 'key', attrib=key_attrib)

    # Add the <graph> element that holds all nodes and edges
    graph = ET.SubElement(graphml_root, 'graph', attrib={
        'edgedefault': 'directed' if directed else 'undirected',
        'id': 'G'
    })

    # Every node is written with the same fixed size
    node_height = '12.976852521408091'
    node_width = '12.048095535010361'

    # Add nodes. Values go through str() because ElementTree can only
    # serialize string attribute values (ids may be numeric in the frame).
    for row in nodes_df.itertuples(index=False):
        node = ET.SubElement(graph, 'node', attrib={'id': str(row.node)})
        data = ET.SubElement(node, 'data', attrib={'key': 'd6'})
        shape_node = ET.SubElement(data, 'y:ShapeNode')
        ET.SubElement(shape_node, 'y:Geometry', attrib={
            'height': node_height,
            'width': node_width,
            'x': str(row.x),
            'y': str(row.y)
        })

    # Add edges, always with source/target attributes (see Note above)
    for row in edges_df.itertuples(index=False):
        edge_element = ET.SubElement(graph, 'edge', attrib={
            'source': str(row.source),
            'target': str(row.target)
        })
        edge_data = ET.SubElement(edge_element, 'data', attrib={'key': 'd10'})
        ET.SubElement(edge_data, 'y:PolyLineEdge')

    # Serialize the tree
    tree = ET.ElementTree(graphml_root)
    tree.write(file_path, encoding='UTF-8', xml_declaration=True)

In [None]:
# Align graph nodes
def strict_graph_grid(nodes_df, grid_size=50):
    """Return a copy of ``nodes_df`` with ``x`` and ``y`` snapped to a grid.

    Each coordinate is replaced by the multiple of ``grid_size`` obtained
    from ``grid_size * round(value / grid_size)``. The input frame is left
    untouched.

    Args:
        nodes_df: DataFrame containing ``x`` and ``y`` columns.
        grid_size: Spacing between grid lines.

    Returns:
        A new DataFrame with the snapped coordinates.
    """
    snapped = nodes_df.copy()
    # Both coordinate columns get the same element-wise treatment
    for axis in ('x', 'y'):
        snapped[axis] = snapped[axis].apply(
            lambda value: grid_size * round(value / grid_size)
        )
    return snapped

In [353]:
# Load the original graph into a node table and an edge table
nodes_df, edges_df = convert_graphml_to_dataframe("graphs/original/HIMCM_graph.graphml")

In [354]:
# Display the node table (columns: node, x, y)
nodes_df

Unnamed: 0,node,x,y
0,n0,539.732018,814.371012
1,n1,491.392962,814.701509
2,n2,540.242020,765.082562
3,n3,589.438012,813.861011
4,n4,491.507934,764.572560
...,...,...,...
714,n714,1373.812038,1537.128905
715,n715,1325.140950,1537.492159
716,n716,1424.182097,1537.824192
717,n717,1469.084899,1536.164029


In [355]:
# Display the edge table (columns: source, target)
edges_df

Unnamed: 0,source,target
0,n12,n11
1,n12,n14
2,n11,n4
3,n12,n4
4,n14,n4
...,...,...
2594,n623,n626
2595,n97,n527
2596,n99,n521
2597,n90,n521


In [356]:
# Snap every node coordinate to a grid with a spacing of 50
strict_nodes_df = strict_graph_grid(nodes_df, grid_size=50)

In [357]:
# Write the grid-aligned nodes together with the original edges to a new .graphml file
convert_dataframe_to_graphml(strict_nodes_df, edges_df, "graphs/outputs/modified_graph.graphml")