## Refine DAG
This notebook processes the Snakemake DAG and refines it for use in a yEd layout.

To generate the dag:
1. `snakemake --rulegraph > dag.dot`
2. Upload the `dag.dot` to https://www.yworks.com/yed-live/
3. In the layout settings, select the left-to-right orientation and the octilinear option
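4. Open the graph in the yEd desktop application (download: https://www.yworks.com/products/yed/download#download)
5. Save the file in GraphML format (this notebook reads it as `dag_v0.8.3_raw.graphml`)
6. Run this notebook to refine the saved file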

In [None]:
import networkx as nx
import pandas as pd

In [None]:
# Load the raw GraphML export into a networkx graph; the node attributes
# (including each node's "label") are read by the following cells.
G = nx.read_graphml("dag_v0.8.3_raw.graphml")

In [None]:
# Build a lookup from node number (node id with the surrounding "n"
# characters stripped) to the node's original label, and replace each
# node's label in the graph with that number.
label = {}
for node_id, attrs in G.nodes(data=True):
    short_id = node_id.strip("n")
    # Remember the original label before it is overwritten below.
    label[short_id] = attrs["label"]
    attrs.update(label=short_id)

In [None]:
# Tabulate the number -> label mapping: one row per node, with the node
# number in "node_number" and the original label in "Rule Name".
df_new_labels = (
    pd.DataFrame.from_dict({"Rule Name": label})
    .reset_index()
    .rename(columns={"index": "node_number"})
)
df_new_labels

In [None]:
# Load the previous rule table (tab-separated). The cells below read its
# "Rule Name", "Description" and "node_number" columns.
df_old_labels = pd.read_csv("old_rule_name.txt", sep="\t")

In [None]:
# Carry the description and previous node number over from the old rule
# table for every rule that still exists, and collect the rules that are new.
#
# Results:
#   existing_rule -- names found in both tables, in df_new_labels order
#   new_rules     -- {df_new_labels row label: rule name} for rules that are
#                    absent from the old table
#   df_new_labels -- gains "Description" and "old_node_number" values on the
#                    matched rows
existing_rule = []
new_rules = {}

# Row label of the FIRST occurrence of each old rule name. Building this once
# replaces a per-row list rebuild and boolean-mask scan of the old table.
first_old_row = {}
for old_idx, old_name in df_old_labels["Rule Name"].items():
    first_old_row.setdefault(old_name, old_idx)

for i in df_new_labels.index:
    rule_name = df_new_labels.loc[i, "Rule Name"]
    if rule_name not in first_old_row:
        new_rules[i] = rule_name
        continue

    existing_rule.append(rule_name)
    idx = first_old_row[rule_name]
    description = df_old_labels.loc[idx, "Description"]
    old_node = df_old_labels.loc[idx, "node_number"]
    df_new_labels.loc[i, "Description"] = description
    # Stored as text, matching the string node numbers of the new table.
    df_new_labels.loc[i, "old_node_number"] = str(old_node)

In [None]:
# Save the node table to CSV; index=False leaves the DataFrame's row index
# out of the file.
df_new_labels.to_csv("node_description.csv", index=False)

In [None]:
# Display the old-table rows whose rule name was not matched in the new graph.
df_old_labels[~df_old_labels["Rule Name"].isin(existing_rule)]

In [None]:
# Display the rules that have no entry in the old table, keyed by their row
# label in df_new_labels.
new_rules

In [None]:
import xml.etree.ElementTree as ET
import logging

# Set up logging: configure the root logger at DEBUG level so the per-node
# debug messages logged by rename_node_labels are emitted.
# NOTE: logging.basicConfig does nothing if the root logger already has
# handlers configured.
logging.basicConfig(level=logging.DEBUG)

def rename_node_labels(input_file, output_file):
    """Replace each node's yEd label text with the node's number.

    The number is the node ``id`` with its leading ``n`` removed
    (``n12`` -> ``12``). Only the leading ``n`` is dropped, so ids that
    contain further ``n`` characters (e.g. ``n0::n1`` -> ``0::n1``) keep
    them.

    Args:
        input_file: Path (or file object) of the GraphML document to read.
        output_file: Path (or file object) the updated document is written to.

    Returns:
        None. The result is written to ``output_file`` as UTF-8 with an XML
        declaration.

    Raises:
        xml.etree.ElementTree.ParseError: If ``input_file`` is not
            well-formed XML.

    Note:
        The GraphML and yWorks namespaces are registered in ElementTree's
        process-wide prefix registry so the output uses the default
        namespace and the ``y:`` prefix rather than generated ``ns0:`` /
        ``ns1:`` prefixes.
    """
    # Define the namespaces
    ns = {'graphml': 'http://graphml.graphdrawing.org/xmlns',
          'y': 'http://www.yworks.com/xml/graphml'}

    # Without registration ElementTree serialises these namespaces under
    # auto-generated prefixes (ns0, ns1, ...).
    ET.register_namespace('', ns['graphml'])
    ET.register_namespace('y', ns['y'])

    # Parse the XML document
    tree = ET.parse(input_file)
    root = tree.getroot()

    # Iterate over all nodes in the document
    for node in root.findall('.//graphml:node', ns):
        raw_id = node.get('id')
        if raw_id is None:
            # Nothing to derive a number from; leave this node untouched.
            logging.warning('Skipping node without an id attribute')
            continue

        # Drop only the leading 'n'; str.replace would also remove any
        # later 'n' characters in the id.
        node_id = raw_id[1:] if raw_id.startswith('n') else raw_id
        logging.debug('Processing node with id %s', node_id)

        # Find the NodeLabel element and update its text
        node_label = node.find('.//y:NodeLabel', ns)
        if node_label is None:
            logging.warning('No NodeLabel found for node %s', node_id)
            continue

        logging.debug('Found NodeLabel for node %s, current text: %s',
                      node_id, node_label.text)
        node_label.text = node_id
        logging.debug('Updated NodeLabel text to %s', node_id)

    # Write the changes back to the file. An explicit UTF-8 encoding with an
    # XML declaration keeps non-ASCII text readable instead of turning it
    # into numeric character references.
    tree.write(output_file, encoding='utf-8', xml_declaration=True)
    logging.info('Wrote updated graph to %s', output_file)

# Relabel the raw graph and write the result to the final file. One call is
# enough: repeating it only re-parses the same input and overwrites the same
# output.
rename_node_labels("dag_v0.8.3_raw.graphml", "dag_v0.8.3.graphml")