# In [1]:
import pandas as pd
import json
from pathlib import Path

# In [2]:
# Load the four CSV inputs. The paths are listed in the same order as the
# names on the left-hand side, so each file lands in its matching variable.
verification_v1, verification_v2, node_mapping, node_final = (
    pd.read_csv(csv_path)
    for csv_path in (
        "new data/CLD data - Verification_data V1.csv",
        "new data/CLD data - Verification_data V2.csv",
        "new data/CLD data - Node Mapping.csv",
        "new data/CLD data - Node Final.csv",
    )
)

def normalize_color(value):
    """Return *value* as an integer-valued string, or "0" if it is not numeric.

    The value is parsed as a float first so that inputs such as ``"3.0"`` or
    ``3.7`` are accepted; the fractional part is then truncated by ``int``.

    Args:
        value: Anything ``float()`` may accept (number, numeric string, ...).

    Returns:
        str: The truncated integer as text, or ``"0"`` when the value is
        missing, non-numeric, NaN or infinite.
    """
    try:
        return str(int(float(value)))
    # TypeError: None / unsupported objects; ValueError: non-numeric text or
    # NaN; OverflowError: infinity.  Anything else (e.g. KeyboardInterrupt)
    # is deliberately left to propagate instead of being swallowed.
    except (TypeError, ValueError, OverflowError):
        return "0"

# Tag each frame with the version it was loaded from, then stack both into a
# single frame with a fresh 0..n-1 index.
verification_v1['version'] = 'V1'
verification_v2['version'] = 'V2'
verification_combined = pd.concat(
    [verification_v1, verification_v2],
    ignore_index=True,
)

# Bring every color column to the integer-string form produced by
# normalize_color; missing values become "0".
for color_column in ('source_color', 'target_color', 'link_color'):
    raw_colors = verification_combined[color_column]
    verification_combined[color_column] = (
        raw_colors.fillna('0').astype(str).apply(normalize_color)
    )

# Canonical participant type: trimmed, lower-cased, spaces turned into "_".
normalized_type = verification_combined['participant_type'].str.strip()
normalized_type = normalized_type.str.lower().str.replace(' ', '_')
verification_combined['participant_type'] = normalized_type

# Group key is "<participant_type>_<participant_number>".
participant_number_text = verification_combined['participant_number'].astype(str)
verification_combined['group'] = (
    verification_combined['participant_type'] + "_" + participant_number_text
)

# Old CSV header -> column name used by the rest of the script.
column_mapping = {
    # Edge endpoints.
    'cause': 'source',
    'effect': 'target',
    # ID columns of the source side.
    'Master ID': 'source_master_id',
    'Node ID': 'source_node_id',
    # ID columns of the target side; the ".1" suffix is presumably pandas'
    # de-duplication of repeated CSV headers -- confirm against the file.
    'Master ID.1': 'target_master_id',
    'Node ID.1': 'target_node_id',
    # Columns whose names are kept unchanged.
    'polarity': 'polarity',
    'participant_number': 'participant_number',
    'participant_type': 'participant_type',
    'source_color': 'source_color',
    'target_color': 'target_color',
    'Comment': 'comment',
}
verification_combined = verification_combined.rename(columns=column_mapping)

def map_comment_to_type(row):
    """Derive a short type code for one row.

    Args:
        row: Mapping-like row providing ``'version'`` and, for rows that are
            not version ``'V1'``, ``'comment'``.

    Returns:
        str: ``'O'`` for every ``'V1'`` row; ``'K'`` when the comment is
        missing or blank; otherwise the capitalised first character of each
        whitespace-separated word of the comment, joined together.
    """
    if row['version'] == 'V1':
        return 'O'

    comment = row['comment']
    if pd.isna(comment) or comment.strip() == '':
        return 'K'

    initials = [word.capitalize()[0] for word in comment.split()]
    return ''.join(initials)

# Row-wise type code derived from the version and the comment text.
verification_combined['types'] = verification_combined.apply(
    map_comment_to_type, axis=1
)

# "Node ID" -> "Index" and "Node ID.1" -> "Index" lookups from the node
# mapping sheet; on a key clash the "Node ID.1" entry wins.
node_v1_index_map = node_mapping.set_index('Node ID')['Index'].to_dict()
node_v2_index_map = node_mapping.set_index('Node ID.1')['Index'].to_dict()
node_index_map = dict(node_v1_index_map)
node_index_map.update(node_v2_index_map)

# "Node ID" -> "Master ID" from the final node sheet; these entries take
# precedence over the mapping-sheet entries above.
node_final_master_map = node_final.set_index('Node ID')['Master ID'].to_dict()
merged_node_index_map = dict(node_index_map)
merged_node_index_map.update(node_final_master_map)

def map_node_id_to_index(node_id):
    """Translate a node ID through ``merged_node_index_map``.

    Args:
        node_id: Node identifier to look up.

    Returns:
        str: The mapped value as text, or the node ID itself as text when the
        lookup has no entry for it.
    """
    mapped = merged_node_index_map.get(node_id, node_id)
    return str(mapped)

# Resolve both edge endpoints to their mapped node IDs (as strings).
verification_combined['source_map_id'] = (
    verification_combined['source_node_id'].apply(map_node_id_to_index)
)
verification_combined['target_map_id'] = (
    verification_combined['target_node_id'].apply(map_node_id_to_index)
)

# Project source-side and target-side colors onto a shared schema:
# (node_id, color, group, version).
src_colors = verification_combined[
    ['source_map_id', 'source_color', 'group', 'version']
].rename(columns={'source_map_id': 'node_id', 'source_color': 'color'})
tgt_colors = verification_combined[
    ['target_map_id', 'target_color', 'group', 'version']
].rename(columns={'target_map_id': 'node_id', 'target_color': 'color'})

# Stack source rows first, then target rows; rows with any missing value are
# discarded before the ID and color columns are normalised.
all_node_colors = (
    pd.concat([src_colors, tgt_colors])
    .dropna()
    .assign(
        node_id=lambda frame: frame['node_id'].astype(str),
        color=lambda frame: frame['color'].apply(normalize_color),
    )
)

# (group, version, node_id) -> color.  When a key occurs more than once the
# last row wins, exactly as with repeated dict assignment.
color_lookup = dict(
    zip(
        zip(
            all_node_colors['group'],
            all_node_colors['version'],
            all_node_colors['node_id'],
        ),
        all_node_colors['color'],
    )
)

def build_colored_node(id_val, label, group, version_prefix):
    """Build the node dictionary for one node of one group/version graph.

    Args:
        id_val: Node identifier; its string form is used for the ``"id"``
            field and for the color lookup.
        label: Display label, stored unchanged.
        group: Group key of the form ``"<participant_type>_<number>"``.
        version_prefix: Version tag; used as part of the color lookup key and
            as the prefix of the ``"key"`` field.

    Returns:
        dict: ``{"data": {...}, "classes": ""}`` where ``data`` carries the
        id, key, label, participant type, color (``"0"`` when
        ``color_lookup`` has no entry) and an empty history list.
    """
    color = color_lookup.get((group, version_prefix, str(id_val)), "0")

    # The participant type can itself contain underscores (spaces are turned
    # into "_" before the group key is built), so only the trailing
    # "_<number>" part is stripped instead of cutting at the first "_".
    participant_type = group.rsplit("_", 1)[0]

    return {
        "data": {
            "id": str(id_val),
            "key": f"{version_prefix}_{id_val}",
            "label": label,
            "participant_type": participant_type,
            "color": color,
            "history": [],
        },
        "classes": "",
    }

# group key -> {"V1": {...}, "final": {...}} graph payloads.
grouped_all_data = {}
# "Master ID" -> "Node Name" from the final node sheet.
node_label_map = node_final.set_index('Master ID')['Node Name'].to_dict()


def _label_for_node(node_id):
    """Return the label for a mapped node ID, or "" when none can be found.

    Node IDs reach this point as strings; unmapped nodes keep their raw ID,
    which is not guaranteed to be numeric, so a failed float conversion is
    treated the same as a missing label instead of aborting the export.
    """
    try:
        return node_label_map.get(float(node_id), "")
    except (TypeError, ValueError):
        return ""


for group in verification_combined['group'].unique():
    gdf = verification_combined[verification_combined['group'] == group]
    v1_edges, v2_edges = [], []
    v1_nodes, v2_nodes = set(), set()

    for _, row in gdf.iterrows():
        is_positive = row['polarity'] == "positive"
        edge = {
            "id": f"{row['source_map_id']}-{row['target_map_id']}",
            "source": str(row['source_map_id']),
            "target": str(row['target_map_id']),
            # Anything other than "positive" is rendered as a negative link.
            "label": "+" if is_positive else "-",
            "value": 1 if is_positive else -1,
            "polarity": row["polarity"],
            "participant_type": row["participant_type"],
            "group": group,
            "color": row["link_color"]
        }

        # Rows that are not 'V1' all go to the second ("final") graph.
        if row['version'] == 'V1':
            edges, nodes = v1_edges, v1_nodes
        else:
            edges, nodes = v2_edges, v2_nodes
        edges.append({"data": edge})
        nodes.update((row['source_map_id'], row['target_map_id']))

    v1_node_objs = [
        build_colored_node(n, _label_for_node(n), group, "V1")
        for n in sorted(v1_nodes)
    ]
    v2_node_objs = [
        build_colored_node(n, _label_for_node(n), group, "V2")
        for n in sorted(v2_nodes)
    ]

    grouped_all_data[group] = {
        "V1": {"nodes": v1_node_objs, "edges": v1_edges},
        "final": {"nodes": v2_node_objs, "edges": v2_edges}
    }

# Emit the grouped data as a JavaScript file that declares one constant
# holding the pretty-printed JSON.
serialized_data = json.dumps(grouped_all_data, indent=2)
Path("new_data.js").write_text(
    "const allCyDataSets = " + serialized_data + ";",
    encoding="utf-8",
)