# In[30]:
import pandas as pd
import json
from pathlib import Path

# In[31]:
# Load the four CSV exports this script works from, in a fixed order:
# both verification sheets, the V1<->V2 node mapping, and the final node list.
(
    verification_v1,
    verification_v2,
    node_mapping,
    node_final,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "new data/CLD data - Verification_data V1.csv",
        "new data/CLD data - Verification_data V2.csv",
        "new data/CLD data - Node Mapping.csv",
        "new data/CLD data - Node Final.csv",
    )
)

def normalize_color(value):
    """Coerce a raw colour cell to an integer-valued string.

    The value is parsed as a float and truncated towards zero, so ``"3.0"``,
    ``3.7`` and ``" 3 "`` all become ``"3"``.

    Args:
        value: Anything ``float()`` may accept (number, numeric string, ...).

    Returns:
        The truncated integer as a string, or ``"0"`` when the value cannot
        be converted (non-numeric text, ``None``, NaN, infinity).
    """
    try:
        return str(int(float(value)))
    # Only the failures the conversion itself can produce are treated as
    # "no colour": TypeError (e.g. None), ValueError (bad text, NaN) and
    # OverflowError (infinity). A bare ``except`` would also swallow
    # KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError, OverflowError):
        return "0"

# Tag every row with the sheet it came from, then stack both sheets into one
# frame with a fresh 0..n-1 index.
verification_v1['version'] = 'V1'
verification_v2['version'] = 'V2'
verification_combined = pd.concat(
    [verification_v1, verification_v2],
    ignore_index=True,
)

# Normalise the three colour columns the same way: fill missing cells with
# '0', stringify, then run each value through normalize_color.
for color_column in ('source_color', 'target_color', 'link_color'):
    verification_combined[color_column] = (
        verification_combined[color_column]
        .fillna('0')
        .astype(str)
        .apply(normalize_color)
    )

# Canonical participant type: trimmed, lower-cased, spaces turned into "_".
verification_combined['participant_type'] = (
    verification_combined['participant_type']
    .str.strip()
    .str.lower()
    .str.replace(' ', '_')
)

# Group key is "<participant_type>_<participant_number>".
verification_combined['group'] = (
    verification_combined['participant_type']
    + "_"
    + verification_combined['participant_number'].astype(str)
)

# Spreadsheet headers -> the column names used by the rest of the script.
# The ".1" suffixed headers are the second occurrence of a repeated header
# (mapped here to the target side of the link).
column_mapping = {
    'cause': 'source',
    'effect': 'target',
    'Master ID': 'source_master_id',
    'Node ID': 'source_node_id',
    'Master ID.1': 'target_master_id',
    'Node ID.1': 'target_node_id',
    'polarity': 'polarity',
    'participant_number': 'participant_number',
    'participant_type': 'participant_type',
    'source_color': 'source_color',
    'target_color': 'target_color',
    'Comment': 'comment'
}
verification_combined = verification_combined.rename(columns=column_mapping)

def map_comment_to_type(row):
    """Derive the short type code for one verification row.

    Args:
        row: Mapping-like row exposing ``'version'`` and ``'comment'``.

    Returns:
        ``'O'`` for every V1 row; ``'K'`` for other rows whose comment is
        missing or blank; otherwise the initials of the comment's words
        (first character of each capitalised word, concatenated).
    """
    if row['version'] == 'V1':
        return 'O'

    comment = row['comment']
    if pd.isna(comment) or comment.strip() == '':
        return 'K'

    initials = [word.capitalize()[0] for word in comment.split()]
    return ''.join(initials)

# One type code per row (see map_comment_to_type).
verification_combined['types'] = verification_combined.apply(map_comment_to_type, axis=1)

# Per-sheet lookups from a node ID to its target identifier.
node_v1_index_map = node_mapping.set_index('Node ID')['Index'].to_dict()
node_v2_index_map = node_mapping.set_index('Node ID.1')['Index'].to_dict()
node_final_master_map = node_final.set_index('Node ID')['Master ID'].to_dict()

# Layer the lookups; on a duplicate key the later source wins:
# V1 mapping < V2 mapping < final node sheet.
node_index_map = dict(node_v1_index_map)
node_index_map.update(node_v2_index_map)

merged_node_index_map = dict(node_index_map)
merged_node_index_map.update(node_final_master_map)

def map_node_id_to_index(node_id):
    """Translate a node ID through ``merged_node_index_map``.

    Returns:
        The mapped value as a string; an ID that is not in the map is passed
        through unchanged (also stringified).
    """
    mapped = merged_node_index_map.get(node_id, node_id)
    return str(mapped)

# Resolve both endpoints of every link to their mapped node identifier.
verification_combined['source_map_id'] = verification_combined['source_node_id'].apply(map_node_id_to_index)
verification_combined['target_map_id'] = verification_combined['target_node_id'].apply(map_node_id_to_index)

# Reshape the source-side and target-side colour columns into one common
# layout (node_id, color, group, version) so they can be stacked.
src_colors = verification_combined[
    ['source_map_id', 'source_color', 'group', 'version']
].rename(columns={'source_map_id': 'node_id', 'source_color': 'color'})
tgt_colors = verification_combined[
    ['target_map_id', 'target_color', 'group', 'version']
].rename(columns={'target_map_id': 'node_id', 'target_color': 'color'})

# Stack both sides (source rows first) and drop rows with any missing field.
all_node_colors = pd.concat([src_colors, tgt_colors]).dropna()
all_node_colors['node_id'] = all_node_colors['node_id'].astype(str)
all_node_colors['color'] = all_node_colors['color'].apply(normalize_color)

# (group, version, node_id) -> color. Rows are consumed in frame order, so
# when a key repeats the last row wins.
color_lookup = dict(
    zip(
        zip(
            all_node_colors['group'],
            all_node_colors['version'],
            all_node_colors['node_id'],
        ),
        all_node_colors['color'],
    )
)

# V2 node ID -> V1 node ID (as strings), skipping V2 nodes without a V1 ID.
v2_to_v1_nodeid = (
    node_mapping.set_index('Node ID.1')['Node ID']
    .dropna()
    .astype(str)
    .to_dict()
)
# Master ID -> node name, taken from the final node sheet.
node_label_map = node_final.set_index('Master ID')['Node Name'].to_dict()

def build_colored_node(id_val, label, group, version_prefix):
    """Build the node element (``data`` / ``classes`` dict) for one node.

    Reads the module-level ``color_lookup``, ``verification_combined``,
    ``v2_to_v1_nodeid`` and ``node_mapping``.

    Args:
        id_val: Node identifier; stringified before use.
        label: Display label. When empty or missing (NaN/None), the label is
            taken from the first verification row of this group and version
            that uses the node as source or target, if there is one.
        group: Group key of the form ``"<participant_type>_<number>"``.
        version_prefix: ``"V1"`` or ``"V2"``.

    Returns:
        ``{"data": {...}, "classes": ""}`` where ``data`` carries ``id``,
        ``key`` (``"<version>_<id>"``), ``label``, ``participant_type``,
        ``color`` (``"0"`` when unknown) and a one-element ``history`` list.
        For a V2 node with a known V1 counterpart the history entry describes
        that V1 node; otherwise it repeats the node itself.
    """
    id_str = str(id_val)
    color = color_lookup.get((group, version_prefix, id_str), "0")

    # The participant type may itself contain underscores (spaces are replaced
    # by "_" when the group key is built), so strip only the trailing
    # "_<number>" instead of cutting at the first underscore.
    participant_type = group.rsplit("_", 1)[0]

    # Label fallback from verification_combined. A NaN label (empty CSV cell)
    # is treated like an empty one so it also gets the fallback.
    if pd.isna(label) or not label:
        in_version = verification_combined['version'] == version_prefix
        in_group = verification_combined['group'] == group
        as_source = verification_combined['source_map_id'].astype(str) == id_str
        as_target = verification_combined['target_map_id'].astype(str) == id_str
        fallback_rows = verification_combined[in_version & (as_source | as_target) & in_group]
        if not fallback_rows.empty:
            first = fallback_rows.iloc[0]
            label = first['source'] if first['source_map_id'] == id_str else first['target']

    # By default the history entry mirrors the node itself.
    history_id, history_label = id_str, label

    if version_prefix == "V2":
        v1_node_id = v2_to_v1_nodeid.get("V2_" + id_str)
        if v1_node_id:
            # Point the history at the V1 counterpart, without version prefix.
            history_id = str(v1_node_id).replace("V1_", "").replace("V2_", "")
            v1_label_match = node_mapping.loc[
                node_mapping['Node ID'].astype(str) == str(v1_node_id),
                'Node Name V1',
            ].values
            history_label = v1_label_match[0] if len(v1_label_match) > 0 else label

    history = [{
        "id": history_id,
        "key": history_id,
        "label": history_label,
        "participant_type": participant_type,
        "group": group,
        "history": []
    }]

    return {
        "data": {
            "id": id_str,
            "key": f"{version_prefix}_{id_str}",
            "label": label,
            "participant_type": participant_type,
            "color": color,
            "history": history
        },
        "classes": ""
    }

def _first_match_lookup(keys, values):
    """Map each key to the value of its first occurrence.

    Gives the same answer as filtering a frame on ``key == x`` and taking
    ``.values[0]``, but is built once instead of once per node.
    """
    lookup = {}
    for key, value in zip(keys, values):
        lookup.setdefault(key, value)
    return lookup


# group -> {"V1": {"nodes": [...], "edges": [...]}, "final": {...}}
grouped_all_data = {}

# Loop-invariant lookups, built once rather than once per group / per node.
# V1 labels are keyed by the stringified 'Node ID'; V2 labels by the raw
# 'Node ID.1' value (which is compared against "V2_<id>").
v1_label_by_node_id = _first_match_lookup(
    node_mapping['Node ID'].astype(str),
    node_mapping['Node Name V1'].values,
)
v2_label_by_node_id = _first_match_lookup(
    node_mapping['Node ID.1'].values,
    node_mapping['Node Name V2'].values,
)
# V2 node IDs known to the mapping sheet, with the "V2_" prefix removed.
mapping_v2_ids = node_mapping['Node ID.1'].dropna().astype(str).str.replace('V2_', '')
mapping_v2_id_set = set(mapping_v2_ids)

for group in verification_combined['group'].unique():
    gdf = verification_combined[verification_combined['group'] == group]
    v1_edges, v2_edges = [], []
    v1_nodes, v2_nodes = set(), set()

    # One edge per verification row; every row that is not V1 counts as V2.
    for _, row in gdf.iterrows():
        is_positive = row['polarity'] == "positive"
        edge = {
            "id": f"{row['source_map_id']}-{row['target_map_id']}",
            "source": str(row['source_map_id']),
            "target": str(row['target_map_id']),
            "label": "+" if is_positive else "-",
            "value": 1 if is_positive else -1,
            "polarity": row["polarity"],
            "participant_type": row["participant_type"],
            "group": group,
            "color": row["link_color"]
        }
        if row['version'] == 'V1':
            v1_edges.append({"data": edge})
            v1_nodes.update([row['source_map_id'], row['target_map_id']])
        else:
            v2_edges.append({"data": edge})
            v2_nodes.update([row['source_map_id'], row['target_map_id']])

    v1_node_objs = [
        build_colored_node(n, v1_label_by_node_id.get(str(n), ""), group, "V1")
        for n in sorted(v1_nodes)
    ]

    # Start with V2 node IDs from actual links
    all_v2_ids = set(v2_nodes)

    # Also include this group's raw V2 node IDs (source and target side),
    # but only those that the mapping sheet knows about.
    v2_rows = gdf[gdf['version'] == 'V2']
    valid_group_v2_ids = (
        v2_rows['source_node_id'].astype(str).tolist()
        + v2_rows['target_node_id'].astype(str).tolist()
    )
    all_v2_ids.update(set(valid_group_v2_ids) & mapping_v2_id_set)

    v2_node_objs = [
        build_colored_node(n, v2_label_by_node_id.get("V2_" + n, ""), group, "V2")
        for n in sorted(all_v2_ids)
    ]

    grouped_all_data[group] = {
        "V1": {"nodes": v1_node_objs, "edges": v1_edges},
        "final": {"nodes": v2_node_objs, "edges": v2_edges}
    }

# Write the dataset as a JavaScript file declaring one global constant.
with open("new_data.js", mode="w", encoding="utf-8") as f:
    payload = json.dumps(grouped_all_data, indent=2)
    f.writelines(("const allCyDataSets = ", payload, ";"))
