In [None]:
# Generate a dummy Foundry-style ontology dataset with nodes, edges, and params.
import json, os, random, math
from datetime import datetime, timedelta

# Seed the RNG so the generated dataset is identical on every run.
random.seed(42)

# Reference date that every generated timestamp is counted back from.
today = datetime(year=2025, month=8, day=23)

# Destination folder for the generated files; created if it is missing.
base_dir = "/mnt/data/foundry_ontology_dummy"
os.makedirs(name=base_dir, exist_ok=True)

# --- Create nodes ---
# (id, display name) pairs; ids are sequential: P001 .. P010.
people = [
    (f"P{seq:03d}", full_name)
    for seq, full_name in enumerate(
        (
            "Alex Kim",
            "Jordan Lee",
            "Riley Patel",
            "Samira Chen",
            "Diego Alvarez",
            "Priya Singh",
            "Morgan Davis",
            "Chen Wei",
            "Taylor Brooks",
            "Casey Morgan",
        ),
        start=1,
    )
]

# Pools that each person's department / role is drawn from.
departments = [
    "Trading",
    "Compliance",
    "Engineering",
    "Risk",
    "Operations",
]
roles = [
    "Analyst",
    "Manager",
    "Director",
    "Engineer",
    "Associate",
]

def clamp(x, lo, hi):
    """Limit ``x`` to the range [``lo``, ``hi``].

    The upper bound is applied first and the lower bound last, so ``lo``
    wins if the bounds are passed in inverted order.
    """
    capped = min(hi, x)
    return max(lo, capped)

nodes = []
for person_id, full_name in people:
    # The RNG is consumed in a fixed order (score, department, role,
    # creation age, active flag) so the seeded output stays reproducible.
    base_score = clamp(int(random.gauss(450, 180)), 0, 900)  # spread across 0-900
    dept = random.choice(departments)
    job_role = random.choice(roles)
    created = today - timedelta(days=random.randint(120, 365))
    is_active = random.choice([True, True, True, False])  # mostly active
    nodes.append(
        {
            "id": person_id,
            "entityType": "Person",
            "name": full_name,
            "department": dept,
            "role": job_role,
            "baseScore": base_score,
            # initial score = base; your pipeline will recompute this
            "score": base_score,
            "createdAt": created.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "active": is_active,
        }
    )

# Ensure at least one low, one high base score
for position, pinned_score in ((0, 120), (1, 780)):
    nodes[position].update(baseScore=pinned_score, score=pinned_score)

# --- Create edges ---
edge_types = ["friend", "peer", "collaborator", "manager_of", "reports_to"]
def rand_event_time():
    """Return a random timestamp string earlier than the module-level ``today``.

    The offset is 1-180 days plus 0-23 hours and 0-59 minutes; the result
    is formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    # Draw order (days, hours, minutes) is kept so seeded runs stay stable.
    days_back = random.randint(1, 180)
    hours_back = random.randint(0, 23)
    minutes_back = random.randint(0, 59)
    offset = timedelta(days=days_back, hours=hours_back, minutes=minutes_back)
    return (today - offset).strftime("%Y-%m-%dT%H:%M:%SZ")

# Predefine a simple structure with some clusters and cross-links
# Each entry is (source id, target id, edge type).
edge_pairs = [
    ("P001", "P002", "peer"),
    ("P001", "P003", "collaborator"),
    ("P002", "P003", "friend"),
    ("P003", "P004", "collaborator"),
    ("P004", "P005", "peer"),
    ("P005", "P001", "collaborator"),
    ("P006", "P003", "peer"),
    ("P006", "P007", "friend"),
    ("P007", "P008", "peer"),
    ("P008", "P009", "collaborator"),
    ("P009", "P010", "peer"),
    ("P010", "P006", "collaborator"),
]

# some directional hierarchy: each manager_of edge is followed by its
# reports_to mirror in the opposite direction
for manager, report in (("P002", "P006"), ("P004", "P007")):
    edge_pairs.append((manager, report, "manager_of"))
    edge_pairs.append((report, manager, "reports_to"))

# cross-cluster edges
edge_pairs.extend(
    [
        ("P005", "P008", "collaborator"),
        ("P009", "P001", "peer"),
    ]
)

edges = []
for source_id, target_id, link_type in edge_pairs:
    # triangular(low=150, high=900, mode=600): skew toward medium-high
    strength = clamp(int(random.triangular(150, 900, 600)), 0, 900)
    edges.append(
        {
            "src_id": source_id,
            "dst_id": target_id,
            "edgeType": link_type,
            "associationRate": strength,  # 0..900
            "eventTime": rand_event_time(),
            # optional metadata for analytics
            "notes": f"Auto-generated {link_type} link with strength {strength}",
        }
    )

# Include one weak tie and one very strong tie for testing
for source_id, target_id, link_type, strength, label in (
    ("P003", "P009", "peer", 45, "Very weak tie"),
    ("P008", "P002", "collaborator", 895, "Very strong tie"),
):
    edges.append(
        {
            "src_id": source_id,
            "dst_id": target_id,
            "edgeType": link_type,
            "associationRate": strength,
            "eventTime": rand_event_time(),
            "notes": label,
        }
    )

# --- Params ---
# Tuning knobs written to params.json alongside the node and edge files.
params = dict(
    alpha=0.65,  # neighbor influence weight
    beta=0.35,  # base influence weight
    # per-edge-type multipliers
    lambdaPerType=dict(
        friend=1.0,
        peer=0.9,
        collaborator=1.1,
        manager_of=0.8,
        reports_to=0.8,
    ),
    lambdaTime=0.008,  # per-day exponential decay for recency
    clipMin=0,
    clipMax=900,
)

# --- Write files (JSON arrays + JSONL) ---
# Pretty-printed JSON documents.
for file_name, payload in (
    ("nodes.json", nodes),
    ("edges.json", edges),
    ("params.json", params),
):
    with open(os.path.join(base_dir, file_name), "w", encoding="utf-8") as out:
        json.dump(payload, out, indent=2)

# Line-delimited JSON: one compact record per line.
for file_name, records in (
    ("nodes.jsonl", nodes),
    ("edges.jsonl", edges),
):
    with open(os.path.join(base_dir, file_name), "w", encoding="utf-8") as out:
        out.writelines(json.dumps(record) + "\n" for record in records)

# Last expression of the cell: echoes the output directory.
base_dir


In [None]:
# Build an interactive Plotly network graph (ontology) and a couple of simple distributions
# from the dummy dataset we just generated.
import json, math, os
import networkx as nx
import plotly.graph_objects as go
import pandas as pd

base_dir = r"C:\Users\karan\Documents\Projects\message-credit\Test"

# Load the node and edge records from their JSON files.
with open(os.path.join(base_dir, "nodes.json"), "r", encoding="utf-8") as nodes_file:
    nodes = json.load(nodes_file)
with open(os.path.join(base_dir, "edges.json"), "r", encoding="utf-8") as edges_file:
    edges = json.load(edges_file)

# --- Build graph (directed), but use undirected for layout ---
G = nx.DiGraph()
# Every field of a record is stored as an attribute on its node / edge.
G.add_nodes_from((record["id"], record) for record in nodes)
G.add_edges_from((link["src_id"], link["dst_id"], link) for link in edges)

# Undirected copy for layout
GU = G.to_undirected()
pos = nx.spring_layout(GU, seed=7)  # fixed seed keeps the layout reproducible

# --- Edge thickness scaling based on associationRate ---
assoc_rates = [attrs["associationRate"] for _, _, attrs in G.edges(data=True)]
if assoc_rates:
    w_min, w_max = min(assoc_rates), max(assoc_rates)
else:
    # No edges at all: use a placeholder 0..1 range.
    w_min, w_max = 0, 1

def edge_width(rate):
    """Map an association rate onto a line width between 1 and 8.

    Scaling is linear between the module-level ``w_min`` and ``w_max``.
    When both are equal there is nothing to scale against, so a fixed
    width of 4 is returned.
    """
    if w_max != w_min:
        fraction = (rate - w_min) / (w_max - w_min)
        return 1 + 7 * fraction
    return 4

# --- Build Plotly edge traces (one per edge so each can have its own width) ---
edge_traces = []
for (u, v, data) in G.edges(data=True):
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    # A "lines" trace only offers hover at its vertices, and both endpoints
    # lie under node markers. The extra collinear midpoint gives each edge a
    # hover target of its own without changing how the segment is drawn.
    xm, ym = (x0 + x1) / 2, (y0 + y1) / 2
    rate = data.get("associationRate", 0)
    etype = data.get("edgeType", "edge")
    text = f"{u} → {v}<br>type: {etype}<br>associationRate: {rate}<br>eventTime: {data.get('eventTime','')}"
    edge_traces.append(
        go.Scatter(
            x=[x0, xm, x1],
            y=[y0, ym, y1],
            mode="lines",
            hoverinfo="text",
            text=[text] * 3,  # same tooltip at both ends and the midpoint
            line=dict(width=edge_width(rate)),
            showlegend=False,
        )
    )

# --- Build Plotly node trace ---
# Parallel lists, one entry per node, filled in graph iteration order.
node_x, node_y, node_text, node_size, node_names = [], [], [], [], []
for nid, attrs in G.nodes(data=True):
    x, y = pos[nid]
    node_x.append(x)
    node_y.append(y)
    node_names.append(attrs.get("name", ""))
    # Size by current score: 10 at score 0, 30 at score 900
    score = float(attrs.get("score", 0))
    size = 10 + 20 * (score / 900.0)
    node_size.append(size)
    hover = (
        f"id: {nid}<br>"
        f"name: {attrs.get('name','')}<br>"
        f"dept: {attrs.get('department','')} | role: {attrs.get('role','')}<br>"
        f"baseScore: {attrs.get('baseScore','')} | score: {attrs.get('score','')}<br>"
        f"active: {attrs.get('active','')}"
    )
    node_text.append(hover)

# Label trace: draws the markers plus the name above each node. Hover is
# skipped here because nodes_trace_hover sits at the same coordinates and
# carries the detailed tooltip; two hover-enabled traces would compete.
nodes_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode="markers+text",
    text=node_names,
    textposition="top center",
    hoverinfo="skip",
    textfont=dict(size=10),
    marker=dict(size=node_size),
    showlegend=False,
)

# Hover trace: same markers, tooltip only.
nodes_trace_hover = go.Scatter(
    x=node_x,
    y=node_y,
    mode="markers",
    hoverinfo="text",
    text=node_text,
    marker=dict(size=node_size),
    showlegend=False,
)

# Edges are listed first so the node markers are drawn on top of them.
fig_net = go.Figure(data=[*edge_traces, nodes_trace_hover, nodes_trace])
fig_net.update_layout(
    title="Interactive Ontology Graph (edge thickness = association strength)",
    hovermode="closest",
    xaxis={"visible": False},
    yaxis={"visible": False},
    margin={"l": 20, "r": 20, "t": 50, "b": 20},
)

# Save to HTML for sharing/downloading
net_html_path = os.path.join(base_dir, "ontology_network.html")
fig_net.write_html(net_html_path, include_plotlyjs="cdn", full_html=True)

# --- Distributions (each chart separate) ---
df_nodes = pd.DataFrame(nodes)
df_edges = pd.DataFrame(edges)

# Histogram of node base scores.
score_hist = go.Histogram(x=df_nodes["baseScore"])
fig_scores = go.Figure(data=[score_hist])
fig_scores.update_layout(
    title="Distribution of baseScore (nodes)",
    xaxis_title="baseScore",
    yaxis_title="count",
    margin={"l": 20, "r": 20, "t": 50, "b": 20},
)

# Histogram of edge association rates.
assoc_hist = go.Histogram(x=df_edges["associationRate"])
fig_assoc = go.Figure(data=[assoc_hist])
fig_assoc.update_layout(
    title="Distribution of associationRate (edges)",
    xaxis_title="associationRate",
    yaxis_title="count",
    margin={"l": 20, "r": 20, "t": 50, "b": 20},
)

# Last expression of the cell: echoes the three figures and the HTML path.
fig_net, fig_scores, fig_assoc, net_html_path


In [2]:
# Enhance the interactive Plotly graph to show richer node hover info,
# including base/individual score and top connection values per node.
import json, os
from collections import defaultdict
import networkx as nx
import plotly.graph_objects as go

base_dir = r"C:\Users\karan\Documents\Projects\message-credit\Test"

# Load the node and edge records from their JSON files.
with open(os.path.join(base_dir, "nodes.json"), "r", encoding="utf-8") as nodes_file:
    nodes = json.load(nodes_file)
with open(os.path.join(base_dir, "edges.json"), "r", encoding="utf-8") as edges_file:
    edges = json.load(edges_file)

# Build maps for quick lookup: node id -> display name (the id itself if unnamed)
name_by_id = {}
for record in nodes:
    name_by_id[record["id"]] = record.get("name", record["id"])

# Directed graph; every record field becomes a node / edge attribute
G = nx.DiGraph()
G.add_nodes_from((record["id"], record) for record in nodes)
G.add_edges_from((link["src_id"], link["dst_id"], link) for link in edges)

# Position using undirected layout for aesthetics
pos = nx.spring_layout(G.to_undirected(), seed=7)

# Collect neighbor info per node for hover text
neighbors_info = defaultdict(list)
for src, dst, link_attrs in G.edges(data=True):
    link_rate = link_attrs.get("associationRate", 0)
    link_type = link_attrs.get("edgeType", "edge")
    # Each edge is recorded twice: as outgoing ("→") on the source's list,
    # then as incoming ("←") on the target's list.
    for owner, arrow, other in ((src, "→", dst), (dst, "←", src)):
        neighbors_info[owner].append(
            {
                "dir": arrow,
                "nbr": other,
                "nbrName": name_by_id.get(other, other),
                "rate": link_rate,
                "edgeType": link_type,
            }
        )

# Edge traces with thickness by associationRate
assoc_rates = []
for _, _, link_attrs in G.edges(data=True):
    assoc_rates.append(link_attrs["associationRate"])
if assoc_rates:
    wmin, wmax = min(assoc_rates), max(assoc_rates)
else:
    # No edges at all: use a placeholder 0..1 range.
    wmin, wmax = 0, 1
def width_scale(rate):
    """Scale ``rate`` linearly from [``wmin``, ``wmax``] onto a width in [1, 8].

    ``wmin`` and ``wmax`` are module-level; if they are equal a fixed
    width of 4 is returned.
    """
    if wmax != wmin:
        position = (rate - wmin) / (wmax - wmin)
        return 1 + 7 * position
    return 4

# One Scatter trace per edge so each line can carry its own width.
edge_traces = []
for (u, v, data) in G.edges(data=True):
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    # A "lines" trace only offers hover at its vertices, and both endpoints
    # lie under node markers. The extra collinear midpoint gives each edge a
    # hover target of its own without changing how the segment is drawn.
    xm, ym = (x0 + x1) / 2, (y0 + y1) / 2
    rate = data.get("associationRate", 0)
    etype = data.get("edgeType", "edge")
    text = (
        f"{name_by_id.get(u,u)} ({u}) → {name_by_id.get(v,v)} ({v})<br>"
        f"type: {etype}<br>"
        f"associationRate: {rate}<br>"
        f"eventTime: {data.get('eventTime','')}"
    )
    edge_traces.append(
        go.Scatter(
            x=[x0, xm, x1],
            y=[y0, ym, y1],
            mode="lines",
            hoverinfo="text",
            text=[text] * 3,  # same tooltip at both ends and the midpoint
            line=dict(width=width_scale(rate)),
            showlegend=False,
        )
    )

# Node traces with rich hover
# Parallel lists, one entry per node, filled in graph iteration order.
node_x, node_y, hover_texts, label_texts, sizes = [], [], [], [], []
for nid, attrs in G.nodes(data=True):
    x, y = pos[nid]
    node_x.append(x)
    node_y.append(y)
    label_texts.append(attrs.get("name", nid))

    base = attrs.get("baseScore", 0)
    score = attrs.get("score", 0)
    # Marker size grows linearly with score: 10 at score 0, 30 at score 900.
    size = 10 + 20 * (float(score) / 900.0)
    sizes.append(size)

    # Summarize neighbors: sort by rate desc, take top 5
    nbrs = sorted(neighbors_info.get(nid, []), key=lambda d: d["rate"], reverse=True)
    top = nbrs[:5]
    deg_in = G.in_degree(nid)
    deg_out = G.out_degree(nid)
    deg = deg_in + deg_out
    # Mean strength over all incoming and outgoing links; 0 when isolated.
    avg_rate = sum(d["rate"] for d in nbrs) / len(nbrs) if nbrs else 0

    # Compose hover HTML
    lines = [
        f"<b>{attrs.get('name', nid)}</b> ({nid})",
        f"Dept: {attrs.get('department','')} | Role: {attrs.get('role','')}",
        f"Base score: <b>{base}</b> | Current score: <b>{score}</b>",
        f"Connections: {deg} (in: {deg_in}, out: {deg_out}) | Avg link strength: {avg_rate:.1f}",
    ]
    if top:
        lines.append("<br><b>Top connections</b> (dir, neighbor, type, strength):")
        for t in top:
            lines.append(f"{t['dir']} {t['nbrName']} ({t['edgeType']}, {t['rate']})")
    else:
        lines.append("No connections")
    hover_texts.append("<br>".join(lines))

# Settings shared by both node traces (same positions and marker sizes).
_node_common = dict(x=node_x, y=node_y, marker=dict(size=sizes), showlegend=False)

# Markers that carry the detailed tooltip.
nodes_hover_trace = go.Scatter(
    mode="markers",
    hoverinfo="text",
    text=hover_texts,
    **_node_common,
)

# Markers plus the name above each node.
nodes_label_trace = go.Scatter(
    mode="markers+text",
    text=label_texts,
    textposition="top center",
    hoverinfo="skip",  # hover handled by nodes_hover_trace
    **_node_common,
)

fig = go.Figure(data=[*edge_traces, nodes_hover_trace, nodes_label_trace])
fig.update_layout(
    title="Interactive Ontology Graph (Hover nodes for score + connection strengths)",
    hovermode="closest",
    xaxis={"visible": False},
    yaxis={"visible": False},
    margin={"l": 20, "r": 20, "t": 50, "b": 20},
)

# Export a standalone page; plotly.js is referenced from the CDN.
html_path = os.path.join(base_dir, "ontology_network_rich_hover.html")
fig.write_html(html_path, include_plotlyjs="cdn", full_html=True)

html_path


'C:\\Users\\karan\\Documents\\Projects\\message-credit\\Test\\ontology_network_rich_hover.html'

In [3]:
# Rebuild the Plotly graph so:
# - Hovering a NODE shows only node-level info (no connection values)
# - Hovering an EDGE shows edge-level info (connection value, type, etc.)
import json, os
import networkx as nx
import plotly.graph_objects as go

base_dir = r"C:\Users\karan\Documents\Projects\message-credit\Test"

# Load the node and edge records from their JSON files.
with open(os.path.join(base_dir, "nodes.json"), "r", encoding="utf-8") as nodes_file:
    nodes = json.load(nodes_file)
with open(os.path.join(base_dir, "edges.json"), "r", encoding="utf-8") as edges_file:
    edges = json.load(edges_file)

# Build directed graph, use undirected for layout
G = nx.DiGraph()
# Every field of a record is stored as an attribute on its node / edge.
G.add_nodes_from((record["id"], record) for record in nodes)
G.add_edges_from((link["src_id"], link["dst_id"], link) for link in edges)

pos = nx.spring_layout(G.to_undirected(), seed=7)

# Edge thickness scaling
assoc_rates = [link_attrs["associationRate"] for _, _, link_attrs in G.edges(data=True)]
if assoc_rates:
    wmin, wmax = min(assoc_rates), max(assoc_rates)
else:
    # No edges at all: use a placeholder 0..1 range.
    wmin, wmax = 0, 1
def width_scale(rate):
    """Return a line width in [1, 8] for ``rate``.

    The rate is positioned linearly between the module-level ``wmin`` and
    ``wmax``; when those are equal the width is fixed at 4.
    """
    if wmax != wmin:
        span = wmax - wmin
        return 1 + 7 * ((rate - wmin) / span)
    return 4

# Build edge traces (hover: edge-only info)
# node id -> display name, falling back to the id when a record has no name
name_by_id = {n["id"]: n.get("name", n["id"]) for n in nodes}
edge_traces = []
for u, v, data in G.edges(data=True):
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    # A "lines" trace only offers hover at its vertices, and both endpoints
    # lie under node markers. The extra collinear midpoint gives each edge a
    # hover target of its own, which is what makes edge-only hover reachable;
    # the drawn segment is unchanged.
    xm, ym = (x0 + x1) / 2, (y0 + y1) / 2
    rate = data.get("associationRate", 0)
    etype = data.get("edgeType", "edge")
    text = (
        f"<b>Connection</b><br>"
        f"{name_by_id.get(u,u)} ({u}) → {name_by_id.get(v,v)} ({v})<br>"
        f"type: {etype}<br>"
        f"connection value: <b>{rate}</b><br>"
        f"eventTime: {data.get('eventTime','')}<br>"
        f"line width (relative): {width_scale(rate):.1f}"
    )
    edge_traces.append(
        go.Scatter(
            x=[x0, xm, x1],
            y=[y0, ym, y1],
            mode="lines",
            hoverinfo="text",
            text=[text] * 3,  # same tooltip at both ends and the midpoint
            line=dict(width=width_scale(rate)),
            showlegend=False,
        )
    )

# Build node traces:
#  - One for hover (markers only, with node info)
#  - One for labels (text+markers, hover disabled)
node_x, node_y, node_hover, node_labels, node_sizes = [], [], [], [], []
for node_id, info in G.nodes(data=True):
    px, py = pos[node_id]
    node_x.append(px)
    node_y.append(py)

    display_name = info.get("name", node_id)
    node_labels.append(display_name)

    # Marker size grows linearly with score: 10 at score 0, 30 at score 900.
    node_sizes.append(10 + 20 * (float(info.get("score", 0)) / 900.0))

    status = "Active" if info.get("active") else "Inactive"
    hover_lines = [
        f"<b>{display_name}</b> ({node_id})",
        f"Dept: {info.get('department','')} | Role: {info.get('role','')}",
        f"Base score: <b>{info.get('baseScore', 0)}</b>",
        f"Current score: <b>{info.get('score', 0)}</b>",
        f"Status: {status}",
    ]
    node_hover.append("<br>".join(hover_lines))

# Settings shared by both node traces (same positions and marker sizes).
_node_common = dict(x=node_x, y=node_y, marker=dict(size=node_sizes), showlegend=False)

nodes_hover_trace = go.Scatter(
    mode="markers",
    hoverinfo="text",
    text=node_hover,
    **_node_common,
)

nodes_label_trace = go.Scatter(
    mode="markers+text",
    text=node_labels,
    textposition="top center",
    hoverinfo="skip",
    **_node_common,
)

fig = go.Figure(data=[*edge_traces, nodes_hover_trace, nodes_label_trace])
fig.update_layout(
    title="Interactive Ontology Graph (Node hover = node info, Edge hover = connection info)",
    hovermode="closest",
    xaxis={"visible": False},
    yaxis={"visible": False},
    margin={"l": 20, "r": 20, "t": 50, "b": 20},
)

# Export a standalone page; plotly.js is referenced from the CDN.
html_path = os.path.join(base_dir, "ontology_network_clean_hover.html")
fig.write_html(html_path, include_plotlyjs="cdn", full_html=True)

html_path


'C:\\Users\\karan\\Documents\\Projects\\message-credit\\Test\\ontology_network_clean_hover.html'

In [6]:
# Update edges to represent loans and recompute strength per user spec:
# strength ∝ amount_sent_on_edge and ∝ 1 / total_amount_sent_by_source
# Then regenerate JSON files and an interactive Plotly graph with clean hovers.

import json, os, random, math
from collections import defaultdict
import networkx as nx
import plotly.graph_objects as go

base_dir = r"C:\Users\karan\Documents\Projects\message-credit\Test"

# --- Load existing ---
with open(os.path.join(base_dir, "nodes.json"), "r", encoding="utf-8") as nodes_file:
    nodes = json.load(nodes_file)
with open(os.path.join(base_dir, "edges.json"), "r", encoding="utf-8") as edges_file:
    edges = json.load(edges_file)

# --- Assign loan amounts to edges (USD) ---
random.seed(123)  # fixed seed: the same amounts are drawn on every run
for loan in edges:
    # Heavy-tailed distribution for realistic loan sizes
    drawn = int(random.lognormvariate(9.5, 1.0))
    # keep the draw inside 500 .. 250,000
    bounded = max(500, min(250000, drawn))
    # round to nearest 100
    loan.update(
        loanAmountUSD=int(round(bounded / 100.0) * 100),
        loanCurrency="USD",
    )
    loan["notes"] = f"Loan from {loan['src_id']} to {loan['dst_id']} amount ${loan['loanAmountUSD']:,}"

# --- Compute totals per source and per target ---
total_sent = defaultdict(int)
total_recv = defaultdict(int)
for loan in edges:
    value = loan["loanAmountUSD"]
    total_sent[loan["src_id"]] += value
    total_recv[loan["dst_id"]] += value

# --- Recompute strength ("associationRate") ---
# Each edge gets its share of the sender's total outflow, scaled to 0..900
# and rounded to an integer; a sender whose total is 0 gets strength 0.
for loan in edges:
    sender_total = total_sent[loan["src_id"]]
    share = 900.0 * loan["loanAmountUSD"] / sender_total if sender_total > 0 else 0.0
    loan["associationRate"] = int(round(share))  # keep same field name for plotting
    loan["strengthNormalizedPerSource"] = True

# --- Update node aggregates ---
# id -> node record lookup (not referenced by the code visible in this cell)
node_by_id = {n["id"]: n for n in nodes}

# Count outgoing / incoming loans in a single pass over the edge list instead
# of re-scanning every edge once per node.
out_degree = defaultdict(int)
in_degree = defaultdict(int)
for e in edges:
    out_degree[e["src_id"]] += 1
    in_degree[e["dst_id"]] += 1

for n in nodes:
    sid = n["id"]
    n["totalSentUSD"] = int(total_sent.get(sid, 0))
    n["totalReceivedUSD"] = int(total_recv.get(sid, 0))
    # positive = sent more than received
    n["netFlowUSD"] = int(n["totalSentUSD"] - n["totalReceivedUSD"])
    n["outDegree"] = out_degree.get(sid, 0)
    n["inDegree"] = in_degree.get(sid, 0)

# --- Save updated datasets ---
# Pretty-printed JSON arrays.
for file_name, payload in (
    ("nodes_loans.json", nodes),
    ("edges_loans.json", edges),
):
    with open(os.path.join(base_dir, file_name), "w", encoding="utf-8") as out:
        json.dump(payload, out, indent=2)

# Line-delimited JSON: one compact record per line.
for file_name, records in (
    ("nodes_loans.jsonl", nodes),
    ("edges_loans.jsonl", edges),
):
    with open(os.path.join(base_dir, file_name), "w", encoding="utf-8") as out:
        out.writelines(json.dumps(record) + "\n" for record in records)

# --- Build interactive Plotly network (clean hover separation) ---
G = nx.DiGraph()
# Every field of a record is stored as an attribute on its node / edge.
G.add_nodes_from((record["id"], record) for record in nodes)
G.add_edges_from((link["src_id"], link["dst_id"], link) for link in edges)

# Layout is computed on the undirected view; fixed seed keeps it stable.
pos = nx.spring_layout(G.to_undirected(), seed=7)

# Edge widths from associationRate
assoc_rates = [link_attrs["associationRate"] for _, _, link_attrs in G.edges(data=True)]
if assoc_rates:
    wmin, wmax = min(assoc_rates), max(assoc_rates)
else:
    # No edges at all: use a placeholder 0..1 range.
    wmin, wmax = 0, 1
def width_scale(rate):
    """Convert a normalized strength into a line width between 1 and 8.

    Uses the module-level ``wmin`` / ``wmax`` as the input range; returns a
    constant 4 when the two are equal.
    """
    if wmax != wmin:
        relative = (rate - wmin) / (wmax - wmin)
        return 1 + 7 * relative
    return 4

# node id -> display name, falling back to the id when a record has no name
name_by_id = {n["id"]: n.get("name", n["id"]) for n in nodes}

# One Scatter trace per loan edge so each line can carry its own width.
edge_traces = []
for u, v, data in G.edges(data=True):
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    # A "lines" trace only offers hover at its vertices, and both endpoints
    # lie under node markers. The extra collinear midpoint gives each edge a
    # hover target of its own without changing how the segment is drawn.
    xm, ym = (x0 + x1) / 2, (y0 + y1) / 2
    rate = data.get("associationRate", 0)
    amt = data.get("loanAmountUSD", 0)
    etype = data.get("edgeType", "loan")
    text = (
        f"<b>Loan Edge</b><br>"
        f"{name_by_id.get(u,u)} ({u}) → {name_by_id.get(v,v)} ({v})<br>"
        f"type: {etype}<br>"
        f"loan amount: <b>${amt:,.0f}</b><br>"
        f"strength (normalized per source): <b>{rate}</b> / 900<br>"
        f"note: strength = 900 × amount / totalSent(source)<br>"
        f"eventTime: {data.get('eventTime','')}"
    )
    edge_traces.append(
        go.Scatter(
            x=[x0, xm, x1],
            y=[y0, ym, y1],
            mode="lines",
            hoverinfo="text",
            text=[text] * 3,  # same tooltip at both ends and the midpoint
            line=dict(width=width_scale(rate)),
            showlegend=False,
        )
    )

# Node traces: hover shows aggregate loan stats
# Parallel lists, one entry per node, filled in graph iteration order.
node_x, node_y, node_hover, node_labels, node_sizes = [], [], [], [], []
for nid, attrs in G.nodes(data=True):
    x, y = pos[nid]
    node_x.append(x)
    node_y.append(y)
    node_labels.append(attrs.get("name", nid))
    # Marker size grows linearly with score: 10 at score 0, 30 at score 900.
    score = float(attrs.get("score", 0))
    size = 10 + 20 * (score / 900.0)
    node_sizes.append(size)
    ts = attrs.get("totalSentUSD", 0)
    tr = attrs.get("totalReceivedUSD", 0)
    net = attrs.get("netFlowUSD", 0)
    # Net flow is negative for net receivers; put the minus sign in front of
    # the currency symbol ("-$5,000") rather than after it ("$-5,000").
    net_text = f"-${-net:,.0f}" if net < 0 else f"${net:,.0f}"
    node_hover.append(
        "<br>".join([
            f"<b>{attrs.get('name', nid)}</b> ({nid})",
            f"Dept: {attrs.get('department','')} | Role: {attrs.get('role','')}",
            f"Base score: <b>{attrs.get('baseScore', 0)}</b> | Current score: <b>{attrs.get('score', 0)}</b>",
            f"Loans sent: <b>${ts:,.0f}</b> | Loans received: <b>${tr:,.0f}</b> | Net flow: <b>{net_text}</b>",
            f"Degree (out/in): {attrs.get('outDegree',0)}/{attrs.get('inDegree',0)}",
            "Edge strength = 900 × amount / totalSent(source)"
        ])
    )

# Settings shared by both node traces (same positions and marker sizes).
_node_common = dict(x=node_x, y=node_y, marker=dict(size=node_sizes), showlegend=False)

# Markers that carry the aggregate tooltip.
nodes_hover_trace = go.Scatter(
    mode="markers",
    hoverinfo="text",
    text=node_hover,
    **_node_common,
)
# Markers plus the name above each node; hover is left to the trace above.
nodes_label_trace = go.Scatter(
    mode="markers+text",
    text=node_labels,
    textposition="top center",
    hoverinfo="skip",
    **_node_common,
)

fig = go.Figure(data=[*edge_traces, nodes_hover_trace, nodes_label_trace])
fig.update_layout(
    title="Loans Ontology Graph (edge width ∝ normalized strength; node hover = aggregates)",
    hovermode="closest",
    xaxis={"visible": False},
    yaxis={"visible": False},
    margin={"l": 20, "r": 20, "t": 60, "b": 20},
)

# Export a standalone page; plotly.js is referenced from the CDN.
html_path = os.path.join(base_dir, "ontology_network_loans.html")
fig.write_html(html_path, include_plotlyjs="cdn", full_html=True)

# Provide paths for the assistant to link
dict(
    nodes_json=os.path.join(base_dir, "nodes_loans.json"),
    edges_json=os.path.join(base_dir, "edges_loans.json"),
    graph_html=html_path,
)


{'nodes_json': 'C:\\Users\\karan\\Documents\\Projects\\message-credit\\Test\\nodes_loans.json',
 'edges_json': 'C:\\Users\\karan\\Documents\\Projects\\message-credit\\Test\\edges_loans.json',
 'graph_html': 'C:\\Users\\karan\\Documents\\Projects\\message-credit\\Test\\ontology_network_loans.html'}

In [5]:
# Enhance the interactive Plotly graph to show richer node hover info,
# including base/individual score and top connection values per node,
# UPDATED for loans: edge strength ∝ amount and ∝ 1 / total sent by source.

import json, os
from collections import defaultdict
import networkx as nx
import plotly.graph_objects as go

base_dir = r"C:\Users\karan\Documents\Projects\message-credit\Test"

# Prefer the loan-enriched files (nodes_loans.json / edges_loans.json) when
# they exist. The base edges.json written by the generator cell has no
# loanAmountUSD field, so loading it makes every amount default to 0 in the
# preprocessing below. Fall back to the base files when the loan files are
# absent, which matches the previous behavior.
nodes_path = os.path.join(base_dir, "nodes_loans.json")
if not os.path.exists(nodes_path):
    nodes_path = os.path.join(base_dir, "nodes.json")
edges_path = os.path.join(base_dir, "edges_loans.json")
if not os.path.exists(edges_path):
    edges_path = os.path.join(base_dir, "edges.json")

with open(nodes_path, "r", encoding="utf-8") as f:
    nodes = json.load(f)
with open(edges_path, "r", encoding="utf-8") as f:
    edges = json.load(f)

# ---------- Loan-aware preprocessing ----------
# Ensure defaults
for loan in edges:
    loan.setdefault("edgeType", "loan")
    loan.setdefault("loanAmountUSD", 0)

# Totals per source/target
total_sent = defaultdict(int)
total_recv = defaultdict(int)
for loan in edges:
    value = int(loan.get("loanAmountUSD", 0))
    total_sent[loan["src_id"]] += value
    total_recv[loan["dst_id"]] += value

# Normalized strength per source: 900 * amount / totalSent(source)
for loan in edges:
    value = int(loan.get("loanAmountUSD", 0))
    sender_total = total_sent.get(loan["src_id"], 0)
    # fallback to any existing associationRate if present, else 0
    loan["normStrength"] = (
        900.0 * value / sender_total
        if sender_total > 0
        else float(loan.get("associationRate", 0))
    )

# node id -> display name, falling back to the id when a record has no name
name_by_id = {}
for record in nodes:
    name_by_id[record["id"]] = record.get("name", record["id"])

# Directed graph
G = nx.DiGraph()
for record in nodes:
    # augment nodes with aggregates for hover
    node_id = record["id"]
    sent = int(total_sent.get(node_id, 0))
    received = int(total_recv.get(node_id, 0))
    record.update(
        totalSentUSD=sent,
        totalReceivedUSD=received,
        netFlowUSD=sent - received,
    )
    G.add_node(node_id, **record)

G.add_edges_from((loan["src_id"], loan["dst_id"], loan) for loan in edges)

# Layout (use undirected for aesthetics)
pos = nx.spring_layout(G.to_undirected(), seed=7)

# ---------- Build neighbor summaries for node hover ----------
neighbors_info = defaultdict(list)
for src, dst, link_attrs in G.edges(data=True):
    loan_value = int(link_attrs.get("loanAmountUSD", 0))
    loan_strength = float(link_attrs.get("normStrength", 0.0))
    loan_type = link_attrs.get("edgeType", "loan")
    # Each edge is recorded twice: as outgoing ("→") on the source's list,
    # then as incoming ("←") on the target's list.
    for owner, arrow, other in ((src, "→", dst), (dst, "←", src)):
        neighbors_info[owner].append(
            {
                "dir": arrow,
                "nbr": other,
                "nbrName": name_by_id.get(other, other),
                "amount": loan_value,
                "strength": loan_strength,
                "edgeType": loan_type,
            }
        )

def money(x):
    """Format ``x`` as whole US dollars with thousands separators.

    Negative amounts carry the minus sign in front of the currency symbol
    (``-$1,234``, not ``$-1,234``). A value that rounds to zero is shown as
    ``$0`` without a sign.
    """
    digits = f"{abs(x):,.0f}"
    sign = "-" if x < 0 and digits != "0" else ""
    return f"{sign}${digits}"

# ---------- Edge traces (hover shows ONLY edge/loan info) ----------
# Range of normalized strengths, used as the input scale for line widths.
strengths = []
for _, _, link_attrs in G.edges(data=True):
    strengths.append(link_attrs.get("normStrength", 0.0))
if strengths:
    wmin, wmax = min(strengths), max(strengths)
else:
    # No edges at all: use a placeholder 0..1 range.
    wmin, wmax = 0.0, 1.0
def width_scale(s):
    """Map strength ``s`` onto a line width of 1..8 px.

    The mapping is linear over the module-level range ``wmin``..``wmax``;
    when the range is empty (both equal) every edge gets width 4.
    """
    if wmax != wmin:
        relative = (s - wmin) / (wmax - wmin)
        return 1 + 7 * relative  # 1..8 px
    return 4

# One Scatter trace per loan edge so each line can carry its own width.
edge_traces = []
for u, v, data in G.edges(data=True):
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    # A "lines" trace only offers hover at its vertices, and both endpoints
    # lie under node markers. The extra collinear midpoint gives each edge a
    # hover target of its own without changing how the segment is drawn.
    xm, ym = (x0 + x1) / 2, (y0 + y1) / 2
    amt = int(data.get("loanAmountUSD", 0))
    s   = float(data.get("normStrength", 0.0))
    et  = data.get("edgeType", "loan")
    text = (
        f"<b>Loan</b><br>"
        f"{name_by_id.get(u,u)} ({u}) → {name_by_id.get(v,v)} ({v})<br>"
        f"type: {et}<br>"
        f"amount: <b>{money(amt)}</b><br>"
        f"strength (per-source norm): <b>{s:.1f}</b> / 900<br>"
        f"eventTime: {data.get('eventTime','')}"
    )
    edge_traces.append(
        go.Scatter(
            x=[x0, xm, x1], y=[y0, ym, y1],
            mode="lines",
            hoverinfo="text",
            text=[text] * 3,  # same tooltip at both ends and the midpoint
            line=dict(width=width_scale(s)),
            showlegend=False,
        )
    )

# ---------- Node traces (hover shows ONLY node info + top connections with amounts) ----------
# Parallel lists, one entry per node, filled in graph iteration order.
node_x, node_y, hover_texts, label_texts, sizes = [], [], [], [], []
for node_id, info in G.nodes(data=True):
    px, py = pos[node_id]
    node_x.append(px)
    node_y.append(py)

    display_name = info.get("name", node_id)
    label_texts.append(display_name)

    base_score = info.get("baseScore", 0)
    current_score = info.get("score", 0)
    # Marker size grows linearly with score: 10 at score 0, 30 at score 900.
    sizes.append(10 + 20 * (float(current_score) / 900.0))

    n_out = G.out_degree(node_id)
    n_in = G.in_degree(node_id)
    links = neighbors_info.get(node_id, [])

    # Sort by amount desc (then strength), show top 5
    ranked = list(links)
    ranked.sort(key=lambda d: (d["amount"], d["strength"]), reverse=True)
    top_links = ranked[:5]

    # Mean strength over all incoming and outgoing links; 0.0 when isolated.
    mean_strength = sum(d["strength"] for d in links) / len(links) if links else 0.0

    sent_text = money(info.get('totalSentUSD',0))
    received_text = money(info.get('totalReceivedUSD',0))
    net_text = money(info.get('netFlowUSD',0))
    hover_lines = [
        f"<b>{display_name}</b> ({node_id})",
        f"Dept: {info.get('department','')} | Role: {info.get('role','')}",
        f"Base score: <b>{base_score}</b> | Current score: <b>{current_score}</b>",
        f"Loans sent: <b>{sent_text}</b> | "
        f"Loans received: <b>{received_text}</b> | "
        f"Net: <b>{net_text}</b>",
        f"Connections: {n_in + n_out} (in: {n_in}, out: {n_out}) | "
        f"Avg link strength: {mean_strength:.1f}",
    ]
    if not top_links:
        hover_lines.append("No connections")
    else:
        hover_lines.append("<br><b>Top connections</b> (dir, neighbor, type, amount, strength):")
        hover_lines.extend(
            f"{link['dir']} {link['nbrName']} ({link['edgeType']}, {money(link['amount'])}, {link['strength']:.1f}/900)"
            for link in top_links
        )
    hover_texts.append("<br>".join(hover_lines))

# Settings shared by both node traces (same positions and marker sizes).
_node_common = dict(x=node_x, y=node_y, marker=dict(size=sizes), showlegend=False)

# Markers that carry the detailed tooltip.
nodes_hover_trace = go.Scatter(
    mode="markers",
    hoverinfo="text",
    text=hover_texts,
    **_node_common,
)

# Markers plus the name above each node.
nodes_label_trace = go.Scatter(
    mode="markers+text",
    text=label_texts,
    textposition="top center",
    hoverinfo="skip",  # hover handled by nodes_hover_trace
    **_node_common,
)

fig = go.Figure(data=[*edge_traces, nodes_hover_trace, nodes_label_trace])
fig.update_layout(
    title="Loans Ontology Graph (Node hover shows totals & top loans; Edge hover shows specific loan)",
    hovermode="closest",
    xaxis={"visible": False},
    yaxis={"visible": False},
    margin={"l": 20, "r": 20, "t": 50, "b": 20},
)

# Export a standalone page; plotly.js is referenced from the CDN.
html_path = os.path.join(base_dir, "ontology_network_loans.html")
fig.write_html(html_path, include_plotlyjs="cdn", full_html=True)

print(html_path)


C:\Users\karan\Documents\Projects\message-credit\Test\ontology_network_loans.html
