In [None]:
# Generate a dummy Foundry-style ontology dataset with nodes, edges, and params.
import json, os, random, math
from datetime import datetime, timedelta

random.seed(42)

base_dir = "/mnt/data/foundry_ontology_dummy"
os.makedirs(base_dir, exist_ok=True)

today = datetime(2025, 8, 23)

# --- Create nodes ---
people = [
    ("P001", "Alex Kim"),
    ("P002", "Jordan Lee"),
    ("P003", "Riley Patel"),
    ("P004", "Samira Chen"),
    ("P005", "Diego Alvarez"),
    ("P006", "Priya Singh"),
    ("P007", "Morgan Davis"),
    ("P008", "Chen Wei"),
    ("P009", "Taylor Brooks"),
    ("P010", "Casey Morgan"),
]

departments = ["Trading", "Compliance", "Engineering", "Risk", "Operations"]
roles = ["Analyst", "Manager", "Director", "Engineer", "Associate"]

def clamp(x, lo, hi): return max(lo, min(hi, x))

nodes = []
for pid, name in people:
    base = clamp(int(random.gauss(450, 180)), 0, 900)  # spread across 0-900
    node = {
        "id": pid,
        "entityType": "Person",
        "name": name,
        "department": random.choice(departments),
        "role": random.choice(roles),
        "baseScore": base,
        # initial score = base; your pipeline will recompute this
        "score": base,
        "createdAt": (today - timedelta(days=random.randint(120, 365))).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "active": random.choice([True, True, True, False]),  # mostly active
    }
    nodes.append(node)

# Ensure at least one low, one high base score
nodes[0]["baseScore"] = nodes[0]["score"] = 120
nodes[1]["baseScore"] = nodes[1]["score"] = 780

# --- Create edges ---
edge_types = ["friend", "peer", "collaborator", "manager_of", "reports_to"]
def rand_event_time():
    days_ago = random.randint(1, 180)
    return (today - timedelta(days=days_ago, hours=random.randint(0,23), minutes=random.randint(0,59))).strftime("%Y-%m-%dT%H:%M:%SZ")

# Predefine a simple structure with some clusters and cross-links
edge_pairs = [
    ("P001","P002","peer"),
    ("P001","P003","collaborator"),
    ("P002","P003","friend"),
    ("P003","P004","collaborator"),
    ("P004","P005","peer"),
    ("P005","P001","collaborator"),
    ("P006","P003","peer"),
    ("P006","P007","friend"),
    ("P007","P008","peer"),
    ("P008","P009","collaborator"),
    ("P009","P010","peer"),
    ("P010","P006","collaborator"),
    # some directional hierarchy
    ("P002","P006","manager_of"),
    ("P006","P002","reports_to"),
    ("P004","P007","manager_of"),
    ("P007","P004","reports_to"),
    # cross-cluster edges
    ("P005","P008","collaborator"),
    ("P009","P001","peer"),
]

edges = []
for (src,dst,etype) in edge_pairs:
    associationRate = clamp(int(random.triangular(150, 900, 600)), 0, 900)  # skew toward medium-high
    edge = {
        "src_id": src,
        "dst_id": dst,
        "edgeType": etype,
        "associationRate": associationRate,  # 0..900
        "eventTime": rand_event_time(),
        # optional metadata for analytics
        "notes": f"Auto-generated {etype} link with strength {associationRate}",
    }
    edges.append(edge)

# Include one weak tie and one very strong tie for testing
edges.append({
    "src_id":"P003","dst_id":"P009","edgeType":"peer",
    "associationRate": 45, "eventTime": rand_event_time(), "notes":"Very weak tie"
})
edges.append({
    "src_id":"P008","dst_id":"P002","edgeType":"collaborator",
    "associationRate": 895, "eventTime": rand_event_time(), "notes":"Very strong tie"
})

# --- Params ---
params = {
    "alpha": 0.65,               # neighbor influence weight
    "beta": 0.35,                # base influence weight
    "lambdaPerType": {           # per-edge-type multipliers
        "friend": 1.0,
        "peer": 0.9,
        "collaborator": 1.1,
        "manager_of": 0.8,
        "reports_to": 0.8
    },
    "lambdaTime": 0.008,         # per-day exponential decay for recency
    "clipMin": 0,
    "clipMax": 900
}

# --- Write files (JSON arrays + JSONL) ---
with open(os.path.join(base_dir, "nodes.json"), "w", encoding="utf-8") as f:
    json.dump(nodes, f, indent=2)

with open(os.path.join(base_dir, "edges.json"), "w", encoding="utf-8") as f:
    json.dump(edges, f, indent=2)

with open(os.path.join(base_dir, "params.json"), "w", encoding="utf-8") as f:
    json.dump(params, f, indent=2)

with open(os.path.join(base_dir, "nodes.jsonl"), "w", encoding="utf-8") as f:
    for n in nodes:
        f.write(json.dumps(n) + "\n")

with open(os.path.join(base_dir, "edges.jsonl"), "w", encoding="utf-8") as f:
    for e in edges:
        f.write(json.dumps(e) + "\n")

base_dir


In [1]:
# Build an interactive Plotly network graph (ontology) and a couple of simple distributions
# from the dummy dataset we just generated.
import json, math, os
import networkx as nx
import plotly.graph_objects as go
import pandas as pd

base_dir = r"C:\Users\karan\Documents\Projects\message-credit\Test"
with open(os.path.join(base_dir, "nodes.json"), "r", encoding="utf-8") as f:
    nodes = json.load(f)
with open(os.path.join(base_dir, "edges.json"), "r", encoding="utf-8") as f:
    edges = json.load(f)

# --- Build graph (directed), but use undirected for layout ---
G = nx.DiGraph()
for n in nodes:
    G.add_node(n["id"], **n)

for e in edges:
    G.add_edge(e["src_id"], e["dst_id"], **e)

# Undirected copy for layout
GU = G.to_undirected()
pos = nx.spring_layout(GU, seed=7)  # deterministic layout for reproducibility

# --- Edge thickness scaling based on associationRate ---
assoc_rates = [G.edges[u, v]["associationRate"] for u, v in G.edges()]
w_min = min(assoc_rates) if assoc_rates else 0
w_max = max(assoc_rates) if assoc_rates else 1

def edge_width(rate):
    # Scale to [1, 8] relative to other edges
    if w_max == w_min:
        return 4
    t = (rate - w_min) / (w_max - w_min)
    return 1 + 7 * t

# --- Build Plotly edge traces (one per edge so each can have its own width) ---
edge_traces = []
for (u, v, data) in G.edges(data=True):
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    rate = data.get("associationRate", 0)
    etype = data.get("edgeType", "edge")
    text = f"{u} → {v}<br>type: {etype}<br>associationRate: {rate}<br>eventTime: {data.get('eventTime','')}"
    edge_traces.append(
        go.Scatter(
            x=[x0, x1],
            y=[y0, y1],
            mode="lines",
            hoverinfo="text",
            text=[text, text],
            line=dict(width=edge_width(rate)),
            showlegend=False,
        )
    )

# --- Build Plotly node trace ---
node_x, node_y, node_text, node_size = [], [], [], []
for nid, attrs in G.nodes(data=True):
    x, y = pos[nid]
    node_x.append(x)
    node_y.append(y)
    # Size by current score (min 10, max ~30)
    score = float(attrs.get("score", 0))
    size = 10 + 20 * (score / 900.0)
    node_size.append(size)
    hover = (
        f"id: {nid}<br>"
        f"name: {attrs.get('name','')}<br>"
        f"dept: {attrs.get('department','')} | role: {attrs.get('role','')}<br>"
        f"baseScore: {attrs.get('baseScore','')} | score: {attrs.get('score','')}<br>"
        f"active: {attrs.get('active','')}"
    )
    node_text.append(hover)

nodes_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode="markers+text",
    text=[attrs.get("name","") for _, attrs in G.nodes(data=True)],
    textposition="top center",
    hoverinfo="text",
    textfont=dict(size=10),
    marker=dict(size=node_size),
    showlegend=False,
)

nodes_trace_hover = go.Scatter(
    x=node_x,
    y=node_y,
    mode="markers",
    hoverinfo="text",
    text=node_text,
    marker=dict(size=node_size),
    showlegend=False,
)

fig_net = go.Figure(edge_traces + [nodes_trace_hover, nodes_trace])
fig_net.update_layout(
    title="Interactive Ontology Graph (edge thickness = association strength)",
    hovermode="closest",
    xaxis=dict(visible=False),
    yaxis=dict(visible=False),
    margin=dict(l=20, r=20, t=50, b=20),
)

# Save to HTML for sharing/downloading
net_html_path = os.path.join(base_dir, "ontology_network.html")
fig_net.write_html(net_html_path, include_plotlyjs="cdn", full_html=True)

# --- Distributions (each chart separate) ---
df_nodes = pd.DataFrame(nodes)
df_edges = pd.DataFrame(edges)

fig_scores = go.Figure(
    data=[go.Histogram(x=df_nodes["baseScore"])]
)
fig_scores.update_layout(
    title="Distribution of baseScore (nodes)",
    xaxis_title="baseScore",
    yaxis_title="count",
    margin=dict(l=20, r=20, t=50, b=20),
)

fig_assoc = go.Figure(
    data=[go.Histogram(x=df_edges["associationRate"])]
)
fig_assoc.update_layout(
    title="Distribution of associationRate (edges)",
    xaxis_title="associationRate",
    yaxis_title="count",
    margin=dict(l=20, r=20, t=50, b=20),
)

fig_net, fig_scores, fig_assoc, net_html_path


(Figure({
     'data': [{'hoverinfo': 'text',
               'line': {'width': 5.916470588235295},
               'mode': 'lines',
               'showlegend': False,
               'text': [P001 → P002<br>type: peer<br>associationRate:
                        642<br>eventTime: 2025-08-10T02:46:00Z, P001 → P002<br>type:
                        peer<br>associationRate: 642<br>eventTime:
                        2025-08-10T02:46:00Z],
               'type': 'scatter',
               'x': [-0.32419911115609956, 0.06547661391597188],
               'y': [0.4712859283179857, 0.28306816856673045]},
              {'hoverinfo': 'text',
               'line': {'width': 6.18},
               'mode': 'lines',
               'showlegend': False,
               'text': [P001 → P003<br>type: collaborator<br>associationRate:
                        674<br>eventTime: 2025-08-01T16:05:00Z, P001 → P003<br>type:
                        collaborator<br>associationRate: 674<br>eventTime:
                   