# Knowledge Graph Visualization and Hallucination Detection

This notebook visualizes the Hive Fleet Obsidian knowledge graph to help identify potential hallucinations (disconnected or anomalous nodes) and understand the current memory structure.

We will load data from:
- `memory/semantic/knowledge_graph/evolution_graph.gml`
- `memory/semantic/knowledge_graph.json` (if available)

In [1]:
# 1. Install and Import Dependencies
import sys
!{sys.executable} -m pip install networkx matplotlib pandas pyvis scikit-learn

import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import json
import os
from pathlib import Path
from pyvis.network import Network
import numpy as np

# Set paths
WORKSPACE_ROOT = Path("..") # Assuming notebook is in notebooks/
GML_PATH = WORKSPACE_ROOT / "memory/semantic/knowledge_graph/evolution_graph.gml"
JSON_PATH = WORKSPACE_ROOT / "memory/semantic/knowledge_graph.json"

print(f"Checking paths:\n{GML_PATH}: {GML_PATH.exists()}\n{JSON_PATH}: {JSON_PATH.exists()}")

ERROR:no viable alternative at input '1' (1.sysml line : 1 column : 1)
ERROR:no viable alternative at input 'and' (1.sysml line : 1 column : 14)
ERROR:no viable alternative at input 'Dependencies' (1.sysml line : 1 column : 25)
ERROR:no viable alternative at input 'import' (1.sysml line : 2 column : 1)
ERROR:no viable alternative at character '{' (1.sysml line : 3 column : 1)
ERROR:no viable alternative at input '.' (1.sysml line : 3 column : 6)
ERROR:no viable alternative at input '}' (1.sysml line : 3 column : 17)
ERROR:missing EOF at '-' (1.sysml line : 3 column : 19)




In [2]:
# 2. Load Knowledge Graph Data

G = None

# Try loading GML first as it seems to be the primary evolution graph
if GML_PATH.exists():
    try:
        print(f"Loading GML from {GML_PATH}...")
        G = nx.read_gml(GML_PATH)
        print(f"Successfully loaded GML. Nodes: {len(G.nodes)}, Edges: {len(G.edges)}")
    except Exception as e:
        print(f"Error loading GML: {e}")

# If GML failed or we want to merge/compare, try JSON
if JSON_PATH.exists():
    try:
        print(f"Loading JSON from {JSON_PATH}...")
        with open(JSON_PATH, 'r') as f:
            data = json.load(f)
        
        G_json = nx.node_link_graph(data)
        print(f"Successfully loaded JSON. Nodes: {len(G_json.nodes)}, Edges: {len(G_json.edges)}")
        
        if G is None:
            G = G_json
        else:
            print("Both graphs found. Using GML as primary for now.")
            # You could merge them here if needed: G = nx.compose(G, G_json)
            
    except Exception as e:
        print(f"Error loading JSON: {e}")

if G is None:
    print("No graph data found. Creating a sample graph for demonstration.")
    G = nx.gnm_random_graph(20, 30, directed=True)
    for i in G.nodes:
        G.nodes[i]['label'] = f"Node_{i}"


ERROR:no viable alternative at input '2' (2.sysml line : 1 column : 1)
ERROR:no viable alternative at input 'Knowledge' (2.sysml line : 1 column : 11)
ERROR:no viable alternative at input 'Graph' (2.sysml line : 1 column : 21)
ERROR:no viable alternative at input 'Data' (2.sysml line : 1 column : 27)
ERROR:no viable alternative at input 'G' (2.sysml line : 3 column : 1)
ERROR:missing '(' at 'Try' (2.sysml line : 5 column : 3)
ERROR:no viable alternative at input 'GML' (2.sysml line : 5 column : 15)
ERROR:no viable alternative at input 'first' (2.sysml line : 5 column : 19)
ERROR:no viable alternative at input 'seems' (2.sysml line : 5 column : 31)
ERROR:no viable alternative at input 'to' (2.sysml line : 5 column : 37)
ERROR:no viable alternative at input 'the' (2.sysml line : 5 column : 43)
ERROR:no viable alternative at input 'primary' (2.sysml line : 5 column : 47)
ERROR:no viable alternative at input 'evolution' (2.sysml line : 5 column : 55)
ERROR:no viable alternative at input 'g



In [3]:
# 3. Visualize the Knowledge Graph (Static)

plt.figure(figsize=(15, 10))
pos = nx.spring_layout(G, k=0.15, iterations=20)

# Draw nodes
nx.draw_networkx_nodes(G, pos, node_size=50, node_color="skyblue", alpha=0.7)

# Draw edges
nx.draw_networkx_edges(G, pos, width=0.5, alpha=0.5, edge_color="gray")

# Draw labels (only for high degree nodes to avoid clutter)
degrees = dict(G.degree())
top_nodes = sorted(degrees, key=degrees.get, reverse=True)[:20]
labels = {n: G.nodes[n].get('label', n) for n in top_nodes}
nx.draw_networkx_labels(G, pos, labels, font_size=8)

plt.title("Knowledge Graph Visualization (Static)")
plt.axis("off")
plt.show()

ERROR:no viable alternative at input '3' (3.sysml line : 1 column : 1)
ERROR:no viable alternative at input 'the' (3.sysml line : 1 column : 16)
ERROR:no viable alternative at input 'Knowledge' (3.sysml line : 1 column : 20)
ERROR:no viable alternative at input 'Graph' (3.sysml line : 1 column : 30)
ERROR:no viable alternative at input '(' (3.sysml line : 1 column : 36)
ERROR:no viable alternative at input ')' (3.sysml line : 1 column : 43)
ERROR:no viable alternative at input '.' (3.sysml line : 3 column : 4)
ERROR:no viable alternative at input '(' (3.sysml line : 3 column : 11)
ERROR:no viable alternative at input ')' (3.sysml line : 3 column : 28)
ERROR:no viable alternative at input '=' (3.sysml line : 4 column : 27)
ERROR:no viable alternative at input ')' (3.sysml line : 4 column : 48)
ERROR:no viable alternative at input 'nx' (3.sysml line : 7 column : 1)
ERROR:no viable alternative at input '.' (3.sysml line : 7 column : 3)
ERROR:no viable alternative at input '(' (3.sysml lin



In [4]:
# 4. Analyze Graph Statistics & Centrality

print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")
print(f"Density: {nx.density(G):.4f}")

if nx.is_directed(G):
    print(f"Is strongly connected: {nx.is_strongly_connected(G)}")
    print(f"Number of strongly connected components: {nx.number_strongly_connected_components(G)}")
    print(f"Number of weakly connected components: {nx.number_weakly_connected_components(G)}")
else:
    print(f"Is connected: {nx.is_connected(G)}")
    print(f"Number of connected components: {nx.number_connected_components(G)}")

# Centrality
degree_centrality = nx.degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)

# Top 10 central nodes
print("\nTop 10 Nodes by Degree Centrality:")
for node in sorted(degree_centrality, key=degree_centrality.get, reverse=True)[:10]:
    print(f"{node} ({G.nodes[node].get('label', node)}): {degree_centrality[node]:.4f}")


ERROR:no viable alternative at input '4' (4.sysml line : 1 column : 1)
ERROR:no viable alternative at input 'Graph' (4.sysml line : 1 column : 14)
ERROR:no viable alternative at input 'Statistics' (4.sysml line : 1 column : 20)
ERROR:no viable alternative at input '&' (4.sysml line : 1 column : 31)
ERROR:no viable alternative at input 'print' (4.sysml line : 3 column : 1)
ERROR:no viable alternative at input '(' (4.sysml line : 3 column : 6)
ERROR:no viable alternative at input '"Number of nodes: {G.number_of_nodes()}"' (4.sysml line : 3 column : 8)
ERROR:no viable alternative at input '(' (4.sysml line : 4 column : 6)
ERROR:no viable alternative at input '"Number of edges: {G.number_of_edges()}"' (4.sysml line : 4 column : 8)
ERROR:no viable alternative at input '(' (4.sysml line : 5 column : 6)
ERROR:no viable alternative at input '"Density: {nx.density(G):.4f}"' (4.sysml line : 5 column : 8)
ERROR:no viable alternative at input '.' (4.sysml line : 7 column : 6)
ERROR:no viable alter



In [5]:
# 5. Detect Disconnected Subgraphs (Potential Hallucinations)

# Identify weakly connected components (for directed graphs) or connected components
if nx.is_directed(G):
    components = list(nx.weakly_connected_components(G))
else:
    components = list(nx.connected_components(G))

print(f"Found {len(components)} disconnected components.")

# Sort components by size
components.sort(key=len, reverse=True)

# The largest component is likely the "Main" knowledge base
main_component = components[0]
print(f"Main component size: {len(main_component)}")

# Smaller components might be hallucinations or isolated knowledge islands
hallucination_candidates = components[1:]
print(f"Potential hallucination islands: {len(hallucination_candidates)}")

for i, comp in enumerate(hallucination_candidates[:10]):
    print(f"  Island {i+1} (Size {len(comp)}): {[G.nodes[n].get('label', n) for n in list(comp)[:5]]}...")

# Visualize these islands
if hallucination_candidates:
    # Create a subgraph of just the islands (limit to top 50 islands to avoid crashing)
    island_nodes = set()
    for comp in hallucination_candidates[:50]:
        island_nodes.update(comp)
    
    H = G.subgraph(island_nodes)
    
    plt.figure(figsize=(12, 8))
    pos_h = nx.spring_layout(H)
    nx.draw(H, pos_h, with_labels=True, node_size=100, font_size=8, node_color="salmon")
    plt.title("Visualization of Disconnected 'Islands' (Potential Hallucinations)")
    plt.show()
else:
    print("No disconnected islands found.")

ERROR:no viable alternative at input '5' (5.sysml line : 1 column : 1)
ERROR:no viable alternative at input 'Disconnected' (5.sysml line : 1 column : 13)
ERROR:no viable alternative at input 'Subgraphs' (5.sysml line : 1 column : 26)
ERROR:no viable alternative at input '(' (5.sysml line : 1 column : 36)
ERROR:no viable alternative at input 'Hallucinations' (5.sysml line : 1 column : 47)
ERROR:no viable alternative at input ')' (5.sysml line : 1 column : 61)
ERROR:no viable alternative at input 'connected' (5.sysml line : 3 column : 19)
ERROR:no viable alternative at input 'components' (5.sysml line : 3 column : 29)
ERROR:no viable alternative at input '(' (5.sysml line : 3 column : 40)
ERROR:no viable alternative at input 'graphs' (5.sysml line : 3 column : 54)
ERROR:no viable alternative at input ')' (5.sysml line : 3 column : 60)
ERROR:no viable alternative at input 'components' (5.sysml line : 3 column : 75)
ERROR:no viable alternative at input 'if' (5.sysml line : 4 column : 1)
ER



In [6]:
# 6. Interactive Visualization with PyVis

# Create a PyVis network
net = Network(notebook=True, height="750px", width="100%", bgcolor="#222222", font_color="white")

# For large graphs, we might want to sample or filter
# Let's visualize the main component + top 5 islands to keep it performant
nodes_to_viz = set(list(main_component)[:500]) # Limit main component nodes
for comp in hallucination_candidates[:10]:
    nodes_to_viz.update(comp)

subgraph = G.subgraph(nodes_to_viz)

net.from_nx(subgraph)

# Add some physics options
net.toggle_physics(True)
net.show_buttons(filter_=['physics'])

# Save and display
output_path = "knowledge_graph_interactive.html"
net.show(output_path)
print(f"Interactive graph saved to {output_path}. Open this file in a browser to explore.")


ERROR:no viable alternative at input '6' (6.sysml line : 1 column : 1)
ERROR:no viable alternative at input 'Visualization' (6.sysml line : 1 column : 18)
ERROR:no viable alternative at input 'with' (6.sysml line : 1 column : 32)
ERROR:no viable alternative at input 'PyVis' (6.sysml line : 1 column : 37)
ERROR:no viable alternative at input '#' (6.sysml line : 3 column : 1)
ERROR:no viable alternative at input 'PyVis' (6.sysml line : 3 column : 12)
ERROR:no viable alternative at input 'network' (6.sysml line : 3 column : 18)
ERROR:no viable alternative at input 'net' (6.sysml line : 4 column : 1)
ERROR:missing '(' at 'For' (6.sysml line : 6 column : 3)
ERROR:no viable alternative at input 'graphs' (6.sysml line : 6 column : 13)
ERROR:no viable alternative at input ',' (6.sysml line : 6 column : 19)
ERROR:no viable alternative at input 'might' (6.sysml line : 6 column : 24)
ERROR:no viable alternative at input 'want' (6.sysml line : 6 column : 30)
ERROR:no viable alternative at input 't



In [7]:
# 7. Export to Mermaid (Swarmlord's View)
# Since the full graph is too large for Mermaid, we will export the "Core" (Top 30 Central Nodes)
# and their direct connections. This provides a high-level strategic view.

def export_to_mermaid(graph, output_file="core_memory.mmd", limit=30):
    # Calculate centrality to find the "Core"
    centrality = nx.degree_centrality(graph)
    top_nodes = sorted(centrality, key=centrality.get, reverse=True)[:limit]
    
    subgraph = graph.subgraph(top_nodes)
    
    mermaid_lines = ["graph TD"]
    
    # Add nodes and edges
    # Sanitize IDs for Mermaid (remove spaces, special chars)
    def sanitize(text):
        return str(text).replace(" ", "_").replace(".", "_").replace("/", "_").replace("-", "_")

    for u, v in subgraph.edges():
        u_safe = sanitize(u)
        v_safe = sanitize(v)
        # Try to get labels if they exist, else use ID
        u_label = graph.nodes[u].get('label', u)
        v_label = graph.nodes[v].get('label', v)
        
        # Escape quotes in labels
        u_label = str(u_label).replace('"', "'")
        v_label = str(v_label).replace('"', "'")
        
        line = f'    {u_safe}["{u_label}"] --> {v_safe}["{v_label}"]'
        mermaid_lines.append(line)
        
    content = "\n".join(mermaid_lines)
    
    with open(output_file, "w") as f:
        f.write(content)
    
    print(f"Mermaid diagram exported to {output_file}")
    print("Copy the content of this file into a Markdown block or Mermaid Live Editor.")
    return content

# Export the core graph
mermaid_code = export_to_mermaid(G, "core_memory.mmd")
print("\n--- Preview (First 10 lines) ---")
print("\n".join(mermaid_code.split("\n")[:10]))

ERROR:no viable alternative at input '7' (7.sysml line : 1 column : 1)
ERROR:no viable alternative at input 'to' (7.sysml line : 1 column : 13)
ERROR:no viable alternative at input '(' (7.sysml line : 1 column : 24)
ERROR:no viable alternative at input ''s View)\n# Since the full graph is too large for Mermaid, we will export the "Core" (Top 30 Central Nodes)\n# and their direct connections. This provides a high-level strategic view.\n\ndef export_to_mermaid(graph, output_file="core_memory.mmd", limit=30):\n    # Calculate centrality to find the "Core"\n    centrality = nx.degree_centrality(graph)\n    top_nodes = sorted(centrality, key=centrality.get, reverse=True)[:limit]\n    \n    subgraph = graph.subgraph(top_nodes)\n    \n    mermaid_lines = ["graph TD"]\n    \n    # Add nodes and edges\n    # Sanitize IDs for Mermaid (remove spaces, special chars)\n    def sanitize(text):\n        return str(text).replace(" ", "_").replace(".", "_").replace("/", "_").replace("-", "_")\n\n    for



In [8]:
# 8. SOTA: Semantic Clustering (The "Meaning" Map)
# This uses Machine Learning (TF-IDF + PCA) to cluster nodes based on their text content.
# It reveals "Hidden Connections" that might not be explicitly linked in the graph.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# 1. Prepare Data (Use Node Labels/Content)
node_ids = list(G.nodes())
# If we have 'content' attribute, use it. Otherwise use 'label' or ID.
corpus = [str(G.nodes[n].get('label', n)) for n in node_ids]

# 2. Vectorize (Convert Text to Numbers)
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(corpus)

# 3. Dimensionality Reduction (Compress to 2D for plotting)
pca = PCA(n_components=2)
coords = pca.fit_transform(X.toarray())

# 4. Clustering (Find Topics)
# We'll guess k=5 clusters for now
kmeans = KMeans(n_clusters=5, random_state=42)
clusters = kmeans.fit_predict(X)

# 5. Visualize
plt.figure(figsize=(12, 8))
scatter = plt.scatter(coords[:, 0], coords[:, 1], c=clusters, cmap='viridis', alpha=0.6)

# Label some points
for i, txt in enumerate(node_ids):
    if i % 10 == 0: # Label every 10th node to avoid clutter
        plt.annotate(txt, (coords[i, 0], coords[i, 1]), fontsize=8, alpha=0.7)

plt.title("Semantic Space: Concept Clusters (TF-IDF + PCA)")
plt.xlabel("Semantic Dimension 1")
plt.ylabel("Semantic Dimension 2")
plt.colorbar(scatter, label="Topic Cluster")
plt.grid(True, alpha=0.3)
plt.show()

ERROR:no viable alternative at input '8' (8.sysml line : 1 column : 1)
ERROR:no viable alternative at input 'Clustering' (8.sysml line : 1 column : 12)
ERROR:no viable alternative at input 'Clustering' (8.sysml line : 1 column : 21)
ERROR:no viable alternative at input '"Meaning"' (8.sysml line : 1 column : 37)
ERROR:no viable alternative at input ')' (8.sysml line : 1 column : 50)
ERROR:no viable alternative at input 'Machine' (8.sysml line : 2 column : 13)
ERROR:no viable alternative at input 'Learning' (8.sysml line : 2 column : 21)
ERROR:no viable alternative at input '(' (8.sysml line : 2 column : 30)
ERROR:no viable alternative at input '-' (8.sysml line : 2 column : 33)
ERROR:no viable alternative at input '+' (8.sysml line : 2 column : 38)
ERROR:no viable alternative at input ')' (8.sysml line : 2 column : 43)
ERROR:no viable alternative at input 'nodes' (8.sysml line : 2 column : 56)
ERROR:no viable alternative at input 'based' (8.sysml line : 2 column : 62)
ERROR:no viable al

