# DataForSEO Topic Graph - DAG Analysis

This notebook analyzes the DataForSEO topic graph as a Directed Acyclic Graph (DAG) using NetworkX.

In [1]:
import json
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict

ModuleNotFoundError: No module named 'matplotlib'

## Load the Topic Graph

In [None]:
# Load the topic graph from disk.
# NOTE: json.load() parses the whole file as ONE JSON document, so despite the
# ".jsonl" extension the file must contain a single object with a "nodes"
# mapping (and optionally "root_id"), not one JSON record per line.
# The encoding is pinned to UTF-8 so parsing does not depend on the platform's
# default locale encoding.
with open('dataforseo-topic-graph.jsonl', 'r', encoding='utf-8') as graph_file:
    data = json.load(graph_file)

print(f"Total nodes: {len(data['nodes'])}")
print(f"Root ID: {data.get('root_id', 'N/A')}")

## Build NetworkX DiGraph from Topic Data

In [None]:
# Build a directed graph: one node per entry in data['nodes'], one edge per
# parent -> child reference.
G = nx.DiGraph()

# Child IDs that a parent references but that have no entry of their own in
# data['nodes']. add_edge() would create them implicitly WITHOUT attributes,
# and every later G.nodes[n]['topic'] / ['depth'] lookup would raise KeyError.
dangling_children = set()

for node_id, node_data in data['nodes'].items():
    node_depth = node_data.get('depth', 0)
    G.add_node(
        node_id,
        topic=node_data.get('topic', ''),
        depth=node_depth,
        metadata=node_data.get('metadata', {})
    )

    # `or []` also covers an explicit `"children": null` in the JSON.
    for child_id in node_data.get('children') or []:
        # Node keys are used as-is, so child IDs are normalised to str to match them.
        child_key = str(child_id)
        if child_key not in data['nodes']:
            # Placeholder attributes keep downstream cells working; the depth
            # is inferred as one level below the referencing parent.
            G.add_node(child_key, topic='', depth=node_depth + 1, metadata={})
            dangling_children.add(child_key)
        G.add_edge(node_id, child_key)

if dangling_children:
    print(
        f"Warning: {len(dangling_children)} child ID(s) have no entry in "
        f"data['nodes']; placeholders added: {sorted(dangling_children)[:10]}"
    )

print(f"Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

## Verify it's a Valid DAG

In [None]:
# Check whether the graph is acyclic; later cells branch on `is_dag`.
is_dag = nx.is_directed_acyclic_graph(G)
print(f"Is this a valid DAG? {is_dag}")

if not is_dag:
    # Best-effort diagnostics: list the cycles so the offending edges can be
    # found. Only ordinary errors are caught (a bare `except:` would also
    # swallow KeyboardInterrupt), and the reason is reported rather than hidden.
    try:
        cycles = list(nx.simple_cycles(G))
    except Exception as exc:
        print(f"Could not detect cycles: {exc!r}")
    else:
        print(f"Found {len(cycles)} cycles")
        if cycles:
            print("First cycle:", cycles[0])

## Display All Topics

In [None]:
# Print a record for every node: a summary line (ID, depth, in/out degree),
# the topic text, and a blank separator line.
print("=" * 100)
for node_id, attrs in G.nodes(data=True):
    topic, depth = attrs['topic'], attrs['depth']
    n_children = G.out_degree(node_id)  # outgoing edges = children
    n_parents = G.in_degree(node_id)    # incoming edges = parents

    summary = f"ID: {node_id:4s} | Depth: {depth} | Parents: {n_parents} | Children: {n_children:2d}"
    # One call emits the summary line, the topic line and the trailing blank line.
    print(summary, f"  Topic: {topic}", "", sep="\n")

## DAG Analysis - Basic Statistics

In [None]:
# Roots are the nodes nothing points to (in-degree 0).
root_nodes = [node for node, in_deg in G.in_degree() if in_deg == 0]
print(f"Root nodes: {len(root_nodes)}")
for root in root_nodes:
    root_topic = G.nodes[root]['topic']
    print(f"  - {root}: {root_topic}")

print()

# Leaves are the nodes that point to nothing (out-degree 0).
leaf_nodes = [node for node, out_deg in G.out_degree() if out_deg == 0]
print(f"Leaf nodes: {len(leaf_nodes)}")
print("First 10 leaf topics:")
for leaf in leaf_nodes[:10]:
    leaf_topic = G.nodes[leaf]['topic']
    print(f"  - {leaf}: {leaf_topic}")

## Topological Sort

In [None]:
# A topological order lists every parent before any of its children; it only
# exists for acyclic graphs, so the cyclic case is reported and skipped.
if not is_dag:
    print("Cannot perform topological sort on a graph with cycles")
else:
    topo_order = list(nx.topological_sort(G))
    print("Topological order (first 20 nodes):")
    for rank, node_id in enumerate(topo_order[:20], start=1):
        topic_preview = G.nodes[node_id]['topic'][:80]
        print(f"{rank}. {node_id}: {topic_preview}...")

## Depth Level Analysis

In [None]:
# Bucket node IDs by their stored 'depth' attribute.
depth_groups = defaultdict(list)
for node_id, attrs in G.nodes(data=True):
    depth_groups[attrs['depth']].append(node_id)

print("Nodes per depth level:")
for level in sorted(depth_groups):
    members = depth_groups[level]
    print(f"  Depth {level}: {len(members)} nodes")

deepest_level = max(depth_groups)
print(f"\nMaximum depth: {deepest_level}")

## Find Ancestors and Descendants

In [None]:
# Example: inspect everything upstream and downstream of a single node.
# Set this to any node ID of interest; unknown IDs are silently skipped.
example_node = '1'

if example_node in G:
    example_topic = G.nodes[example_node]['topic']
    print(f"Analyzing node {example_node}: {example_topic}")
    print()

    # Ancestors: every node from which example_node can be reached.
    ancestors = nx.ancestors(G, example_node)
    print(f"Ancestors ({len(ancestors)}):")
    for anc in list(ancestors)[:5]:
        anc_preview = G.nodes[anc]['topic'][:60]
        print(f"  - {anc}: {anc_preview}...")

    print()

    # Descendants: every node reachable from example_node.
    descendants = nx.descendants(G, example_node)
    print(f"Descendants ({len(descendants)}):")
    for desc in list(descendants)[:5]:
        desc_preview = G.nodes[desc]['topic'][:60]
        print(f"  - {desc}: {desc_preview}...")

## Find Paths Between Nodes

In [None]:
# Enumerate the paths from the first root to the first leaf found earlier.
# Both lists must be non-empty, and the pair must actually be connected.
if root_nodes and leaf_nodes:
    root = root_nodes[0]
    leaf = leaf_nodes[0]

    if nx.has_path(G, root, leaf):
        # all_simple_paths yields every distinct route; materialised here so
        # the routes can be counted and the first one printed.
        all_paths = list(nx.all_simple_paths(G, root, leaf))
        print(f"Found {len(all_paths)} path(s) from root '{root}' to leaf '{leaf}'")
        print(f"\nFirst path:")
        for node in all_paths[0]:
            # The arrow was previously mojibake ("→" in UTF-8 decoded as cp1252).
            print(f"  → {node}: {G.nodes[node]['topic'][:70]}...")

        # A path with k nodes has k - 1 edges.
        shortest = nx.shortest_path(G, root, leaf)
        print(f"\nShortest path length: {len(shortest)-1} edges")

## Visualize a Subgraph (Limited Depth)

In [None]:
# Visualize only nodes up to depth MAX_DEPTH for readability.
MAX_DEPTH = 2
LAYOUT_SEED = 42  # fixed seed so the spring layout is identical on every run
subgraph_nodes = [n for n in G.nodes() if G.nodes[n]['depth'] <= MAX_DEPTH]
subG = G.subgraph(subgraph_nodes)

# Explicit figure/axes: without them nx.draw() creates axes that fill the whole
# figure (leaving no room for the title) and the colorbar has no axes to attach to.
fig, ax = plt.subplots(figsize=(20, 12))
pos = nx.spring_layout(subG, k=2, iterations=50, seed=LAYOUT_SEED)

# Color nodes by depth. The same 0..MAX_DEPTH range is used for the nodes and
# for the colorbar, so the colorbar reads in depth units.
node_colors = [G.nodes[n]['depth'] for n in subG.nodes()]
depth_norm = plt.Normalize(vmin=0, vmax=MAX_DEPTH)

nx.draw(subG, pos,
        ax=ax,
        node_color=node_colors,
        cmap=plt.cm.viridis,
        vmin=0,
        vmax=MAX_DEPTH,
        with_labels=True,
        labels={n: n for n in subG.nodes()},
        node_size=1000,
        font_size=8,
        font_weight='bold',
        arrows=True,
        arrowsize=15)

ax.set_title(f"DataForSEO Topic DAG (Depth 0-{MAX_DEPTH})")

# Standalone mappable carrying the depth norm; set_array([]) keeps older
# matplotlib releases happy when the mappable has no data of its own.
depth_scale = plt.cm.ScalarMappable(norm=depth_norm, cmap=plt.cm.viridis)
depth_scale.set_array([])
fig.colorbar(depth_scale, ax=ax, label='Depth', ticks=list(range(MAX_DEPTH + 1)))

fig.tight_layout()
plt.show()

print(f"Visualized {len(subgraph_nodes)} nodes out of {G.number_of_nodes()} total")

## Interactive Visualization with PyVis

In [None]:
# pyvis is imported here rather than in the top import cell so the rest of the
# notebook still runs when this optional dependency is not installed.
from pyvis.network import Network

# Create interactive visualization for limited depth
MAX_DEPTH_INTERACTIVE = 3
OUTPUT_HTML = "dataforseo_dag_interactive.html"
subgraph_nodes = [n for n in G.nodes() if G.nodes[n]['depth'] <= MAX_DEPTH_INTERACTIVE]
subG = G.subgraph(subgraph_nodes)

net = Network(height="800px", width="100%", notebook=True, directed=True)
# NOTE: no net.barnes_hut() call here. set_options() below replaces the whole
# options object, so physics configured before it would be discarded anyway.

# Add nodes with hover tooltips
for node in subG.nodes():
    topic = G.nodes[node]['topic']
    depth = G.nodes[node]['depth']
    net.add_node(node,
                 label=f"{node}",
                 title=f"Depth {depth}: {topic}",
                 level=depth)  # Use depth for hierarchical layout

# Add edges
for u, v in subG.edges():
    net.add_edge(u, v)

# Set hierarchical layout (top-down, ordered along edge direction)
net.set_options("""
{
  "layout": {
    "hierarchical": {
      "enabled": true,
      "direction": "UD",
      "sortMethod": "directed"
    }
  },
  "physics": {
    "hierarchicalRepulsion": {
      "centralGravity": 0.0
    }
  }
}
""")

# In notebook mode show() writes the HTML file and returns an IFrame. Keep the
# return value and make it the cell's last expression so the graph renders
# inline instead of being silently dropped.
interactive_view = net.show(OUTPUT_HTML)
print(f"Interactive visualization saved to {OUTPUT_HTML}")
interactive_view