In [67]:
import requests, itertools, time, pickle, networkx as nx

SPARQL   = "https://query.wikidata.org/sparql"
HEADERS  = {"User-Agent": "Hackathon-VG-Graph/0.4"}   # ASCII only
ROOTS = [
    "Q395",
]
MAX_DEPTH = 4              # go this many subclass/instance hops
CHUNK     = 100            # Q‑ids per SPARQL VALUES clause
PAUSE     = 0.1            # polite delay (s) between calls

In [68]:
# ---------- utility ----------
def sparql(q: str):
    r = requests.post(SPARQL,
                      data={"query": q, "format": "json"},
                      headers=HEADERS, timeout=60)
    r.raise_for_status()
    return r.json()["results"]["bindings"]

def chunks(seq, n):
    it = iter(seq)
    while (batch := list(itertools.islice(it, n))):
        yield batch

In [69]:
# ---------- Stage 1: breadth‑first crawl -------------
all_nodes = set(ROOTS)
frontier  = set(ROOTS)
depth     = 0

while frontier:                         # <‑‑ keep going until nothing new
    depth += 1
    next_frontier = set()

    for batch in chunks(frontier, CHUNK):
        batch_vals = " ".join(f"wd:{q}" for q in batch)
        query = f"""
        SELECT DISTINCT ?child WHERE {{
          VALUES ?parent {{ {batch_vals} }}
          ?child (wdt:P279|wdt:P31|wdt:P361) ?parent .
        }}
        """
        for row in sparql(query):
            child = row["child"]["value"].rsplit("/", 1)[-1]
            if child not in all_nodes:
                all_nodes.add(child)
                next_frontier.add(child)
        time.sleep(PAUSE)

    print(f"Depth {depth}: +{len(next_frontier):,} new nodes")
    frontier = next_frontier           # loop stops when this is empty

print(f"Total nodes collected: {len(all_nodes):,}")

Depth 1: +121 new nodes
Depth 2: +2,449 new nodes
Depth 3: +6,651 new nodes
Depth 4: +15,902 new nodes
Depth 5: +31,063 new nodes


KeyboardInterrupt: 

In [None]:
# ---------- Stage 2: fetch edges among collected nodes ----------
edges = []
for batch in chunks(all_nodes, CHUNK):
    batch_vals = " ".join(f"wd:{q}" for q in batch)
    query = f"""
    SELECT ?parent ?child WHERE {{
      VALUES ?child {{ {batch_vals} }}
      ?child (wdt:P279|wdt:P361) ?parent .
      FILTER(?parent IN ({', '.join('wd:' + q for q in all_nodes)}))
    }}
    """
    for row in sparql(query):
        parent = row["parent"]["value"].rsplit("/", 1)[-1]
        child  = row["child"]["value"].rsplit("/", 1)[-1]
        edges.append((parent, child))
    time.sleep(PAUSE)

print(f"Edges collected: {len(edges):,}")

Edges collected: 131


In [None]:
# ---------- Stage 3: grab English labels (optional) ----------
labels = {}
for batch in chunks(all_nodes, 200):
    batch_vals = " ".join(f"wd:{q}" for q in batch)
    query = f"""
    SELECT ?id ?label WHERE {{
      VALUES ?id {{ {batch_vals} }}
      ?id rdfs:label ?label .
      FILTER (lang(?label) = "en")
    }}
    """
    for row in sparql(query):
        qid   = row["id"]["value"].rsplit("/", 1)[-1]
        label = row["label"]["value"]
        labels[qid] = label
    time.sleep(PAUSE)

In [None]:
# ---------- Build & save the NetworkX DAG ----------
G = nx.DiGraph()
for q in all_nodes:
    G.add_node(q, label=labels.get(q, q))
G.add_edges_from(edges)

print(f"Graph ready → {G.number_of_nodes():,} nodes | {G.number_of_edges():,} edges")

with open("vg_design_graph.pkl", "wb") as f:
    pickle.dump(G, f, protocol=pickle.HIGHEST_PROTOCOL)
print("Saved as vg_design_graph.pkl")

Graph ready → 1,081 nodes | 129 edges
Saved as vg_design_graph.pkl


In [None]:
import pickle, networkx as nx
from pyvis.network import Network

# ---- load the graph you built earlier ----
with open("vg_design_graph.pkl", "rb") as f:
    G: nx.DiGraph = pickle.load(f)

# ---- initialise an interactive net ----
net = Network(
    height="800px",           # bigger canvas
    width="100%",             # fill the cell / page
    directed=True,
    bgcolor="#ffffff",        # white background
    notebook=False            # change to True if inside Jupyter
)

# ---- transfer nodes & edges ----
for node_id, data in G.nodes(data=True):
    net.add_node(
        node_id,
        label=data.get("label", node_id),   # hover + visible label
        title=data.get("label", node_id)    # tooltip text
    )

for src, dst in G.edges():
    net.add_edge(src, dst)

net.toggle_physics(True)        # spring layout you can drag around
net.show("vg_design_graph.html", notebook=False)  # <-- opens a browser tab automatically


vg_design_graph.html
