# Week 6 — Multi-Hop Reasoning (Self-Ask / Decomposition)
**Goal:** Break complex questions into hops and answer each with RAG/Graph-RAG.


In [None]:

#@title Setup
import os, re, json, time
import networkx as nx
import pandas as pd

# Data directory for this week's material.
# NOTE(review): not referenced by any of the cells shown here — confirm it is still needed.
DATA_DIR = "./data_week6"
# Shared undirected graph; the functions below read and populate it as a module-level global.
G = nx.Graph()
def ensure_demo_graph():
    """Fill the module-level graph ``G`` with a small demo knowledge graph.

    Does nothing when ``G`` already contains nodes, so the cell can be
    re-run without changing the graph. Every node gets a ``type``
    attribute; every edge carries the ``doc_id`` and ``sentence`` that
    support the relation.
    """
    if len(G.nodes()) > 0:
        return

    # Node name -> entity type (insertion order matches the demo listing).
    typed_nodes = {
        "Method X": "METHOD",
        "Author A": "AUTHOR",
        "Dataset D1": "DATASET",
        "Paper P3": "PAPER",
        "Metric F1": "METRIC",
    }
    for name, node_type in typed_nodes.items():
        G.add_node(name, type=node_type)

    # (endpoint, endpoint, source document, supporting sentence)
    relations = [
        ("Method X", "Author A", "doc1", "Method X was introduced by Author A."),
        ("Method X", "Dataset D1", "doc1", "Method X compared on Dataset D1 with F1=0.78."),
        ("Method X", "Paper P3", "doc4", "Paper P3 applies Method X to Dataset D2 and reports Accuracy 0.82."),
        ("Dataset D1", "Metric F1", "doc1", "F1 reported for D1."),
    ]
    for left, right, source_doc, text in relations:
        G.add_edge(left, right, doc_id=source_doc, sentence=text)
# Build the demo graph; the call returns immediately if G already has nodes.
ensure_demo_graph()
# Cell output: (number of nodes, number of edges) currently in G.
len(G.nodes()), len(G.edges())


## 1. Decompose Query

In [None]:

def decompose(query: str):
    """Split a query into single-hop sub-questions (rule-based demo).

    Matching is case-insensitive. When the query mentions "introduced",
    "method x" and "dataset" it is expanded into two fixed sub-questions;
    any other query is returned unchanged as a one-element list.
    """
    lowered = query.lower()
    trigger_phrases = ("introduced", "method x", "dataset")
    if not all(phrase in lowered for phrase in trigger_phrases):
        # No known pattern: treat the whole query as a single hop.
        return [query]
    return [
        "Which paper introduced Method X?",
        "Which dataset did that paper use for F1?",
    ]

# Example: this query contains all three trigger phrases, so it is split into two sub-questions.
decompose("Which dataset did the paper that introduced Method X use for F1?")


## 2. Execute Hops with Graph Evidence

In [None]:

def neighbors_for(node):
    """Return the evidence attached to every edge incident to ``node`` in ``G``.

    Each item is a dict with the edge's ``doc_id`` and ``sentence``
    attributes; an attribute missing from an edge is reported as ``None``.
    A node that is not in the graph yields an empty list.
    """
    if node in G:
        return [
            {"doc_id": attrs.get("doc_id"), "sentence": attrs.get("sentence")}
            for _, _, attrs in G.edges(node, data=True)
        ]
    return []

def answer_subq(subq, memory):
    """Answer one sub-question with a canned demo answer plus graph evidence.

    Args:
        subq: Sub-question text; matched case-insensitively.
        memory: Answers from earlier hops. Accepted for interface
            compatibility; this demo implementation does not read it.

    Returns:
        A dict with keys ``subq``, ``answer`` and ``evidence`` (a list of
        ``{"doc_id", "sentence"}`` spans, empty for unrecognized questions).
    """
    text = subq.lower()

    # Choose the canned answer and the graph node whose edges back it up.
    if "introduced method x" in text:
        answer, anchor = "Paper P3 (demo)", "Method X"
    elif "dataset" in text and "paper" in text:
        answer, anchor = "Dataset D1 (demo)", "Dataset D1"
    else:
        return {"subq": subq, "answer": "Demo answer", "evidence": []}

    return {"subq": subq, "answer": answer, "evidence": neighbors_for(anchor)}

def multi_hop(query):
    """Answer ``query`` by decomposing it into hops and answering each in turn.

    Args:
        query: The full (possibly multi-hop) question.

    Returns:
        A dict with:
          - ``query``: the original question,
          - ``subqs``: the sub-questions produced by ``decompose``,
          - ``hops``: one ``answer_subq`` result per sub-question, in order,
          - ``final``: the hop answers joined with `` ; ``,
          - ``citations``: sorted, de-duplicated doc ids found in the evidence.
    """
    subs = decompose(query)
    memory = {}  # 1-based hop number -> that hop's answer, handed to later hops
    hops = []
    for hop_number, subq in enumerate(subs, start=1):
        hop = answer_subq(subq, memory)
        hops.append(hop)
        memory[hop_number] = hop["answer"]

    final = " ; ".join(hop["answer"] for hop in hops)

    # Evidence spans may carry doc_id=None when an edge has no doc_id
    # attribute. Drop those: None is not a citation, and sorting a set that
    # mixes None with strings raises TypeError.
    doc_ids = {ev.get("doc_id") for hop in hops for ev in hop["evidence"]}
    doc_ids.discard(None)
    cites = sorted(doc_ids)

    return {"query": query, "subqs": subs, "hops": hops, "final": final, "citations": cites}

# Run the full pipeline on the example question; `out` is reused by the trace cell below.
out = multi_hop("Which dataset did the paper that introduced Method X use for F1?")
# Cell output: show the complete result dict.
out


## 3. Print Trace

In [None]:

# Human-readable trace of the run stored in `out`.
print("Final:", out["final"])
print("Citations:", out["citations"])
# Hops are numbered from 1 in the printed trace.
for i, h in enumerate(out["hops"], 1):
    print(f"Hop {i}: {h['subq']} -> {h['answer']}")
    # Show at most the first two evidence spans per hop to keep the trace short.
    for ev in h["evidence"][:2]:
        print("  -", ev["doc_id"], ":", ev["sentence"])
