# RAG Demo: Retriever → Middleware → Prompt → LLM

In [1]:
%pip -q install -U numpy==1.26.4 chromadb==0.4.24 langchain==0.2.11 langchain-core==0.2.26   langchain-community==0.2.10 langchain-openai==0.1.17 pypdf tiktoken
# %pip -q install -U langchain-ollama
print("Deps installed.")

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m70.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m525.5/525.5 kB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.3/990.3 kB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m378.9/378.9 kB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from google.colab import drive
from pathlib import Path

# Mount Google Drive so the project folder persists across Colab sessions.
drive.mount('/content/drive')

# Root of the project on Drive; later cells derive their paths from PROJ.
PROJ = Path('/content/drive/MyDrive/rag_bio_project').resolve()
print('Project path:', PROJ)

# Create the project root first, then each working subfolder.
# exist_ok=True makes this safe to re-run.
subfolder_names = ('src', 'data_pdfs', 'data_txt', 'index', 'notebooks')
for folder in (PROJ, *(PROJ / name for name in subfolder_names)):
    folder.mkdir(parents=True, exist_ok=True)
print('✅ Folders ready')

Mounted at /content/drive
Project path: /content/drive/MyDrive/rag_bio_project
✅ Folders ready


In [3]:
import sys
from pathlib import Path

# Paths derived from PROJ (the project root on Drive, which must already be defined).
# INDEX_DIR is what the later cells pass to Chroma / `persist_dir`.
INDEX_DIR = PROJ/"index"
SRC = PROJ/"src"

# Put src/ on the import path (presumably where the retriever, middleware, prompting
# and pipeline modules imported later live; confirm).
# Guarded so that re-running this cell does not keep appending duplicate entries.
if str(SRC) not in sys.path:
    sys.path.append(str(SRC))
print("Project:", PROJ)
print("Index dir:", INDEX_DIR)

Project: /content/drive/MyDrive/rag_bio_project
Index dir: /content/drive/MyDrive/rag_bio_project/index


In [5]:
from chromadb import PersistentClient

# Open the on-disk Chroma store under INDEX_DIR and report which collections it holds.
client = PersistentClient(path=str(INDEX_DIR))
collection_names = [collection.name for collection in client.list_collections()]
print("Collections:", collection_names)

AttributeError: `np.float_` was removed in the NumPy 2.0 release. Use `np.float64` instead.

## 1) Retriever

In [None]:
from retriever import retrieve

# Query the persisted index; `question` and `res` are reused by later cells.
question = "李青的年收入是多少？"
res = retrieve(
    persist_dir=str(INDEX_DIR),
    query_text=question,
    k=5,
    strategy="mmr",
    strictness="strict",
)
print("Route:", res.get("route"))

# Show at most the first three returned items: grade, score (3 decimals), source.
print("Top:")
top_hits = res.get("items", [])[:3]
for hit in top_hits:
    grade = hit["grade"]
    score_text = f"{hit['score']:.3f}"
    source = hit["metadata"].get("source")
    print(grade, score_text, source)

## 2) Middleware role detection

In [None]:
from middleware import detect_characters_from_question

# A question that names two characters, run through detection against the same index.
comparison_question = "请比较LiQing与WangMu的收入"
det = detect_characters_from_question(comparison_question, persist_dir=str(INDEX_DIR))
print(det)

## 3) Prompt preview

In [None]:
from prompting import build_prompt_messages_auto

# Build the prompt from the retrieval result `res` produced by the retriever cell.
# NOTE(review): `res` was retrieved for the Chinese question, while the prompt uses
# the English wording. Confirm this mix is intentional.
msgs, info = build_prompt_messages_auto("What is LiQing's annual income?", res)
print("Mode:", info["mode"])

# Preview only the first message, truncated to 200 characters.
first_message = msgs[0]
print(first_message.type, ">", first_message.content[:200])

## 4) Full pipeline (requires an OpenAI API key)

In [None]:
from pipeline import PipelineConfig, run_pipeline

# End-to-end run against the persisted index using the OpenAI provider.
cfg = PipelineConfig(
    persist_dir=str(INDEX_DIR),
    strictness="strict",
    provider="openai",
    model="gpt-4o-mini",
    temperature=0.2,
)
out = run_pipeline("李青的年收入是多少？", cfg)

# Print a 400-character preview of the answer, then the references and routing info.
answer_preview = out["answer"][:400]
print("Answer:", answer_preview)
print("\nReferences:\n", out["references"])
print("\nPrompt mode:", out["prompt_mode"], "Route:", out["route"])