In [None]:
### Execution Notes (Read First)
'''
The early cells that wrote config / launched Streamlit were experimental and failed on this machine.
✅ Use **Cell K** to write `app/app_rag.py`.  
✅ Launch Streamlit **from terminal** as documented.  
⛔ Do **not** run the old launcher/config cells below; they are kept for traceability.
'''

In [4]:
"""
'Cell A: Environment Probe'
Purpose:
- Detect Python version, CUDA toolkits (if available), GPU info, and key package versions.
- Save a merged record into configs/env_rag_graph.json for reproducibility.

Notes:
- Uses only standard libs + minimal imports to avoid heavy loads here.
"""

import json, os, sys, subprocess, shutil, platform
from pathlib import Path

# 'Paths and files' - adjust only if your project layout changes
ROOT = Path("/home/manny-buff/projects/capstone/week6-rag-graph")
CFG  = ROOT / "configs" / "env_rag_graph.json"

def cmd_out(args):
    # 'Run a shell command safely and return stdout text'
    try:
        return subprocess.check_output(args, stderr=subprocess.STDOUT, text=True).strip()
    except Exception as e:
        return f"ERROR: {e}"

# 'Collect environment info'
info = {
    "python_venv": str(Path.home() / "venvs" / "core-rag"),
    "python_version": sys.version.split()[0],
    "platform": platform.platform(),
    "cuda_visible_devices": os.environ.get("CUDA_VISIBLE_DEVICES", None),
    "nvidia_smi": cmd_out(["bash", "-lc", "nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader"]),
    "which_python": cmd_out(["bash", "-lc", "which python"]),
    "pip_freeze_head": cmd_out(["bash", "-lc", "pip freeze | head -n 20"])
}

# 'Key packages versions' - quick imports to record versions
versions = {}
for pkg in ["numpy", "pandas", "networkx", "sentence_transformers", "transformers", "accelerate", "faiss"]:
    try:
        mod = __import__(pkg)
        versions[pkg] = getattr(mod, "__version__", "unknown")
    except Exception as e:
        versions[pkg] = f"not importable: {e}"

info["packages"] = versions

# 'Merge with existing json'
CFG.parent.mkdir(parents=True, exist_ok=True)
existing = {}
if CFG.exists():
    try:
        existing = json.loads(CFG.read_text())
    except Exception:
        existing = {}

existing.update(info)
CFG.write_text(json.dumps(existing, indent=2))

print("Environment probe written to:", CFG)
print(json.dumps(info, indent=2))


  from tqdm.autonotebook import tqdm, trange


Environment probe written to: /home/manny-buff/projects/capstone/week6-rag-graph/configs/env_rag_graph.json
{
  "python_venv": "/home/manny-buff/venvs/core-rag",
  "python_version": "3.11.9",
  "platform": "Linux-6.14.0-33-generic-x86_64-with-glibc2.39",
  "cuda_visible_devices": null,
  "nvidia_smi": "NVIDIA GeForce RTX 4080, 580.65.06, 16376 MiB",
  "which_python": "/home/manny-buff/venvs/core-rag/bin/python",
  "pip_freeze_head": "accelerate==1.10.1\nacres==0.5.0\naiofiles==24.1.0\naiohappyeyeballs==2.6.1\naiohttp==3.12.15\naiosignal==1.4.0\naiosqlite==0.21.0\naltair==5.5.0\nannotated-types==0.7.0\nanyio==4.10.0\nargon2-cffi==25.1.0\nargon2-cffi-bindings==25.1.0\narrow==1.3.0\nasttokens==3.0.0\nasync-lru==2.0.5\nattrs==25.3.0\nav==15.1.0\nbabel==2.17.0\nbackoff==2.2.1\nbanks==2.2.0",
  "packages": {
    "numpy": "2.2.1",
    "pandas": "2.2.3",
    "networkx": "3.3",
    "sentence_transformers": "3.0.1",
    "transformers": "4.56.2",
    "accelerate": "1.10.1",
    "faiss": "1.10.0"


In [2]:
"""
'Cell B: Sanity Probe'
Purpose:
- Verify core imports.
- Run a tiny e5 embedding call to confirm encoder works.
- Check that local Qwen path exists (skip heavy model load for now).
"""

from pathlib import Path

# 'Load run config'
import json
CFG_RUN = Path("/home/manny-buff/projects/capstone/week6-rag-graph/configs/rag_graph_run_config.json")
run_cfg = json.loads(CFG_RUN.read_text())

# 'Imports check'
import numpy as np
import pandas as pd
import networkx as nx
from sentence_transformers import SentenceTransformer

# 'Embed a sample query with e5-small-v2'
embed_model_id = run_cfg["embed_model"]
model = SentenceTransformer(embed_model_id)
vec = model.encode(["hello graph-rag world"], convert_to_numpy=True)
print("Embedding shape:", vec.shape, "dtype:", vec.dtype)

# 'Confirm local Qwen path exists'
qwen_local = Path(run_cfg["llm_local_path"])
print("Qwen local path:", qwen_local, "exists:", qwen_local.exists())

# 'Lightweight graph sanity'
G = nx.Graph()
G.add_edge("doc_A", "doc_B", weight=0.9)
G.add_edge("doc_B", "doc_C", weight=0.7)
print("Graph nodes/edges:", G.number_of_nodes(), G.number_of_edges())


Embedding shape: (1, 384) dtype: float32
Qwen local path: /home/manny-buff/projects/capstone/hw-rag/models/Qwen2-VL-2B-Instruct exists: True
Graph nodes/edges: 3 2


In [3]:
"""
'Cell C: Config + Helpers (extended)'
- Loads config.
- Discovers multiple filetypes.
- Extracts text from txt/md/text/pdf/json/csv/html/htm.
- Adds a tqdm fallback and silences the common tqdm warning.
"""

import os, json, re, math, pickle, warnings
from pathlib import Path
from typing import List, Dict, Any
import pandas as pd

# Silence noisy tqdm warnings if present
warnings.filterwarnings("ignore", message=".*tqdm.*")

# --- Load run config ---
RUN_CFG_PATH = Path("/home/manny-buff/projects/capstone/week6-rag-graph/configs/rag_graph_run_config.json")
cfg = json.loads(RUN_CFG_PATH.read_text())

CORPUS_ROOT   = Path(cfg["corpus_root"])
VDB_DIR       = Path(cfg["vector_db_dir"])
EMBED_ID      = cfg["embed_model"]
LLM_MODEL_ID  = cfg["llm_model_id"]
LLM_LOCAL     = Path(cfg["llm_local_path"])
DEVICE        = cfg.get("device", "cuda")
RETRIEVER_K   = int(cfg.get("retriever_k", 5))
HOP_LIMIT     = int(cfg.get("hop_limit", 2))

VDB_DIR.mkdir(parents=True, exist_ok=True)

# --- Optional deps used if available ---
try:
    from tqdm import tqdm
except Exception:
    def tqdm(x, **kw):  # no-op fallback
        return x

# PDF
try:
    from pypdf import PdfReader
except Exception:
    PdfReader = None

# HTML
try:
    from bs4 import BeautifulSoup
except Exception:
    BeautifulSoup = None

# --- File discovery ---
EXTS = {".txt", ".md", ".text", ".pdf", ".json", ".csv", ".html", ".htm"}

def find_files(root: Path) -> List[Path]:
    files = []
    for p in root.rglob("*"):
        if p.is_file() and p.suffix.lower() in EXTS:
            files.append(p)
    files.sort()
    return files

# --- Loaders by type ---
def load_text_plain(fp: Path) -> str:
    try:
        return fp.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        return fp.read_text(errors="ignore")

def load_text_pdf(fp: Path) -> str:
    if PdfReader is None:
        return ""
    try:
        out = []
        reader = PdfReader(str(fp))
        for page in reader.pages:
            out.append(page.extract_text() or "")
        return "\n".join(out)
    except Exception:
        return ""

def load_text_json(fp: Path) -> str:
    try:
        obj = json.loads(fp.read_text(encoding="utf-8", errors="ignore"))
        # Flatten string-like leaf values
        def walk(x):
            if isinstance(x, dict):
                return " ".join(walk(v) for v in x.values())
            if isinstance(x, list):
                return " ".join(walk(v) for v in x)
            if isinstance(x, (str, int, float, bool)):
                return str(x)
            return ""
        return walk(obj)
    except Exception:
        return ""

def load_text_csv(fp: Path) -> str:
    try:
        df = pd.read_csv(fp, nrows=10000)  # cap large files
        return " ".join(map(str, df.astype(str).values.ravel().tolist()))
    except Exception:
        try:
            df = pd.read_table(fp, nrows=10000)
            return " ".join(map(str, df.astype(str).values.ravel().tolist()))
        except Exception:
            return ""

def load_text_html(fp: Path) -> str:
    if BeautifulSoup is None:
        return ""
    try:
        html = fp.read_text(encoding="utf-8", errors="ignore")
        soup = BeautifulSoup(html, "lxml")
        return soup.get_text(" ", strip=True)
    except Exception:
        return ""

LOADERS = {
    ".txt":  load_text_plain,
    ".md":   load_text_plain,
    ".text": load_text_plain,
    ".pdf":  load_text_pdf,
    ".json": load_text_json,
    ".csv":  load_text_csv,
    ".html": load_text_html,
    ".htm":  load_text_html,
}

def normalize_ws(s: str) -> str:
    return re.sub(r"\s+", " ", s).strip()

def chunk_text(s: str, max_tokens: int = 180, overlap: int = 30) -> List[str]:
    toks = s.split()
    chunks = []
    i = 0
    while i < len(toks):
        j = min(i + max_tokens, len(toks))
        chunk = " ".join(toks[i:j]).strip()
        if chunk:
            chunks.append(chunk)
        if j == len(toks):
            break
        i = max(0, j - overlap)
    return chunks

# --- Artifact I/O ---
ART_META   = VDB_DIR / "chunks_meta.parquet"
ART_CHUNKS = VDB_DIR / "chunks_text.pkl"
ART_FAISS  = VDB_DIR / "faiss.index"
ART_GRAPH  = VDB_DIR / "graph.pkl"

import pickle
def save_chunks_text(chunks: List[str]):
    with open(ART_CHUNKS, "wb") as f:
        pickle.dump(chunks, f)

def load_chunks_text() -> List[str]:
    with open(ART_CHUNKS, "rb") as f:
        return pickle.load(f)

print("Config loaded.")
print("CORPUS_ROOT =", CORPUS_ROOT)
print("Vector DB path =", VDB_DIR)
print("Extensions searched:", sorted(EXTS))


Config loaded.
CORPUS_ROOT = /home/manny-buff/projects/capstone/hw-rag/data
Vector DB path = /home/manny-buff/projects/capstone/week6-rag-graph/artifacts/vdb
Extensions searched: ['.csv', '.htm', '.html', '.json', '.md', '.pdf', '.text', '.txt']


In [4]:
"""
'Cell D (guarded): Build Embeddings + FAISS index'
- Reads/discovers/loads multiple file types.
- Chunks and embeds with e5-small-v2.
- Builds FAISS IP index (cosine on normalized vectors).
- Guards against empty corpus (prints message and returns early).
"""

from sentence_transformers import SentenceTransformer
import numpy as np, time, faiss

files = find_files(CORPUS_ROOT)
records = []
chunks_text = []

for doc_id, fp in enumerate(tqdm(files, desc="Loading+Chunking")):
    loader = LOADERS.get(fp.suffix.lower(), load_text_plain)
    raw = loader(fp)
    text = normalize_ws(raw)
    if not text:
        continue
    parts = chunk_text(text, max_tokens=180, overlap=30)
    for k, ch in enumerate(parts):
        records.append({
            "doc_id": doc_id,
            "chunk_id": len(chunks_text),
            "path": str(fp),
            "chunk_idx": k
        })
        chunks_text.append(ch)

import pandas as pd
meta_df = pd.DataFrame(records)
print(f"Docs: {len(files)} | Chunks: {len(chunks_text)}")

# Guard: no chunks → stop gracefully
if len(chunks_text) == 0:
    print("No chunks found. Please confirm corpus file types and that loaders extracted text.")
    # Tip for debugging: run the shell probe to see extensions/counts.
    raise SystemExit

# Embed
model = SentenceTransformer(EMBED_ID)
t0 = time.time()
emb = model.encode(
    chunks_text,
    convert_to_numpy=True,
    normalize_embeddings=True,
    show_progress_bar=True
)
print("Embeddings:", emb.shape, "elapsed_sec:", round(time.time()-t0, 2))

# Build FAISS
dim = emb.shape[1]
index = faiss.IndexFlatIP(dim)
index.add(emb.astype(np.float32))

# Save artifacts
meta_df.to_parquet(ART_META, index=False)
save_chunks_text(chunks_text)
faiss.write_index(index, str(ART_FAISS))

print("Saved:")
print(" -", ART_META)
print(" -", ART_CHUNKS)
print(" -", ART_FAISS)


Loading+Chunking:  71%|███████████████████████████████▊             | 12/17 [00:59<00:12,  2.51s/it]EOF marker not found
Loading+Chunking: 100%|█████████████████████████████████████████████| 17/17 [01:01<00:00,  3.60s/it]


Docs: 17 | Chunks: 4381


Batches:   0%|          | 0/137 [00:00<?, ?it/s]

Embeddings: (4381, 384) elapsed_sec: 4.43
Saved:
 - /home/manny-buff/projects/capstone/week6-rag-graph/artifacts/vdb/chunks_meta.parquet
 - /home/manny-buff/projects/capstone/week6-rag-graph/artifacts/vdb/chunks_text.pkl
 - /home/manny-buff/projects/capstone/week6-rag-graph/artifacts/vdb/faiss.index


In [5]:
"""
'Cell E: Build Similarity Graph'
Purpose:
- Create a lightweight graph of chunk relationships using top-N cosine neighbors.
- Collapses edges to doc-level (optional) or keeps chunk-level. We'll keep chunk-level for precision.
- Save graph.pkl for later Multi-Hop traversal.
"""

import numpy as np, networkx as nx, faiss, math

# Params for graph density
TOP_NEIGHBORS = max(10, RETRIEVER_K * 3)  # small multiple of retriever_k

# Load index and chunks
index = faiss.read_index(str(ART_FAISS))
chunks = load_chunks_text()
meta  = pd.read_parquet(ART_META)

# Query each vector against index to get neighbors (excluding self)
D, I = index.search(emb.astype(np.float32), TOP_NEIGHBORS + 1)

G = nx.Graph()
for row_idx, nbrs in enumerate(I):
    src = int(row_idx)
    for rank, nb in enumerate(nbrs):
        if nb == -1 or nb == src: 
            continue
        w = float(D[row_idx, rank])
        if w <= 0: 
            continue
        # Add undirected edge with weight=max(existing,w)
        if G.has_edge(src, nb):
            if w > G[src][nb].get("weight", 0.0):
                G[src][nb]["weight"] = w
        else:
            G.add_edge(src, nb, weight=w)

# Persist graph
with open(ART_GRAPH, "wb") as f:
    pickle.dump(G, f)

print("Graph built.")
print("Nodes:", G.number_of_nodes(), "Edges:", G.number_of_edges())


Graph built.
Nodes: 4381 Edges: 46574


In [6]:
"""
'Cell F: Validate'
Purpose:
- Issue a sample query to FAISS, print top-k chunk previews
- Show 1-step neighbors in the graph for the top hit (sanity for Multi-Hop)
"""

import textwrap, faiss, numpy as np
from sentence_transformers import SentenceTransformer

index = faiss.read_index(str(ART_FAISS))
meta  = pd.read_parquet(ART_META)
chunks = load_chunks_text()
enc   = SentenceTransformer(EMBED_ID)

query = "Briefly summarize the core topic of this corpus."
qv = enc.encode([query], convert_to_numpy=True, normalize_embeddings=True)
D, I = index.search(qv.astype(np.float32), RETRIEVER_K)

print("Top-k retrieved:")
for rank, cid in enumerate(I[0]):
    doc = meta.loc[meta["chunk_id"]==cid].iloc[0]
    preview = textwrap.shorten(chunks[cid], width=140, placeholder=" …")
    print(f"[{rank+1}] score={D[0,rank]:.3f} | {doc['path']} | chunk#{doc['chunk_idx']} :: {preview}")

# Graph neighbor preview for top hit
top_chunk = int(I[0,0])
print("\nGraph neighbors (first 10) of top chunk:", top_chunk)
with open(ART_GRAPH, "rb") as f:
    G = pickle.load(f)
nbrs = list(G.neighbors(top_chunk))[:10]
print("Neighbors:", nbrs)


Top-k retrieved:
[1] score=0.822 | /home/manny-buff/projects/capstone/hw-rag/data/1001 do-it-yourself hints & tips  tricks.pdf | chunk#1033 :: For inspecting a car's fi:ont and rear lights, p. 29, "Night moves" > Minor tUe for mounting on the ceil- ing of a closet to see what is …
[2] score=0.815 | /home/manny-buff/projects/capstone/hw-rag/data/1001 do-it-yourself hints & tips  tricks.pdf | chunk#1008 :: for a child's dresser, p. 21, "Playful pulls" Aluminum foil >■ Wrapped around pillows, to keep cats off the sofa, p. 41, "Stay off the …
[3] score=0.815 | /home/manny-buff/projects/capstone/hw-rag/data/1001 do-it-yourself hints & tips  tricks.pdf | chunk#1030 :: pour before completion, p. 258, "Easy pour" >■ To prevent a toilet bowl from "sweating" m humid weather, p. 173, "Bathroom condensation" …
[4] score=0.815 | /home/manny-buff/projects/capstone/hw-rag/data/the-complete-idiots-guide-to-simple-home-repair.pdf | chunk#26 :: motion. Warmest thanks are also due to Lynn Northrup, Jan L

In [7]:
"""
'Cell G: Load config + artifacts + embedder'
Purpose:
- Read run config (same JSON as Week6-1).
- Load FAISS, graph, metadata, and chunks.
- Initialize e5-small-v2 embedder.
"""

import json, pickle
from pathlib import Path
import pandas as pd
import numpy as np
import faiss, networkx as nx
from sentence_transformers import SentenceTransformer

# 'Paths'
ROOT = Path("/home/manny-buff/projects/capstone/week6-rag-graph")
CFG_RUN = ROOT / "configs" / "rag_graph_run_config.json"
VDB     = ROOT / "artifacts" / "vdb"

# 'Artifacts'
ART_META   = VDB / "chunks_meta.parquet"
ART_CHUNKS = VDB / "chunks_text.pkl"
ART_FAISS  = VDB / "faiss.index"
ART_GRAPH  = VDB / "graph.pkl"

# 'Load config'
cfg = json.loads(CFG_RUN.read_text())
CORPUS_ROOT  = Path(cfg["corpus_root"])
EMBED_ID     = cfg["embed_model"]              # e.g., 'intfloat/e5-small-v2'
LLM_MODEL_ID = cfg["llm_model_id"]             # e.g., 'Qwen/Qwen2.5-VL-3B-Instruct'
LLM_LOCAL    = Path(cfg["llm_local_path"])     # local Qwen path
RETRIEVER_K  = int(cfg.get("retriever_k", 5))
HOP_LIMIT    = int(cfg.get("hop_limit", 2))

# 'Load artifacts'
meta   = pd.read_parquet(ART_META)
chunks = pickle.loads(ART_CHUNKS.read_bytes())
index  = faiss.read_index(str(ART_FAISS))
with open(ART_GRAPH, "rb") as f:
    G = pickle.load(f)

# 'Embedder'
embedder = SentenceTransformer(EMBED_ID)

print("Loaded meta:", meta.shape, "| chunks:", len(chunks))
print("FAISS dims:", index.d, "| Graph nodes/edges:", G.number_of_nodes(), G.number_of_edges())
print("LLM local path exists:", LLM_LOCAL.exists())


Loaded meta: (4381, 4) | chunks: 4381
FAISS dims: 384 | Graph nodes/edges: 4381 46574
LLM local path exists: True


In [5]:
"""
'Cell H: Retrieval + Graph Expansion + Context Builder'
Purpose:
- Provide dense retriever (FAISS).
- Provide neighbor expansion up to HOP_LIMIT with breadth cap.
- Consolidate chunks into a prompt context (size-bounded).
"""

import math, textwrap
from typing import List, Set, Dict

def dense_retrieve(query: str, top_k: int) -> List[int]:
    # 'Encode query and search FAISS; return chunk IDs'
    q = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True)
    D, I = index.search(q.astype(np.float32), top_k)
    return [int(x) for x in I[0]]

def expand_via_graph(seed_ids: List[int], hop_limit: int, per_seed_cap: int = 20, global_cap: int = 200) -> List[int]:
    # 'Expand neighbors up to hop_limit; cap breadth; dedupe; return chunk IDs'
    visited: Set[int] = set(int(s) for s in seed_ids)
    frontier: Set[int] = set(visited)
    for hop in range(hop_limit):
        next_frontier: Set[int] = set()
        for node in list(frontier):
            nbrs = list(G.neighbors(node))
            # limit per-seed to avoid explosion
            for nb in nbrs[:per_seed_cap]:
                nb = int(nb)  # guard np.int64
                if nb not in visited:
                    next_frontier.add(nb)
        frontier = next_frontier
        visited.update(frontier)
        if len(visited) >= global_cap:
            break
    return list(visited)

def build_context(chunk_ids: List[int], max_chars: int = 4000) -> str:
    # 'Join snippets with lightweight headers; stop at char budget'
    out = []
    size = 0
    for cid in chunk_ids:
        row = meta.loc[meta["chunk_id"] == cid]
        if row.empty:
            continue
        path = row.iloc[0]["path"]
        idx  = row.iloc[0]["chunk_idx"]
        snippet = textwrap.shorten(chunks[cid], width=360, placeholder=" …")
        block = f"[SOURCE] {path} | chunk#{idx}\n{snippet}\n"
        if size + len(block) > max_chars:
            break
        out.append(block)
        size += len(block)
    return "\n".join(out)

def retrieve_expand_context(query: str, top_k: int = None, hop_limit: int = None, per_seed_cap: int = 20, global_cap: int = 200, max_chars: int = 4000):
    # 'End-to-end helper: dense retrieval → graph expand → context'
    if top_k is None:    top_k    = RETRIEVER_K
    if hop_limit is None: hop_limit = HOP_LIMIT

    seeds = dense_retrieve(query, top_k)
    expanded = expand_via_graph(seeds, hop_limit, per_seed_cap=per_seed_cap, global_cap=global_cap)
    # order: seeds first, then expanded (stable, unique)
    ordered = []
    seen = set()
    for x in seeds + expanded:
        if x not in seen:
            ordered.append(x)
            seen.add(x)
    context = build_context(ordered, max_chars=max_chars)
    return seeds, ordered, context


In [15]:
"""
'Cell I: Qwen Answer Synthesis (VL-aware, warning-free)'
- Uses AutoProcessor(use_fast=False) to silence the fast/slow warning.
- Uses AutoModelForImageTextToText (replaces deprecated AutoModelForVision2Seq).
- Uses chat template when available for better instruction adherence.
- Deterministic generation: no temperature/top_p/top_k; do_sample=False.
"""

import torch
from pathlib import Path
from transformers import (
    AutoConfig, AutoProcessor, AutoModelForImageTextToText,
    AutoTokenizer, AutoModelForCausalLM
)

class QwenAnswerer:
    def __init__(self, local_dir: Path, model_id: str, device: str = "cuda"):
        self.local_dir = local_dir
        self.model_id  = model_id
        self.device    = device
        self.is_vl     = False
        self.processor = None
        self.tokenizer = None
        self.model     = None

    def _load_from(self, src: str):
        cfg = AutoConfig.from_pretrained(src, trust_remote_code=True)
        mtype = getattr(cfg, "model_type", "").lower()

        if "vl" in mtype:  # qwen2_vl / qwen2_5_vl
            self.is_vl = True
            # use_fast=False: silences the processor warning and preserves old behavior
            self.processor = AutoProcessor.from_pretrained(src, trust_remote_code=True, use_fast=False)
            self.model = AutoModelForImageTextToText.from_pretrained(
                src, trust_remote_code=True,
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                device_map="auto"
            )
        else:
            self.is_vl = False
            self.tokenizer = AutoTokenizer.from_pretrained(src, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
                src, trust_remote_code=True,
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                device_map="auto"
            )

    def load(self):
        last_err = None
        sources = []
        if self.local_dir.exists():
            sources.append(str(self.local_dir))
        sources.append(self.model_id)
        for src in sources:
            try:
                self._load_from(src)
                return
            except Exception as e:
                last_err = e
                self.processor = self.tokenizer = self.model = None
        raise RuntimeError(f"Failed to load Qwen (VL-aware). Last error: {last_err}")

    def _format_prompt(self, question: str, context: str) -> dict:
        """
        Returns a dict with tokenization-ready inputs.
        Uses chat template if available for better instruction following.
        """
        system_msg = "You are a concise home-repair RAG assistant. Use ONLY the provided context. If missing info, say so."
        user_msg   = f"Question:\n{question}\n\nContext:\n{context}\n\nRespond concisely and cite key source file names."

        if self.is_vl:
            tok = self.processor.tokenizer
        else:
            tok = self.tokenizer

        apply_chat = getattr(tok, "apply_chat_template", None)
        if callable(apply_chat):
            messages = [
                {"role": "system", "content": system_msg},
                {"role": "user",   "content": user_msg}
            ]
            prompt_text = apply_chat(messages, tokenize=False, add_generation_prompt=True)
        else:
            prompt_text = f"{system_msg}\n\n{user_msg}"

        return {"text": prompt_text}

    def answer(self, question: str, context: str, max_new_tokens: int = 240) -> str:
        assert self.model is not None, "Model not loaded"
        prompt_dict = self._format_prompt(question, context)

        if self.is_vl:
            # text-only path via processor; deterministic generation (no sampling flags)
            inputs = self.processor(**prompt_dict, return_tensors="pt")
            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
            with torch.no_grad():
                out = self.model.generate(
                    **inputs,
                    max_new_tokens=max_new_tokens,
                    do_sample=False
                )
            text = self.processor.tokenizer.decode(out[0], skip_special_tokens=True)
        else:
            inputs = self.tokenizer(prompt_dict["text"], return_tensors="pt").to(self.model.device)
            with torch.no_grad():
                out = self.model.generate(
                    **inputs,
                    max_new_tokens=max_new_tokens,
                    do_sample=False
                )
            text = self.tokenizer.decode(out[0], skip_special_tokens=True)

        return text.strip()

qwen = QwenAnswerer(LLM_LOCAL, LLM_MODEL_ID)
print("QwenAnswerer (VL-aware, warning-free) ready. Call qwen.load() before answering.")




In [16]:
"""
'Cell J: Run query + log ablation row'
Purpose:
- Execute retrieve → expand → context build.
- Load Qwen and generate answer.
- Append an ablation row to CSV.
"""

import time, csv, os
ABL_PATH = ROOT / "artifacts" / "ablation_results_graph.csv"

query = "How can I stop a toilet tank from sweating in humid weather, and what simple materials do I need?"
seeds, expanded, context = retrieve_expand_context(query, top_k=RETRIEVER_K, hop_limit=HOP_LIMIT, per_seed_cap=20, global_cap=200, max_chars=4000)

print("Seeds:", seeds[:10], "… total:", len(seeds))
print("Expanded set size:", len(expanded))
print("Context preview:\n", context[:600], "…", sep="")

"""
Small patch: keep Cell J logic as-is, but no sampling flags are passed by QwenAnswerer now.
Just re-run this cell after re-running Cell I and calling qwen.load().
"""

# (re-run as before)
qwen.load()
t0 = time.time()
answer = qwen.answer(query, context, max_new_tokens=256)
elapsed = round(time.time() - t0, 2)

print("\n=== Answer ===\n", answer, "\n")
print("Latency (s):", elapsed)
# ablation append stays unchanged

# 'Log ablation row (accuracy left blank for manual scoring later)'
row = {
    "variant": "dense+graph",
    "retriever_k": RETRIEVER_K,
    "hop_limit": HOP_LIMIT,
    "accuracy": "",
    "notes": f"seeds={len(seeds)} expanded={len(expanded)} latency_s={elapsed}"
}

# Append row
header = ["variant", "retriever_k", "hop_limit", "accuracy", "notes"]
file_exists = ABL_PATH.exists()
with open(ABL_PATH, "a", newline="") as f:
    w = csv.DictWriter(f, fieldnames=header)
    if not file_exists:
        w.writeheader()
    w.writerow(row)

print(f"Ablation row appended to {ABL_PATH}")


Seeds: [1203, 528, 287, 507, 1199] … total: 5
Expanded set size: 813
Context preview:
[SOURCE] /home/manny-buff/projects/capstone/hw-rag/data/A Dirty Guide to a Clean Home _ Housekeeping Hacks You Cant Live Without.pdf | chunk#107
dusty and are a pain to clean. There is no need for a fancy tool to clean them, your hand is the perfect solution. The easiest way is just to grab a sock, slightly dampen it, put it over your hand, grasp each slat, and glide it along from one side to the other. If your blinds need a super deep clean, you can give them a bath. Yes, that’s right. If the …

[SOURCE] /home/manny-buff/projects/capstone/hw-rag/data/1001 do-it-yourself hints & tips  tricks.p…


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


=== Answer ===
 system
You are a concise home-repair RAG assistant. Use ONLY the provided context. If missing info, say so.
user
Question:
How can I stop a toilet tank from sweating in humid weather, and what simple materials do I need?

Context:
[SOURCE] /home/manny-buff/projects/capstone/hw-rag/data/A Dirty Guide to a Clean Home _ Housekeeping Hacks You Cant Live Without.pdf | chunk#107
dusty and are a pain to clean. There is no need for a fancy tool to clean them, your hand is the perfect solution. The easiest way is just to grab a sock, slightly dampen it, put it over your hand, grasp each slat, and glide it along from one side to the other. If your blinds need a super deep clean, you can give them a bath. Yes, that’s right. If the …

[SOURCE] /home/manny-buff/projects/capstone/hw-rag/data/1001 do-it-yourself hints & tips  tricks.pdf | chunk#528
Royal flush. Try replacing an old toilet writh a new low- flow model. A family of four can save over 20,000 gallons of water each year. A

In [None]:
# Week 6 — Part 2: Multi-Hop QA (Summary)
'''
**Retriever**: FAISS (cosine on normalized e5-small-v2)  
**Graph Expansion**: neighbor hops = `HOP_LIMIT` (default 2), per-seed cap = 20, global cap = 200  
**LLM**: Qwen-VL (VL-aware loader, deterministic `do_sample=False`)  
**Prompting**: Chat template when available; system+user roles; context-only constraint

### Pipeline
1. **Dense Retrieval** → top-K chunk IDs.
2. **Graph Expansion** → add neighbors (bounded breadth), dedupe, seeds prioritized.
3. **Context Build** → path+chunk headers with short previews, size-bounded.
4. **Answer Synthesis** → Qwen-VL text-only path; concise answer with file citations.

### Validations
- Seeds/Expanded sizes printed.
- Answer generated without processor/deprecation/generation warnings.
- Ablation row appended to `artifacts/ablation_results_graph.csv`.

**Determinism**: No sampling; repeated runs on the same artifacts/config should match.

'''

In [19]:
"""
'Report Retrieval Script #1 (Week6-1 & Week6-2)'
Purpose:
- Create or update a single markdown file summarizing environment, config,
  index/graph stats, and ablation results for Parts 1 & 2.
- Output: artifacts/Report_snippets_Wk6_1_2.md
"""

from pathlib import Path
import json, pickle, pandas as pd

ROOT = Path("/home/manny-buff/projects/capstone/week6-rag-graph")
OUT  = ROOT / "artifacts" / "Report_snippets_Wk6_1_2.md"

CFG_ENV = ROOT / "configs" / "env_rag_graph.json"
CFG_RUN = ROOT / "configs" / "rag_graph_run_config.json"
VDB     = ROOT / "artifacts" / "vdb"

ART_META   = VDB / "chunks_meta.parquet"
ART_FAISS  = VDB / "faiss.index"
ART_GRAPH  = VDB / "graph.pkl"
ABL_CSV    = ROOT / "artifacts" / "ablation_results_graph.csv"

# Load pieces (best-effort)
env_info = json.loads(CFG_ENV.read_text()) if CFG_ENV.exists() else {}
run_cfg  = json.loads(CFG_RUN.read_text()) if CFG_RUN.exists() else {}
meta_df  = pd.read_parquet(ART_META) if ART_META.exists() else pd.DataFrame()
graph_n  = graph_e = None
if ART_GRAPH.exists():
    with open(ART_GRAPH, "rb") as f:
        G = pickle.load(f)
        graph_n, graph_e = G.number_of_nodes(), G.number_of_edges()
abl_df = pd.read_csv(ABL_CSV) if ABL_CSV.exists() else pd.DataFrame()

# Compose markdown
lines = []
lines.append("# Week 6 — Report Snippets (Parts 1 & 2)\n")
lines.append("## Environment")
lines.append(f"- Python venv: `{env_info.get('python_venv','')}`")
lines.append(f"- Python: `{env_info.get('python_version','')}`")
lines.append(f"- GPU: `{env_info.get('nvidia_smi','')}`")
lines.append("")

lines.append("## Run Config")
for k in ["corpus_root","embed_model","llm_model_id","llm_local_path","device","retriever_k","hop_limit"]:
    lines.append(f"- {k}: `{run_cfg.get(k,'')}`")
lines.append("")

lines.append("## Artifacts")
lines.append(f"- Meta rows (chunks): {len(meta_df) if not meta_df.empty else 0}")
lines.append(f"- FAISS index: {'present' if ART_FAISS.exists() else 'missing'}")
if graph_n is not None:
    lines.append(f"- Graph nodes: {graph_n} | edges: {graph_e}")
lines.append("")

lines.append("## Ablation Results (head)")
if not abl_df.empty:
    lines.append(abl_df.head(5).to_markdown(index=False))
else:
    lines.append("_No ablation rows yet._")
lines.append("")

lines.append("## Ablation Results (tail)")
if not abl_df.empty:
    lines.append(abl_df.tail(5).to_markdown(index=False))
else:
    lines.append("_No ablation rows yet._")

OUT.write_text("\n".join(lines))
print(f"Wrote: {OUT}")


Wrote: /home/manny-buff/projects/capstone/week6-rag-graph/artifacts/Report_snippets_Wk6_1_2.md


In [6]:
# Deprecated / Skip
raise SystemExit("Deprecated cell — see 'Execution Notes' above. Use Cell K + terminal launch instead.")

"""
'Cell K: Write Streamlit App'
Purpose:
- Generate a minimal Streamlit application that uses the existing Week6-1/2 artifacts:
  dense retrieval -> graph expansion -> MMR re-ranking -> context -> Qwen answer (deterministic).
- The app logs each query/answer to CSV under artifacts/app_logs.csv for later reporting.

Notes:
- No external .py packaging needed; this cell writes a single file and we invoke Streamlit on it.
"""

from pathlib import Path
APP_DIR = Path("/home/manny-buff/projects/capstone/week6-rag-graph/app")
APP_DIR.mkdir(parents=True, exist_ok=True)
APP_PATH = APP_DIR / "app_rag.py"

app_code = r'''# -*- coding: utf-8 -*-
"""
Streamlit UI for Qwen-Graph-RAG
- Loads Week6 run_config and artifacts
- Dense retrieval -> Graph expansion -> MMR re-ranking -> Context build -> Deterministic Qwen answer
- Logs to artifacts/app_logs.csv
"""
import os, json, time, csv, math, textwrap, pickle
from pathlib import Path
import numpy as np
import pandas as pd
import streamlit as st

# --- Config & Artifacts ---
ROOT = Path("/home/manny-buff/projects/capstone/week6-rag-graph")
CFG_RUN = ROOT / "configs" / "rag_graph_run_config.json"
VDB     = ROOT / "artifacts" / "vdb"
ART_META   = VDB / "chunks_meta.parquet"
ART_CHUNKS = VDB / "chunks_text.pkl"
ART_FAISS  = VDB / "faiss.index"
ART_GRAPH  = VDB / "graph.pkl"
LOG_CSV    = ROOT / "artifacts" / "app_logs.csv"

# --- Caches ---
@st.cache_resource(show_spinner=False)
def load_config():
    cfg = json.loads(CFG_RUN.read_text())
    return cfg

@st.cache_resource(show_spinner=False)
def load_artifacts():
    import faiss, networkx as nx
    meta   = pd.read_parquet(ART_META)
    chunks = pickle.loads(ART_CHUNKS.read_bytes())
    index  = faiss.read_index(str(ART_FAISS))
    with open(ART_GRAPH, "rb") as f:
        G = pickle.load(f)
    return meta, chunks, index, G

@st.cache_resource(show_spinner=False)
def get_embedder(model_id: str):
    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer(model_id)
    return model

@st.cache_resource(show_spinner=False)
def get_qwen(local_dir: Path, model_id: str):
    import torch
    from transformers import (
        AutoConfig, AutoProcessor, AutoModelForImageTextToText,
        AutoTokenizer, AutoModelForCausalLM
    )
    class QwenAnswerer:
        def __init__(self, local_dir: Path, model_id: str):
            self.local_dir = local_dir
            self.model_id  = model_id
            self.is_vl     = False
            self.processor = None
            self.tokenizer = None
            self.model     = None

        def _load_from(self, src: str):
            cfg = AutoConfig.from_pretrained(src, trust_remote_code=True)
            mtype = getattr(cfg, "model_type", "").lower()
            if "vl" in mtype:
                self.is_vl = True
                self.processor = AutoProcessor.from_pretrained(src, trust_remote_code=True, use_fast=False)
                self.model = AutoModelForImageTextToText.from_pretrained(
                    src, trust_remote_code=True,
                    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                    device_map="auto"
                )
            else:
                self.is_vl = False
                self.tokenizer = AutoTokenizer.from_pretrained(src, trust_remote_code=True)
                self.model = AutoModelForCausalLM.from_pretrained(
                    src, trust_remote_code=True,
                    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                    device_map="auto"
                )

        def load(self):
            last_err = None
            sources = []
            if self.local_dir.exists():
                sources.append(str(self.local_dir))
            sources.append(self.model_id)
            for src in sources:
                try:
                    self._load_from(src)
                    return self
                except Exception as e:
                    last_err = e
                    self.processor = self.tokenizer = self.model = None
            raise RuntimeError(f"Failed to load Qwen. Last error: {last_err}")

        def _format_prompt(self, question: str, context: str) -> dict:
            system_msg = "You are a concise home-repair RAG assistant. Use ONLY the provided context. If missing info, say so."
            user_msg   = f"Question:\n{question}\n\nContext:\n{context}\n\nRespond concisely and cite key source file names."
            if self.is_vl:
                tok = self.processor.tokenizer
            else:
                tok = self.tokenizer
            apply_chat = getattr(tok, "apply_chat_template", None)
            if callable(apply_chat):
                messages = [
                    {"role": "system", "content": system_msg},
                    {"role": "user",   "content": user_msg}
                ]
                prompt_text = apply_chat(messages, tokenize=False, add_generation_prompt=True)
            else:
                prompt_text = f"{system_msg}\n\n{user_msg}"
            return {"text": prompt_text}

        def answer(self, question: str, context: str, max_new_tokens: int = 240):
            assert self.model is not None
            prompt = self._format_prompt(question, context)
            if hasattr(self, "processor") and self.processor is not None and self.is_vl:
                inputs = self.processor(**prompt, return_tensors="pt")
                inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
                with torch.no_grad():
                    out = self.model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
                text = self.processor.tokenizer.decode(out[0], skip_special_tokens=True)
            else:
                inputs = self.tokenizer(prompt["text"], return_tensors="pt").to(self.model.device)
                with torch.no_grad():
                    out = self.model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
                text = self.tokenizer.decode(out[0], skip_special_tokens=True)
            return text.strip()

    return QwenAnswerer(local_dir, model_id).load()

# --- Retrieval utilities ---
def dense_retrieve(embedder, index, query: str, top_k: int):
    qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True)
    D, I = index.search(qv.astype(np.float32), top_k)
    return qv[0], [int(x) for x in I[0]], D[0].tolist()

def expand_via_graph(G, seeds, hop_limit: int, per_seed_cap: int = 20, global_cap: int = 200):
    visited = set(int(s) for s in seeds)
    frontier = set(visited)
    for _ in range(hop_limit):
        nxt = set()
        for node in list(frontier):
            nbrs = list(G.neighbors(node))
            for nb in nbrs[:per_seed_cap]:
                nb = int(nb)
                if nb not in visited:
                    nxt.add(nb)
        frontier = nxt
        visited.update(frontier)
        if len(visited) >= global_cap:
            break
    return list(visited)

def mmr_select(embedder, query_vec, candidate_ids, chunks, k=12, lambda_weight=0.70):
    # Compute embeddings for candidates (batch)
    ctext = [chunks[cid] for cid in candidate_ids]
    C = embedder.encode(ctext, convert_to_numpy=True, normalize_embeddings=True)
    # Similarity query->candidate
    sim_q = C @ query_vec  # cosine if normalized
    selected = []
    remaining = list(range(len(candidate_ids)))
    # Greedy MMR
    while remaining and len(selected) < k:
        if not selected:
            idx = int(np.argmax(sim_q[remaining]))
            chosen = remaining.pop(idx)
            selected.append(chosen)
            continue
        # diversity term: max sim to already selected
        S = C[[remaining]] @ C[[candidate_ids.index(candidate_ids[s]) for s in selected]].T  # but we need matching shapes
        # Simpler: compute max sim to any selected row directly
        max_sim = np.max(C[remaining] @ C[selected].T, axis=1)
        # MMR score
        mmr = lambda_weight * sim_q[remaining] - (1 - lambda_weight) * max_sim
        idx = int(np.argmax(mmr))
        chosen = remaining.pop(idx)
        selected.append(chosen)
    # Map back to chunk ids
    return [candidate_ids[i] for i in selected]

def build_context(meta, chunks, ordered_ids, max_chars=4000):
    out = []
    size = 0
    for cid in ordered_ids:
        row = meta.loc[meta["chunk_id"]==cid]
        if row.empty: 
            continue
        path = row.iloc[0]["path"]
        idx  = row.iloc[0]["chunk_idx"]
        snippet = textwrap.shorten(chunks[cid], width=360, placeholder=" …")
        block = f"[SOURCE] {path} | chunk#{idx}\n{snippet}\n"
        if size + len(block) > max_chars:
            break
        out.append(block)
        size += len(block)
    return "\n".join(out)

# --- UI ---
st.set_page_config(page_title="Qwen Graph-RAG", page_icon="🧭", layout="wide")
st.title("Qwen Graph-RAG (Week 6 — Application)")

cfg = load_config()
meta, chunks, index, G = load_artifacts()
embedder = get_embedder(cfg["embed_model"])
qwen = None

with st.sidebar:
    st.subheader("Settings")
    top_k    = st.slider("Retriever K", 3, 20, int(cfg.get("retriever_k", 5)), 1)
    hop_lim  = st.slider("Hop Limit", 0, 3, int(cfg.get("hop_limit", 2)), 1)
    per_cap  = st.slider("Per-seed Neighbor Cap", 5, 50, 15, 1)
    glob_cap = st.slider("Global Cap", 50, 800, 200, 50)
    mmr_k    = st.slider("MMR Select K (context items)", 4, 20, 12, 1)
    mmr_lmb  = st.slider("MMR λ (relevance vs diversity)", 0.10, 0.95, 0.70, 0.05)
    max_chars= st.slider("Context Max Chars", 1000, 8000, 4000, 250)
    load_llm = st.checkbox("Load Qwen model", value=False, help="Check this once before first query.")
    st.caption("Deterministic generation (do_sample=False).")

if load_llm and qwen is None:
    try:
        qwen = get_qwen(Path(cfg["llm_local_path"]), cfg["llm_model_id"])
        st.success("Qwen loaded.")
    except Exception as e:
        st.error(f"Qwen load error: {e}")

query = st.text_area("Ask a question about home repair:", height=100, placeholder="e.g., How can I stop a toilet tank from sweating in humid weather?")
run = st.button("Search & Answer")

colL, colR = st.columns([1,1])

if run and query.strip():
    t0 = time.time()
    qv, seeds, scores = dense_retrieve(embedder, index, query, top_k=top_k)
    expanded = expand_via_graph(G, seeds, hop_limit=hop_lim, per_seed_cap=per_cap, global_cap=glob_cap)

    # prioritize seeds, then expanded unique
    cand = []
    seen = set()
    for c in seeds + expanded:
        if c not in seen:
            cand.append(c); seen.add(c)

    # MMR selection for context
    try:
        selected = mmr_select(embedder, qv, cand, chunks, k=mmr_k, lambda_weight=mmr_lmb)
    except Exception:
        # fallback: take first mmr_k
        selected = cand[:mmr_k]

    context = build_context(meta, chunks, selected, max_chars=max_chars)

    with colL:
        st.markdown("### Top Sources (MMR-selected)")
        for i, cid in enumerate(selected, 1):
            row = meta.loc[meta["chunk_id"]==cid].iloc[0]
            st.write(f"[{i}] {row['path']} (chunk {row['chunk_idx']})")

        st.markdown("### Context Preview")
        st.code(context[:1000] + (" ..." if len(context) > 1000 else ""), language="text")

    answer = ""
    lat = None
    with colR:
        if load_llm:
            try:
                t1 = time.time()
                answer = qwen.answer(query, context, max_new_tokens=256)
                lat = round(time.time() - t1, 2)
                st.markdown("### Answer")
                st.write(answer)
                st.caption(f"LLM latency: {lat}s")
            except Exception as e:
                st.error(f"Answer error: {e}")
        else:
            st.info("Load the Qwen model in the sidebar to generate an answer.")
            st.markdown("### Answer (not generated)")
            st.write("")

    # Log row
    try:
        LOG_CSV.parent.mkdir(parents=True, exist_ok=True)
        file_exists = LOG_CSV.exists()
        with open(LOG_CSV, "a", newline="") as f:
            w = csv.DictWriter(f, fieldnames=[
                "ts","query","top_k","hop_limit","per_seed_cap","global_cap","mmr_k","mmr_lambda","latency_s","seeds","expanded_size","selected_ids"
            ])
            if not file_exists:
                w.writeheader()
            w.writerow({
                "ts": time.strftime("%Y-%m-%d %H:%M:%S"),
                "query": query,
                "top_k": top_k,
                "hop_limit": hop_lim,
                "per_seed_cap": per_cap,
                "global_cap": glob_cap,
                "mmr_k": mmr_k,
                "mmr_lambda": mmr_lmb,
                "latency_s": lat if lat is not None else "",
                "seeds": str(seeds),
                "expanded_size": len(expanded),
                "selected_ids": str(selected)
            })
    except Exception as e:
        st.warning(f"Could not write log: {e}")

    st.success(f"Done in {round(time.time()-t0, 2)}s.")
'''

APP_PATH.write_text(app_code)
print(f"Wrote app to: {APP_PATH}")


SystemExit: Deprecated cell — see 'Execution Notes' above. Use Cell K + terminal launch instead.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [7]:
# Deprecated / Skip
raise SystemExit("Deprecated cell — see 'Execution Notes' above. Use Cell K + terminal launch instead.")

"""
'Cell L: Run Streamlit'
- Launch the app in this environment.
- Stop with Ctrl+C in terminal if launched from a terminal; via notebook, it'll stream logs until you interrupt.
"""
import os, subprocess, sys
subprocess.run(["bash","-lc", f"cd /home/manny-buff/projects/capstone/week6-rag-graph/app && streamlit run app_rag.py"], check=False)


SystemExit: Deprecated cell — see 'Execution Notes' above. Use Cell K + terminal launch instead.

In [None]:
# Week 6 — Part 3: Application (Summary)
'''
**UI**: Streamlit single-file app (`app/app_rag.py`).  
**Pipeline**: Dense retrieval → Graph expansion → MMR re-ranking → Context build → Qwen (deterministic).  
**Model**: Qwen-VL (local path preferred), loaded once and cached in `st.session_state`.  
**Determinism**: `do_sample=False`; no temperature/top-p/top-k.  
**Logs**: `artifacts/app_logs.csv` (query, settings, seeds, expanded size, selected IDs, latency).  

### How to run
1. Write app from Cell K (once).  
2. Terminal:  
    source ~/venvs/core-rag/bin/activate
    STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
    streamlit run /home/manny-buff/projects/capstone/week6-rag-graph/app/app_rag.py
        --server.headless true --server.port 8501
3. In the UI, check **Load Qwen model**, ask questions.

### Validations observed
- Top sources reflect relevant chunks from the FAISS+Graph pipeline.
- Context preview trimmed to the configured character budget.
- Answers generated without prompt echo or HF noise.
'''

In [14]:
"""
'Report Retrieval Script #2 — CLEAN VERSION (Week6-3)'
- Deterministic beam search with anti-repetition.
- Bullet de-duplication for cleaner report text.
- Outputs: artifacts/Report_snippets_Wk6_3.md
"""

from pathlib import Path
import json, pickle, time, textwrap
import pandas as pd
import numpy as np

ROOT = Path("/home/manny-buff/projects/capstone/week6-rag-graph")
CFG_RUN = ROOT / "configs" / "rag_graph_run_config.json"
VDB     = ROOT / "artifacts" / "vdb"
ART_META   = VDB / "chunks_meta.parquet"
ART_CHUNKS = VDB / "chunks_text.pkl"
ART_FAISS  = VDB / "faiss.index"
ART_GRAPH  = VDB / "graph.pkl"
LOG_CSV    = ROOT / "artifacts" / "app_logs.csv"
OUT_MD     = ROOT / "artifacts" / "Report_snippets_Wk6_3.md"

# ---- Load config & artifacts
cfg = json.loads(CFG_RUN.read_text())
meta   = pd.read_parquet(ART_META)
chunks = pickle.loads(ART_CHUNKS.read_bytes())

import faiss, networkx as nx
index  = faiss.read_index(str(ART_FAISS))
with open(ART_GRAPH, "rb") as f:
    G = pickle.load(f)

from sentence_transformers import SentenceTransformer
embedder = SentenceTransformer(cfg["embed_model"])

# ---- Qwen loader (VL-aware)
import torch
from transformers import (
    AutoConfig, AutoProcessor, AutoModelForImageTextToText,
    AutoTokenizer, AutoModelForCausalLM
)
from transformers.utils import logging as hf_logging
hf_logging.set_verbosity_error()

GEN_KW = dict(                 # anti-repetition, deterministic
    do_sample=False,
    num_beams=4,
    early_stopping=True,
    no_repeat_ngram_size=6,
    repetition_penalty=1.1
)

class QwenAnswerer:
    def __init__(self, local_dir: Path, model_id: str):
        self.local_dir = Path(local_dir)
        self.model_id  = model_id
        self.is_vl     = False
        self.processor = None
        self.tokenizer = None
        self.model     = None
    def _load_from(self, src: str):
        cfg = AutoConfig.from_pretrained(src, trust_remote_code=True)
        mtype = getattr(cfg, "model_type", "").lower()
        if "vl" in mtype:
            self.is_vl = True
            self.processor = AutoProcessor.from_pretrained(src, trust_remote_code=True, use_fast=False)
            self.model = AutoModelForImageTextToText.from_pretrained(
                src, trust_remote_code=True,
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                device_map="auto"
            )
        else:
            self.is_vl = False
            self.tokenizer = AutoTokenizer.from_pretrained(src, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
                src, trust_remote_code=True,
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                device_map="auto"
            )
    def load(self):
        last = None
        for src in ([str(self.local_dir)] if self.local_dir.exists() else []) + [self.model_id]:
            try:
                self._load_from(src); return self
            except Exception as e: last = e
        raise RuntimeError(f"Qwen load failed: {last}")

    def _prompt(self, q, ctx):
        sys = "You are a concise home-repair RAG assistant. Use ONLY the provided context."
        user = (
            f"Question:\n{q}\n\nContext:\n{ctx}\n\n"
            "Respond in 4–8 bullet points, each UNIQUE and action-oriented. "
            "End with a bracketed list of 2–4 source file names.\n"
        )
        tok = self.processor.tokenizer if self.is_vl else self.tokenizer
        apply_chat = getattr(tok, "apply_chat_template", None)
        if callable(apply_chat):
            msgs = [{"role":"system","content":sys},{"role":"user","content":user}]
            return apply_chat(msgs, tokenize=False, add_generation_prompt=True)
        return f"{sys}\n\n{user}"

    def _dedupe_bullets(self, text: str, min_b=4, max_b=8) -> str:
        lines = [ln.strip() for ln in text.splitlines()]
        seen, out = set(), []
        for ln in lines:
            if not ln: continue
            if ln.startswith(("-", "*", "•")):
                core = ln.lstrip("-*• ").strip()
                key  = core.lower()
                if key in seen: 
                    continue
                seen.add(key)
                out.append("- " + core)
            else:
                out.append(ln)
        # Clamp bullet count
        bullets = [l for l in out if l.startswith("- ")]
        if len(bullets) > max_b:
            kept, rest = 0, []
            new_out = []
            for l in out:
                if l.startswith("- ") and kept < max_b:
                    new_out.append(l); kept += 1
                elif not l.startswith("- "):
                    rest.append(l)
            out = new_out + rest
        return "\n".join(out).strip()

    def answer(self, q, ctx, max_new_tokens=220):
        ptxt = self._prompt(q, ctx)
        if self.is_vl:
            inp = self.processor(text=ptxt, return_tensors="pt")
            inp = {k:v.to(self.model.device) for k,v in inp.items()}
            inlen = int(inp["input_ids"].shape[1])
            with torch.no_grad():
                out = self.model.generate(**inp, max_new_tokens=max_new_tokens, **GEN_KW)
            gen = out[0][inlen:]
            txt = self.processor.tokenizer.decode(gen, skip_special_tokens=True)
        else:
            inp = self.tokenizer(ptxt, return_tensors="pt").to(self.model.device)
            inlen = int(inp["input_ids"].shape[1])
            with torch.no_grad():
                out = self.model.generate(**inp, max_new_tokens=max_new_tokens, **GEN_KW)
            gen = out[0][inlen:]
            txt = self.tokenizer.decode(gen, skip_special_tokens=True)
        return self._dedupe_bullets(txt)

qwen = QwenAnswerer(cfg["llm_local_path"], cfg["llm_model_id"]).load()

# ---- Retrieval helpers
def dense_retrieve(query: str, top_k: int):
    qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True)
    D, I = index.search(qv.astype(np.float32), top_k)
    return qv[0], [int(x) for x in I[0]]

def expand_via_graph(seeds, hop_limit: int, per_seed_cap: int = 20, global_cap: int = 200):
    visited = set(int(s) for s in seeds); frontier = set(visited)
    for _ in range(hop_limit):
        nxt = set()
        for node in list(frontier):
            for nb in list(G.neighbors(node))[:per_seed_cap]:
                nb = int(nb)
                if nb not in visited: nxt.add(nb)
        frontier = nxt; visited.update(frontier)
        if len(visited) >= global_cap: break
    return list(visited)

def mmr_select(query_vec, cand_ids, k=12, lambda_weight=0.70):
    if not cand_ids: return []
    C = embedder.encode([chunks[int(c)] for c in cand_ids],
                        convert_to_numpy=True, normalize_embeddings=True)
    rel = C @ query_vec
    selected, pool = [], list(range(len(cand_ids)))
    while pool and len(selected) < k:
        if not selected:
            best = int(np.argmax(rel[pool])); chosen = pool.pop(best); selected.append(chosen); continue
        sim_to_S = C[pool] @ C[selected].T
        max_div = sim_to_S.max(axis=1) if sim_to_S.ndim == 2 else sim_to_S
        mmr = lambda_weight * rel[pool] - (1.0 - lambda_weight) * max_div
        best = int(np.argmax(mmr)); chosen = pool.pop(best); selected.append(chosen)
    return [int(cand_ids[i]) for i in selected]

def build_context(ids, max_chars=4000):
    if not ids: return ""
    out, size = [], 0
    for cid in ids:
        row = meta.loc[meta["chunk_id"]==int(cid)]
        if row.empty: continue
        path = row.iloc[0]["path"]; idx = int(row.iloc[0]["chunk_idx"])
        snip = textwrap.shorten(chunks[int(cid)], width=360, placeholder=" …")
        block = f"[SOURCE] {path} | chunk#{idx}\n{snip}\n"
        if size + len(block) > max_chars: break
        out.append(block); size += len(block)
    return "\n".join(out)

# ---- Build snippet
lines = ["# Week 6 — Report Snippets (Part 3: Application)\n"]
if not LOG_CSV.exists():
    lines.append("_No app logs yet (artifacts/app_logs.csv missing)._")
    OUT_MD.write_text("\n".join(lines)); print(f"Wrote: {OUT_MD}"); raise SystemExit

logs = pd.read_csv(LOG_CSV)
runs = len(logs); lat = logs["latency_s"].dropna()
avg_lat = lat.mean() if not lat.empty else None
lines += [f"- Total runs: **{runs}**",
          f"- Average LLM latency: **{avg_lat:.2f}s**" if avg_lat is not None else "- Average LLM latency: _n/a_",
          ""]

N = min(3, runs)
for i, row in logs.tail(N).reset_index(drop=True).iterrows():
    q = str(row["query"])
    top_k    = int(row.get("top_k", cfg.get("retriever_k", 5)))
    hop_lim  = int(row.get("hop_limit", cfg.get("hop_limit", 2)))
    per_cap  = int(row.get("per_seed_cap", 20))
    glob_cap = int(row.get("global_cap", 200))
    mmr_k    = int(row.get("mmr_k", 12))
    mmr_lmb  = float(row.get("mmr_lambda", 0.70))

    qv, seeds = dense_retrieve(q, top_k)
    expanded  = expand_via_graph(seeds, hop_lim, per_seed_cap=per_cap, global_cap=glob_cap)
    cand, seen = [], set()
    for c in seeds + expanded:
        c = int(c)
        if c not in seen: cand.append(c); seen.add(c)
    selected = mmr_select(qv, cand, k=mmr_k, lambda_weight=mmr_lmb) or cand[:mmr_k]
    ctx = build_context(selected, max_chars=4000)

    t0 = time.time()
    ans = qwen.answer(q, ctx, max_new_tokens=240)
    lat2 = round(time.time() - t0, 2)

    srcs = []
    for cid in selected[:6]:
        rm = meta.loc[meta["chunk_id"]==int(cid)]
        if rm.empty: continue
        srcs.append(Path(rm.iloc[0]["path"]).name)
    srcs_txt = "; ".join(dict.fromkeys(srcs))

    lines += [
        f"## Sample {i+1}",
        f"**Query:** {q}",
        f"**Settings:** K={top_k}, hops={hop_lim}, per_cap={per_cap}, global_cap={glob_cap}, MMR_k={mmr_k}, λ={mmr_lmb}",
        f"**Answer (lat {lat2}s):**\n\n{ans}\n",
        f"**Sources:** {srcs_txt}\n"
    ]

OUT_MD.write_text("\n".join(lines))
print(f"Wrote: {OUT_MD}")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Wrote: /home/manny-buff/projects/capstone/week6-rag-graph/artifacts/Report_snippets_Wk6_3.md


In [15]:
"""
'Cell R: Assemble Report.md'
Purpose:
- Combine Week6 Part 1&2 snippet + Part 3 snippet into a single Report.md at the project root.
- Adds a short cover with project info and run timestamp.

Outputs:
- /home/manny-buff/projects/capstone/week6-rag-graph/Report.md
"""

from pathlib import Path
import time

ROOT = Path("/home/manny-buff/projects/capstone/week6-rag-graph")
SNIP_12 = ROOT / "artifacts" / "Report_snippets_Wk6_1_2.md"
SNIP_3  = ROOT / "artifacts" / "Report_snippets_Wk6_3.md"
OUT     = ROOT / "Report.md"

# 'Read snippets safely (empty if missing)'
snip12 = SNIP_12.read_text() if SNIP_12.exists() else "_Part 1&2 snippet missing._"
snip3  = SNIP_3.read_text()  if SNIP_3.exists()  else "_Part 3 snippet missing._"

# 'Cover'
cover = f"""# Capstone — Week 6: Next-Level RAG (Graph-RAG → Multi-Hop → Application)

**Author:** manny-buff  
**Generated:** {time.strftime('%Y-%m-%d %H:%M:%S')}

This report compiles:
- Part 1: Graph-RAG Build
- Part 2: Multi-Hop QA (dense → graph expansion → Qwen synthesis)
- Part 3: Streamlit Application (logs & sample answers)

Artifacts are created under `artifacts/` per the run config in `configs/rag_graph_run_config.json`.

---

"""

# 'Assemble and write'
OUT.write_text(cover + snip12 + "\n\n---\n\n" + snip3 + "\n")
print(f"Wrote: {OUT}")
print("\nReport preview:")
print(OUT.read_text().splitlines()[0:30])  # show first ~30 lines as a quick probe


Wrote: /home/manny-buff/projects/capstone/week6-rag-graph/Report.md

Report preview:
['# Capstone — Week 6: Next-Level RAG (Graph-RAG → Multi-Hop → Application)', '', '**Author:** manny-buff  ', '**Generated:** 2025-10-05 16:28:47', '', 'This report compiles:', '- Part 1: Graph-RAG Build', '- Part 2: Multi-Hop QA (dense → graph expansion → Qwen synthesis)', '- Part 3: Streamlit Application (logs & sample answers)', '', 'Artifacts are created under `artifacts/` per the run config in `configs/rag_graph_run_config.json`.', '', '---', '', '# Week 6 — Report Snippets (Parts 1 & 2)', '', '## Environment', '- Python venv: `/home/manny-buff/venvs/core-rag`', '- Python: `3.11.9`', '- GPU: `NVIDIA GeForce RTX 4080, 580.65.06, 16376 MiB`', '', '## Run Config', '- corpus_root: `/home/manny-buff/projects/capstone/hw-rag/data/`', '- embed_model: `intfloat/e5-small-v2`', '- llm_model_id: `Qwen/Qwen2.5-VL-3B-Instruct`', '- llm_local_path: `/home/manny-buff/projects/capstone/hw-rag/models/Qwen2-VL-2B-I

In [20]:
"""
'Cell S: Generate README.md, requirements.txt, and .gitignore'
Purpose:
- Create concise, reproducible project metadata files at the repo root.

Files:
- README.md
- requirements.txt
- .gitignore
"""

from pathlib import Path
ROOT = Path("/home/manny-buff/projects/capstone/week6-rag-graph")

readme = f"""# Qwen Graph-RAG — Week 6

This repo contains a local, config-driven Graph-RAG pipeline with Multi-Hop QA and a Streamlit application.
It reuses the Week5 dataset and Qwen model paths, and writes reproducible configs and artifacts.

## Quickstart


# 1) Python env
python -m venv ~/venvs/core-rag
source ~/venvs/core-rag/bin/activate
pip install -r requirements.txt

# 2) Build index + graph (Week6-1)
# open notebooks/Week6-1-HW.ipynb and run Cells C→F

# 3) Multi-Hop QA (Week6-2)
# open notebooks/Week6-2-HW.ipynb and run Cells G→J
# (ensure you see ablation rows in artifacts/ablation_results_graph.csv)

# 4) Application (Week6-3)
# run Cell K to write the app file (once), then from terminal:
streamlit run app/app_rag.py --server.headless true --server.port 8501

Structure
week6-rag-graph/
├─ app/
│  └─ app_rag.py
├─ artifacts/
│  ├─ vdb/                # FAISS index, graph, chunk meta
│  ├─ app_logs.csv        # Streamlit runs (app appends)
│  ├─ ablation_results_graph.csv
│  ├─ Report_snippets_Wk6_1_2.md
│  └─ Report_snippets_Wk6_3.md
├─ configs/
│  ├─ env_rag_graph.json
│  └─ rag_graph_run_config.json
├─ notebooks/
│  ├─ Week6-1-HW.ipynb    # Build
│  ├─ Week6-2-HW.ipynb    # Multi-Hop QA
│  └─ Week6-3-HW.ipynb    # Application + reporting
└─ Report.md

Configuration

Update configs/rag_graph_run_config.json to point at:

"corpus_root": "/home/manny-buff/projects/capstone/hw-rag/data/"

"llm_local_path": "/home/manny-buff/projects/capstone/hw-rag/models/Qwen2-VL-2B-Instruct/"

"embed_model": "intfloat/e5-small-v2"

"llm_model_id": "Qwen/Qwen2.5-VL-3B-Instruct"

Notes

Deterministic generation (do_sample=False) for evaluation.

Beam search with no-repeat for report regeneration to reduce repetition.
"""

requirements = """# Core
numpy
pandas
networkx
matplotlib

Embeddings / LLM

sentence-transformers
transformers>=4.45.0
accelerate>=0.34.0
safetensors

Vector store / parsing

faiss-cpu
pypdf
beautifulsoup4
lxml
tqdm

App

streamlit
"""

gitignore = """# Python
pycache/
*.pyc
*.pyo
*.pyd
*.egg-info/
.ipynb_checkpoints/
.DS_Store

Virtual envs (keep them local)

.venv/
venv/
env/
venvs/
*.env

Large, derived, or local-only artifacts

artifacts/vdb/*
artifacts/app_logs.csv

Keep important small outputs

!artifacts/ablation_results_graph.csv
!artifacts/Report_snippets_Wk6_1_2.md
!artifacts/Report_snippets_Wk6_3.md

Models / data paths are external (do not commit local copies)

models/
data/
"""

# Write files

(ROOT / "README.md").write_text(readme)
(ROOT / "requirements.txt").write_text(requirements)
(ROOT / ".gitignore").write_text(gitignore)

print("Wrote:")
for f in ["README.md", "requirements.txt", ".gitignore"]:
    p = ROOT / f
print("-", p, f"(size={p.stat().st_size} bytes)")

print("\nREADME preview:")
print((ROOT/"README.md").read_text().splitlines()[:24])

Wrote:
- /home/manny-buff/projects/capstone/week6-rag-graph/.gitignore (size=444 bytes)

README preview:
['# Qwen Graph-RAG — Week 6', '', 'This repo contains a local, config-driven Graph-RAG pipeline with Multi-Hop QA and a Streamlit application.', 'It reuses the Week5 dataset and Qwen model paths, and writes reproducible configs and artifacts.', '', '## Quickstart', '', '', '# 1) Python env', 'python -m venv ~/venvs/core-rag', 'source ~/venvs/core-rag/bin/activate', 'pip install -r requirements.txt', '', '# 2) Build index + graph (Week6-1)', '# open notebooks/Week6-1-HW.ipynb and run Cells C→F', '', '# 3) Multi-Hop QA (Week6-2)', '# open notebooks/Week6-2-HW.ipynb and run Cells G→J', '# (ensure you see ablation rows in artifacts/ablation_results_graph.csv)', '', '# 4) Application (Week6-3)', '# run Cell K to write the app file (once), then from terminal:', 'streamlit run app/app_rag.py --server.headless true --server.port 8501', '']
