# Baseline RAG (retrieve → generate) and timeframe drift failure

This notebook loads the persisted Chroma index from Notebook 02 and runs a simple retrieve-then-generate baseline with OpenAI chat completions. It intentionally does **not** enforce recency so we can observe timeframe drift.

In [None]:
from __future__ import annotations

import os
import sys
from pathlib import Path

import pandas as pd


def _find_project_root() -> Path:
    """Return the project root directory, searching upward from the CWD.

    Starting at the resolved current working directory and moving through
    each of its parents, a directory qualifies when it contains
    ``src/config.py``. At every level the directory itself is checked first,
    then its ``agentic-rag-second-brain`` subdirectory.

    Returns:
        The first qualifying directory found.

    Raises:
        RuntimeError: If no directory on the path to the filesystem root
            qualifies.
    """
    marker = Path("src") / "config.py"
    start = Path.cwd().resolve()

    search_path = [start]
    search_path.extend(start.parents)

    for directory in search_path:
        # Order matters: the directory itself wins over the nested checkout.
        for candidate in (directory, directory / "agentic-rag-second-brain"):
            if (candidate / marker).exists():
                return candidate

    raise RuntimeError("Could not locate project root containing src/config.py")

# Locate the project root, put it at the front of sys.path when it is not
# already listed (so the `src.*` imports can resolve), and make it the
# working directory.
PROJECT_ROOT = _find_project_root()
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path[:0] = [str(PROJECT_ROOT)]
os.chdir(PROJECT_ROOT)

from src.config import settings
from src.rag_baseline import baseline_rag_answer
from src.retrieval import load_persisted_index, retrieve_chunks

# Runtime configuration. For every value except the API key, an environment
# variable takes precedence and the matching attribute on `settings` is the
# fallback. The API key has no fallback: it defaults to an empty string.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "").strip()
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", settings.openai_model)
EMBED_MODEL = os.environ.get("EMBED_MODEL", settings.embed_model)
# Resolved to an absolute path against the current working directory.
CHROMA_DIR = Path(os.environ.get("CHROMA_DIR", settings.chroma_dir)).resolve()
# Numeric settings are coerced because environment values arrive as strings.
TOP_K = int(os.environ.get("TOP_K", settings.top_k))
TEMPERATURE = float(os.environ.get("TEMPERATURE", settings.temperature))
MAX_CONTEXT_CHARS = int(os.environ.get("MAX_CONTEXT_CHARS", settings.max_context_chars))

# Echo the effective configuration, one "- NAME: value" line per setting.
# The API key itself is never printed, only whether it is set.
print("Config:")
print(
    "\n".join(
        f"- {name}: {value}"
        for name, value in (
            ("PROJECT_ROOT", PROJECT_ROOT),
            ("CHROMA_DIR", CHROMA_DIR),
            ("EMBED_MODEL", EMBED_MODEL),
            ("OPENAI_MODEL", OPENAI_MODEL),
            ("TOP_K", TOP_K),
            ("TEMPERATURE", TEMPERATURE),
            ("MAX_CONTEXT_CHARS", MAX_CONTEXT_CHARS),
            ("OPENAI_API_KEY set", "yes" if OPENAI_API_KEY else "no"),
        )
    )
)


In [None]:
# Stop here when no API key is configured.
# OSError is the same class as EnvironmentError (the latter is an alias).
if not OPENAI_API_KEY:
    raise OSError(
        "OPENAI_API_KEY is required for Notebook 03. "
        "Set it before running, for example: `export OPENAI_API_KEY='your-key'`."
    )


In [None]:
# Start from "no index" so the retrieval cell can detect a failed load via
# `index is None`.
index = None
try:
    index = load_persisted_index(embed_model=EMBED_MODEL, chroma_dir=CHROMA_DIR)
except FileNotFoundError as missing_index:
    # A missing index is reported instead of raised; any other error propagates.
    print(missing_index)
    print("Please run notebooks/02_indexing_chroma_llamaindex.ipynb first, then re-run this notebook.")
else:
    print(f"Loaded persisted index from: {CHROMA_DIR}")


In [None]:
# Demo questions keyed by a short display label; insertion order is the
# order in which they are run.
demo_queries = dict(
    (
        ("Q1 easy win", "What chunking overlap is currently recommended?"),
        ("Q2 drift question", "What embedding model should we use?"),
    )
)

# Bare expression so the notebook renders the mapping as the cell output.
demo_queries


In [None]:
# For each demo query: show the retrieved chunks, run the baseline RAG
# pipeline, then print its answer, optional notes, and citations.
# Newlines inside string literals are written as "\n" escapes; a raw line
# break inside a single-quoted string literal is a SyntaxError.
if index is None:
    print("Skipping retrieval + generation because persisted index is unavailable.")
else:
    for label, query in demo_queries.items():
        # Blank line plus a 90-character rule separates consecutive queries.
        print("\n" + "=" * 90)
        print(f"{label}: {query}")

        # Retrieval is run separately here only to inspect what was fetched;
        # baseline_rag_answer below receives the index and query itself.
        retrieved = retrieve_chunks(index=index, query=query, top_k=TOP_K)
        retrieved_df = pd.DataFrame(
            [
                {
                    "score": item["score"],
                    "doc_date": item["doc_date"],
                    "doc_title": item["doc_title"],
                    "chunk_id": item["chunk_id"],
                    # First 220 characters, flattened to a single line.
                    "snippet": item["text"][:220].replace("\n", " "),
                }
                for item in retrieved
            ]
        )

        print("\nRetrieved chunks:")
        # NOTE(review): `display` is not imported here; presumably the
        # IPython/Jupyter built-in. Confirm if run outside a notebook.
        display(retrieved_df)

        result = baseline_rag_answer(
            index=index,
            query=query,
            top_k=TOP_K,
            model=OPENAI_MODEL,
            temperature=TEMPERATURE,
            max_context_chars=MAX_CONTEXT_CHARS,
        )

        print("\nAnswer:")
        print(result["answer"])
        # Notes are optional: printed only when present and non-empty.
        if result.get("notes"):
            print("\nNotes:")
            print(result["notes"])

        citations_df = pd.DataFrame(result["citations"])
        print("\nCitations:")
        display(citations_df)


### Why baseline RAG can fail under timeframe drift

This baseline pipeline retrieves semantically similar chunks and asks the model to answer from that context only. Because retrieval can surface chunks from different dates (older and newer recommendations), the model may blend conflicting guidance or pick an outdated recommendation. In the next (agentic) notebook, we will add explicit temporal reasoning and conflict handling to improve consistency.