# **Final Indigo_Chatbot**

# **Install LangSmith**

Sets up LangSmith tooling so you can trace, debug, and compare runs.
Ensures your project has experiment tracking from the very first step.

In [10]:
pip install -U langsmith


Collecting langsmith
  Using cached langsmith-0.4.13-py3-none-any.whl.metadata (14 kB)
Using cached langsmith-0.4.13-py3-none-any.whl (372 kB)
Installing collected packages: langsmith
  Attempting uninstall: langsmith
    Found existing installation: langsmith 0.1.147
    Uninstalling langsmith-0.1.147:
      Successfully uninstalled langsmith-0.1.147
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.2.6 requires langsmith<0.2.0,>=0.1.17, but you have langsmith 0.4.13 which is incompatible.
langchain-core 0.2.43 requires langsmith<0.2.0,>=0.1.112, but you have langsmith 0.4.13 which is incompatible.
langchain-community 0.2.6 requires langsmith<0.2.0,>=0.1.0, but you have langsmith 0.4.13 which is incompatible.[0m[31m
[0mSuccessfully installed langsmith-0.4.13


# **Configure LangSmith Project & Tracing**

Defines LANGCHAIN_API_KEY, enables tracing v2, and names the project.
Gives you instant observability into chains, prompts, and model calls.

In [11]:
import os
from getpass import getpass

# Never store the key in the notebook: take it from the environment, or prompt once
# per kernel session. NOTE(review): the key that was previously committed in this cell
# should be revoked and rotated.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass("LangSmith API key: ").strip()
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Indigo-RAG-Streamlit"  # name your project


# **Install Core Dependencies**

Pins and installs LangChain, ChromaDB, Sentence-Transformers, pypdf, Groq, and friends.
Locks versions for reproducible environments and fewer “works-on-my-machine” issues.

In [12]:
# Core libs (no faiss)
!pip -q install "numpy<2.0" pypdf==4.2.0
!pip -q install langchain==0.2.6 langchain-community==0.2.6
!pip -q install chromadb==0.5.3 sentence-transformers==3.0.1
!pip -q install groq==0.9.0 tiktoken==0.7.0 python-dotenv==1.0.1


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/321.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m266.2/321.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m321.1/321.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.35.0 requires protobuf<5,>=3.20, but you have protobuf 6.31.1 which is incompatible.
tensorflow 2.19.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3, but you have protobuf 6.31.1 which is incompatible.
grpcio-status 1.71.2 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 6.31.1 which is incompatible.
google-ai-generativelanguage 0.6.15 requires protobuf!=4.21.0,!=4.21.1,!=4.21

# **Set Groq API Key**

Adds GROQ_API_KEY to the environment so Llama3 endpoints can be called securely.
One place to manage credentials without hard-coding them in the app.

In [13]:
import os
from getpass import getpass

# Read the Groq key from the environment, or prompt for it; never store it in the
# notebook. NOTE(review): the key that was previously committed in this cell should be
# revoked and rotated.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ").strip()
print("GROQ key set:", bool(os.environ.get("GROQ_API_KEY")))


GROQ key set: True


# **Build RAG Engine (rag_core.py)**

Writes a reusable module: loaders, chunking, embeddings, Chroma, RAG answer, summarize, quiz, and logging.
Keeps the core retrieval + generation logic clean, testable, and separate from the UI.

In [14]:
%%writefile rag_core.py
import os, json, requests, sqlite3, time, random, hashlib, shutil, re
from typing import List

# ───────────────────── LangSmith tracking ─────────────────────
# The API key is deliberately NOT embedded in this module: it must come from the
# process environment (export LANGCHAIN_API_KEY before launching the app).
# Tracing defaults to "true" only when a key is present; values that the caller
# already exported are never overwritten.
if os.environ.get("LANGCHAIN_API_KEY"):
    os.environ.setdefault("LANGCHAIN_TRACING_V2", "true")
    os.environ.setdefault("LANGCHAIN_PROJECT", "Indigo-RAG-Streamlit")
else:
    # No credentials: keep tracing off instead of attempting unauthenticated uploads.
    os.environ.setdefault("LANGCHAIN_TRACING_V2", "false")

from langsmith import traceable  # noqa: F401

# Runtime safety to avoid GPU meta-tensor issues in some Colab images
# NOTE(review): an empty CUDA_VISIBLE_DEVICES normally hides every GPU from CUDA
# libraries loaded afterwards, so unless the variable is already set before this
# module is imported, _pick_device() will presumably always report "cpu" — confirm
# this is intended given the "prefer GPU" wording used elsewhere in this file.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
# Only applied when the caller has not set TOKENIZERS_PARALLELISM already.
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# ───────────────────── Config ─────────────────────
# Embedding model name. For a lighter, faster embedder export e.g.
#   EMBED_MODEL="sentence-transformers/all-MiniLM-L6-v2"
EMBED_MODEL = os.environ.get("EMBED_MODEL", "thenlper/gte-large")

# Model tried first; GROQ_MODEL_OVERRIDE (set by the app sidebar) wins over the default.
GROQ_MODEL_PRIMARY = os.environ.get("GROQ_MODEL_OVERRIDE", "llama-3.3-70b-versatile")

# Built-in fallback order, used when GROQ_MODEL_CANDIDATES is unset or empty. Example:
#   GROQ_MODEL_CANDIDATES="llama-3.1-8b-instant,deepseek-r1-distill-llama-70b,llama-3.1-70b-versatile"
DEFAULT_CANDIDATES = [
    "llama-3.3-70b-versatile",
    "deepseek-r1-distill-llama-70b",
    "llama-3.1-70b-versatile",
    "llama-3.1-8b-instant",
    "llama3-70b-8192",   # legacy aliases (if present on your account)
    "llama3-8b-8192"
]

# Comma-separated override list from the environment, with blanks dropped.
_env_candidates = list(filter(
    None,
    (name.strip() for name in os.environ.get("GROQ_MODEL_CANDIDATES", "").split(",")),
))

# Final try-order: the primary first, then every other candidate exactly as listed.
GROQ_MODEL_CANDIDATES = [GROQ_MODEL_PRIMARY]
GROQ_MODEL_CANDIDATES.extend(
    name for name in (_env_candidates or DEFAULT_CANDIDATES) if name != GROQ_MODEL_PRIMARY
)

# Groq OpenAI-compatible base
GROQ_BASE = os.environ.get("GROQ_BASE_URL", "https://api.groq.com/openai/v1")

# ───────────────────── Embeddings / Chroma ─────────────────────
def _pick_device():
    """Return "cuda" when torch imports and reports CUDA as available, otherwise "cpu"."""
    has_cuda = False
    try:
        import torch
        has_cuda = bool(torch.cuda.is_available())
    except Exception:
        # torch missing or broken: fall back to CPU.
        has_cuda = False
    return "cuda" if has_cuda else "cpu"

# Loaded embedders, keyed by (model name, device). load_chroma() and
# build_or_update_chroma() both call get_embeddings(), so without this cache the
# sentence-transformer would be re-instantiated on every call.
_EMBEDDINGS_CACHE = {}

def get_embeddings():
    """
    Return the HuggingFace sentence-transformer embedder, creating it at most once
    per (model, device) pair for the lifetime of the process.

    The model name is re-read from the EMBED_MODEL environment variable on each call
    (falling back to the module-level EMBED_MODEL), so changing the variable yields a
    new embedder while previously loaded ones stay cached. The device comes from
    _pick_device(). Embeddings are normalized and encoded in batches of 64.

    Returns:
        A HuggingFaceEmbeddings instance (shared between callers).
    """
    device = _pick_device()  # "cuda" or "cpu"
    model_name = os.getenv("EMBED_MODEL", EMBED_MODEL)
    cache_key = (model_name, device)

    embedder = _EMBEDDINGS_CACHE.get(cache_key)
    if embedder is None:
        embedder = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs={"device": device},
            encode_kwargs={"normalize_embeddings": True, "batch_size": 64},
        )
        _EMBEDDINGS_CACHE[cache_key] = embedder
    return embedder

def chunk_texts(docs, chunk_size=1800, chunk_overlap=150):
    """
    Split documents with a RecursiveCharacterTextSplitter and return the pieces.

    The defaults favour fewer, larger chunks, which means fewer embedding calls
    and therefore faster indexing.
    """
    return RecursiveCharacterTextSplitter(
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    ).split_documents(docs)

def load_pdfs_as_docs(file_paths: List[str]):
    """
    Read each PDF and return one Document per page that contains text.

    Page text is whitespace-normalized (runs of whitespace become single spaces).
    Each Document carries metadata {"source": <file basename>, "page": <1-based page number>}.
    Pages whose extracted text is empty are skipped.
    """
    from langchain_core.documents import Document

    documents = []
    for pdf_path in file_paths:
        reader = PdfReader(pdf_path)
        for page_number, page in enumerate(reader.pages, start=1):
            words = (page.extract_text() or "").split()
            if not words:
                continue
            metadata = {"source": os.path.basename(pdf_path), "page": page_number}
            documents.append(Document(page_content=" ".join(words), metadata=metadata))
    return documents

def _safe_rmtree(path: str):
    """Best-effort delete of a directory tree or a single file; never raises."""
    try:
        is_directory = os.path.isdir(path)
        if not is_directory and os.path.exists(path):
            os.remove(path)
        elif is_directory:
            shutil.rmtree(path, ignore_errors=True)
    except Exception:
        # Deliberately swallowed: cleanup must not abort the caller.
        return None

def build_or_update_chroma(chunks, persist_dir="chroma_db", collection_name="docs", reset: bool = False):
    """
    Build or update a persistent Chroma index from document chunks.

    Args:
        chunks: documents exposing `page_content` and `metadata` (as produced by chunk_texts).
        persist_dir: directory holding the Chroma database; created if missing.
        collection_name: Chroma collection to write into.
        reset: when True, every entry inside `persist_dir` is deleted first so the
            index is rebuilt from scratch; when False, chunks are added to whatever
            is already stored.

    Each chunk gets an id derived from its source, page and text. Indexing the same
    content again therefore reuses the same id instead of storing a second copy, and
    exact duplicates within one call are dropped (the store rejects repeated ids).

    Returns:
        The Chroma vector store. If there is nothing to index, a handle to the
        existing (possibly empty) collection is returned.
    """
    emb = get_embeddings()
    os.makedirs(persist_dir, exist_ok=True)

    if reset:
        # Clean the directory to rebuild from scratch
        for fn in os.listdir(persist_dir):
            _safe_rmtree(os.path.join(persist_dir, fn))

    unique_chunks, ids, seen = [], [], set()
    for chunk in chunks:
        meta = chunk.metadata or {}
        key = "\x1f".join((str(meta.get("source", "")), str(meta.get("page", "")), chunk.page_content))
        # "replace" keeps hashing safe for text with unencodable code points from PDF extraction.
        chunk_id = hashlib.sha256(key.encode("utf-8", "replace")).hexdigest()
        if chunk_id in seen:
            continue
        seen.add(chunk_id)
        unique_chunks.append(chunk)
        ids.append(chunk_id)

    if not unique_chunks:
        # Nothing to embed: just open the collection.
        return Chroma(persist_directory=persist_dir, collection_name=collection_name, embedding_function=emb)

    # from_documents() adds to the named collection (creating it if needed); it does
    # not wipe existing data — that is what `reset` is for.
    vs = Chroma.from_documents(
        unique_chunks, emb, ids=ids,
        persist_directory=persist_dir, collection_name=collection_name,
    )

    # persist() is deprecated in langchain-community (Chroma >= 0.4 writes to disk
    # automatically) and missing from newer integrations, so call it only if present.
    persist = getattr(vs, "persist", None)
    if callable(persist):
        persist()
    return vs

def load_chroma(persist_dir="chroma_db", collection_name="docs"):
    """Open the Chroma collection stored under `persist_dir`, wired to get_embeddings()."""
    return Chroma(
        embedding_function=get_embeddings(),
        collection_name=collection_name,
        persist_directory=persist_dir,
    )

# ───────────────────── Groq HTTP client ─────────────────────
def _sleep_backoff(attempt: int) -> None:
    """Sleep for 2**attempt seconds (capped at 12) plus up to one second of random jitter."""
    base_delay = 2 ** attempt
    if base_delay > 12:
        base_delay = 12
    jitter = random.random()
    time.sleep(base_delay + jitter)

def _groq_post(payload: dict, api_key: str, base_url: str):
    """
    POST `payload` as JSON to `{base_url}/chat/completions` with bearer authentication.

    Uses a 60-second timeout and returns the `requests` response object unmodified;
    status handling is left to the caller.
    """
    return requests.post(
        f"{base_url}/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        },
        json=payload,
        timeout=60,
    )

def groq_complete(prompt: str, temperature: float = 0.2, max_tokens: int = 512) -> str:
    """
    Send one chat-completion request to Groq and return the assistant's text.

    Models are tried in order: the primary model (GROQ_MODEL_OVERRIDE, re-read on each
    call) followed by the remaining GROQ_MODEL_CANDIDATES. For each model:
      • 429 whose message mentions a per-day limit → switch to the next model at once;
      • any other 429, or 500/502/503/504 → jittered backoff, retry the same model
        (up to 6 attempts), then move on;
      • 400/404 reporting a missing or decommissioned model → switch to the next model;
      • 404 "unknown URL" → RuntimeError pointing at the base URL;
      • any other HTTP error → raised immediately.

    Decisions are based on the response status and JSON error body, not on the text of
    a raised exception.

    Args:
        prompt: user message; a fixed system message is prepended.
        temperature: sampling temperature forwarded to the API.
        max_tokens: completion token limit forwarded to the API.

    Returns:
        The content of the first choice's message.

    Raises:
        RuntimeError: GROQ_API_KEY is unset, the base URL is wrong, or every candidate
            model was exhausted or unavailable.
        requests.RequestException: transport failure or a non-retryable HTTP error.
    """
    api_key = os.getenv("GROQ_API_KEY", "")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY not set")

    base_url = os.getenv("GROQ_BASE_URL", GROQ_BASE)
    primary = os.getenv("GROQ_MODEL_OVERRIDE", GROQ_MODEL_PRIMARY)

    candidates = [primary] + [m for m in GROQ_MODEL_CANDIDATES if m != primary]
    last_err = None

    def _error_details(resp):
        """Return (message, code) from a JSON error body, or (raw text, "") if unparsable."""
        try:
            err = (resp.json() or {}).get("error") or {}
            return str(err.get("message", "") or ""), str(err.get("code", "") or "")
        except Exception:
            return resp.text, ""

    for model in candidates:
        payload = {
            "model": model,
            "temperature": float(temperature),
            "max_tokens": int(max_tokens),
            "messages": [
                {"role": "system", "content": "You are a concise, grounded assistant."},
                {"role": "user", "content": prompt},
            ],
            "stream": False,
        }

        # Retry loop per model (handles RPM/TPM and 5xx). Daily caps → next model immediately.
        for attempt in range(6):
            r = _groq_post(payload, api_key, base_url)  # transport errors propagate
            status = r.status_code

            if status == 429:
                emsg, _ = _error_details(r)
                lowered = emsg.lower()
                # The error code is the same for every kind of rate limit, so only the
                # message can tell a daily cap from a per-minute one.
                if "per day" in lowered or "(tpd)" in lowered or "(rpd)" in lowered:
                    last_err = f"TPD exhausted for `{model}`: {emsg}"
                    break
                last_err = emsg
                _sleep_backoff(attempt)
                continue

            if status in (500, 502, 503, 504):
                last_err = f"HTTP {status}"
                _sleep_backoff(attempt)
                continue

            if status in (400, 404):
                emsg, ecode = _error_details(r)
                if ecode == "unknown_url" or "unknown request url" in emsg.lower():
                    raise RuntimeError(
                        f"Groq base URL looks wrong: {base_url}. "
                        "Expected 'https://api.groq.com/openai/v1'. Original error: " + emsg
                    )
                # NOTE(review): codes as observed from Groq's API — confirm against current docs.
                if ecode in ("model_not_found", "model_decommissioned"):
                    last_err = f"Model `{model}` unavailable: {emsg}"
                    break

            r.raise_for_status()
            data = r.json()
            return data["choices"][0]["message"]["content"]

        # Reached on break (daily cap / model unavailable) or after exhausting retries.
        continue

    # All candidates failed/exhausted
    raise RuntimeError(f"All Groq models exhausted/limited. Last error: {last_err}")

# ───────────────────── Helpers ─────────────────────
def _format_citations(docs) -> str:
    """
    Render documents as numbered context blocks separated by blank lines.

    Each block is "[S<n>] (<source> p.<page>)" followed by the document text on the
    next line; n starts at 1. Missing metadata falls back to "unknown" / "-".
    """
    def _render(slot, doc):
        meta = doc.metadata
        source = meta.get("source", "unknown")
        page = meta.get("page", "-")
        return f"[S{slot}] ({source} p.{page})\n{doc.page_content}"

    return "\n\n".join(_render(slot, doc) for slot, doc in enumerate(docs, start=1))

def _coerce_json_array(raw: str):
    """
    Extract a JSON array from messy LLM output.

    Strategy, in order:
      1. strip Markdown code fences (``` / ```json);
      2. parse the span from the first "[" to the last "]";
      3. same span after a crude single→double quote replacement (last-resort repair);
      4. scan every "[" and keep the longest substring that decodes as a JSON array,
         first on the plain text and then on the quote-replaced text. This recovers the
         array when surrounding prose contains brackets of its own (e.g. "[S1]").

    Returns:
        The decoded list.

    Raises:
        ValueError: no JSON array could be recovered.
    """
    text = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw.strip(), flags=re.IGNORECASE | re.MULTILINE)
    variants = (text, text.replace("'", '"'))

    # Steps 2-3: whole bracketed span, plain text first so valid apostrophes survive.
    for candidate in variants:
        start, end = candidate.find("["), candidate.rfind("]")
        if start != -1 and end > start:
            try:
                return json.loads(candidate[start:end + 1])
            except (ValueError, RecursionError):
                pass  # fall through

    # Step 4: longest decodable array anywhere in the text.
    decoder = json.JSONDecoder()
    for candidate in variants:
        best, best_len = None, -1
        pos = candidate.find("[")
        while pos != -1:
            try:
                value, stop = decoder.raw_decode(candidate, pos)
            except (ValueError, RecursionError):
                pos = candidate.find("[", pos + 1)
                continue
            if stop - pos > best_len:
                best, best_len = value, stop - pos
            pos = candidate.find("[", stop)  # resume after the decoded array
        if best is not None:
            return best

    raise ValueError("Could not coerce JSON array from model output.")

# ───────────────────── RAG tasks ─────────────────────
def rag_answer(vs, question: str, k: int = 5, temperature: float = 0.2, max_tokens: int = 512):
    """
    Single-pass RAG: retrieve the top-k chunks for `question`, ask the LLM to answer
    strictly from them, and return the answer with a slot → (source, page) mapping.

    Returns:
        {"answer": <model text>, "citations": [{"slot": "S1", "source": ..., "page": ...}, ...]}
    """
    hits = vs.similarity_search(question, k=k)
    context_block = _format_citations(hits)
    prompt = f"""Answer strictly from the context. If unsure, say you don't know.
Be EXHAUSTIVE: include every step, rule, exception, and prerequisite you find.
Cite sources inline like [S1], [S2] (they map to (source,page)). Use numbered lists.

Question: {question}

Context:
{context_block}

Answer (exhaustive with citations):"""
    answer_text = groq_complete(prompt, temperature=temperature, max_tokens=max_tokens)

    citations = []
    for slot, hit in enumerate(hits, start=1):
        citations.append({
            "slot": f"S{slot}",
            "source": hit.metadata.get("source"),
            "page": hit.metadata.get("page"),
        })
    return {"answer": answer_text, "citations": citations}

def rag_answer_exhaustive(vs, question: str, k: int = 40, batch: int = 8,
                          temperature: float = 0.0, max_tokens: int = 900):
    """
    Multi-pass (map-reduce) RAG answer.

    Retrieves the top-k chunks, answers the question once per batch of `batch` chunks,
    then asks the model to merge the partial answers into one.

    Citation slots are numbered globally across batches (S1..Sk) so that an [S#] tag
    kept in the merged answer identifies exactly one retrieved chunk, and the returned
    `citations` list covers every slot that can appear in the answer.

    Args:
        vs: vector store exposing `similarity_search(query, k=...)`.
        question: user question.
        k: number of chunks to retrieve.
        batch: chunks per partial answer; values below 1 are treated as 1.
        temperature: sampling temperature for the partial answers (the merge step uses 0.0).
        max_tokens: token limit for each partial answer (the merge step uses 1200).

    Returns:
        {"answer": <merged text>, "citations": [{"slot": "S1", "source": ..., "page": ...}, ...]}
    """
    docs = vs.similarity_search(question, k=k)
    step = max(1, int(batch))  # range() rejects a zero step

    def _context_block(slot, d):
        # Same layout as _format_citations, but with a caller-supplied slot number.
        src = d.metadata.get("source", "unknown")
        pg = d.metadata.get("page", "-")
        return f"[S{slot}] ({src} p.{pg})\n{d.page_content}"

    partials = []
    for offset in range(0, len(docs), step):
        group = docs[offset:offset + step]
        ctx = "\n\n".join(_context_block(offset + j, d) for j, d in enumerate(group, start=1))
        prompt = f"""You will answer a complex question using ONLY this batch of context.
List EVERY relevant step/detail you can find. Use [S#] citations for each bullet.

Question: {question}

Context:
{ctx}

Partial answer (bulleted with citations):"""
        partials.append(groq_complete(prompt, temperature=temperature, max_tokens=max_tokens))
    merged_ctx = "\n\n".join([f"[P{i+1}] {p}" for i, p in enumerate(partials)])
    final_prompt = f"""You are merging partial answers into ONE exhaustive answer.
Remove duplicates, keep ordering logical, and preserve [S#] citations from partials.
If something conflicts, note both and cite them.

Question: {question}

Partials:
{merged_ctx}

Final exhaustive answer (numbered list with [S#] citations):"""
    final = groq_complete(final_prompt, temperature=0.0, max_tokens=1200)
    cites = [{"slot": f"S{i}", "source": d.metadata.get("source"), "page": d.metadata.get("page")}
             for i, d in enumerate(docs, start=1)]
    return {"answer": final, "citations": cites}

def summarize(vs, topic_hint: str = "overview", k: int = 20, max_tokens: int = 700):
    """
    Retrieve the k chunks closest to `topic_hint` and return the model's bullet-point
    summary of them (temperature fixed at 0.2).
    """
    context_block = _format_citations(vs.similarity_search(topic_hint, k=k))
    prompt = f"""Summarize the following content clearly and concisely as bullet points with key steps/terms.

Context:
{context_block}

Summary:"""
    return groq_complete(prompt, temperature=0.2, max_tokens=max_tokens)

def quiz(vs, topic_hint: str = "", num: int = 5, k: int = 15, max_tokens: int = 900):
    """
    Generate `num` multiple-choice questions from the k chunks closest to `topic_hint`.

    A blank hint falls back to a generic query. The model is asked for a bare JSON
    array; the reply is parsed strictly first, then tolerantly.

    Returns:
        The parsed list of question dicts, or [{"raw": <model output>}] when no list
        could be parsed, so the caller can display the raw text.
    """
    query = "important concepts and procedures" if not topic_hint.strip() else topic_hint
    context_block = _format_citations(vs.similarity_search(query, k=k))

    # JSON-only instructions
    prompt = f"""Generate {num} MCQs from the context.
Rules:
- USE ONLY the context.
- Output MUST be a JSON array (no markdown, no prose), exactly like:
[
  {{"question":"...","options":["A","B","C","D"],"answer":"A","why":"1-line rationale"}},
  ...
]
- "options" must have 4 strings. "answer" must be exactly one of the options.

Context:
{context_block}
"""

    # Temperature 0 keeps the reply terse.
    raw = groq_complete(prompt, temperature=0.0, max_tokens=max_tokens)

    # Strict parser first, tolerant parser second; the first list wins.
    for parse in (json.loads, _coerce_json_array):
        try:
            parsed = parse(raw)
        except Exception:
            continue
        if isinstance(parsed, list):
            return parsed

    # Not JSON: surface the raw text for debugging (UI shows it)
    return [{"raw": raw}]

# ───────────────────── SQLite logger ─────────────────────
def init_logger(db_path: str = "rag_logs.db"):
    """
    Create the `interactions` table in the SQLite database at `db_path` if it does
    not exist yet. Safe to call repeatedly.

    The connection is always closed, including when table creation fails.
    """
    con = sqlite3.connect(db_path)
    try:
        con.execute("""
    CREATE TABLE IF NOT EXISTS interactions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        ts INTEGER,
        session_id TEXT,
        user TEXT,
        question TEXT,
        mode TEXT,
        model TEXT,
        top_k INTEGER,
        max_tokens INTEGER,
        temperature REAL,
        answer TEXT,
        citations TEXT,
        error TEXT
    )""")
        con.commit()
    finally:
        con.close()

def log_interaction(session_id: str, user: str, question: str, mode: str,
                    model: str, top_k: int, max_tokens: int, temperature: float,
                    answer: str, citations: list, error: str | None = None,
                    db_path: str = "rag_logs.db"):
    con = sqlite3.connect(db_path)
    cur = con.cursor()
    cur.execute("""
        INSERT INTO interactions
        (ts, session_id, user, question, mode, model, top_k, max_tokens, temperature, answer, citations, error)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        int(time.time()), session_id, user, question, mode, model, top_k, max_tokens, float(temperature),
        answer, json.dumps(citations or []), error
    ))
    con.commit(); con.close()


Overwriting rag_core.py


# **App Skeleton (app.py) — Full UI with Streamlit including CSS**

Creates the initial Streamlit app wiring to import rag_core utilities.
Establishes the structure for data flow between upload → index → query → answer.

In [15]:
%%writefile app.py
import html
import json
import os
import tempfile

import streamlit as st

from rag_core import (
    load_pdfs_as_docs, chunk_texts, build_or_update_chroma, load_chroma,
    rag_answer, rag_answer_exhaustive, summarize, quiz,
    init_logger, log_interaction
)

# ---------- Config ----------
# Display name used in the browser tab title and in the hero banner.
BOT_NAME = "IndiGo RAG Bot"
# Remote image shown in the hero banner (fetched by the browser, not by the app).
HERO_IMG = "https://img.freepik.com/free-vector/chatbot-chat-message-vectorart_78370-4104.jpg?semt=ais_hybrid&w=740&q=80"

# ---------- Page setup ----------
# Wide layout, with the bot name in the tab title.
st.set_page_config(page_title=f"{BOT_NAME} (Chroma + Groq)", page_icon="✈️", layout="wide")

# ---------- Light Blue / Deep Blue with Yellow Accent Theme ----------
# Colour tokens interpolated into THEME_CSS below. NOTE(review): "success" and
# "danger" are not referenced by the stylesheet in this section.
PALETTE = {
    "bg1": "#e6f2ff", "bg2": "#b3d9ff", "card": "#ffffff", "text": "#0d1b2a",
    "muted": "#4f5d75", "brand": "#0077b6", "brand2": "#00b4d8", "accent": "#ffcc00",
    "success": "#10b981", "danger": "#ef4444",
}
# Global stylesheet, injected via st.markdown(..., unsafe_allow_html=True).
# This is an f-string: literal CSS braces are doubled ({{ }}) and single braces
# interpolate PALETTE entries. The .chat-bubble-* classes are used by the Document
# Chat page. NOTE(review): .hero-img/.hero-title/.hero-sub are not used by the hero
# markup in the visible part of this file.
THEME_CSS = f"""
<style>
html, body, [data-testid="stAppViewContainer"] {{
  background: linear-gradient(120deg, {PALETTE['bg1']}, {PALETTE['bg2']});
  color: {PALETTE['text']};
}}
[data-testid="stHeader"] {{ background: rgba(0,0,0,0); }}
.block-container {{
  padding-top: 1.2rem; padding-bottom: 2rem;
  background: {PALETTE['card']};
  border-radius: 16px; box-shadow: 0 10px 40px rgba(0,0,0,.15);
  border: 1px solid rgba(0,0,0,.08);
}}

[data-testid="stSidebar"] {{ background: {PALETTE['brand']}; }}
[data-testid="stSidebar"] .stMarkdown, [data-testid="stSidebar"] label,
[data-testid="stSidebar"] h1,[data-testid="stSidebar"] h2,[data-testid="stSidebar"] h3 {{ color: #fff !important; }}
[data-testid="stSidebar"] input, [data-testid="stSidebar"] textarea,
[data-testid="stSidebar"] .stTextInput > div > div > input,
[data-testid="stSidebar"] .stNumberInput input, [data-testid="stSidebar"] .stSelectbox > div,
[data-testid="stSidebar"] .stFileUploader, [data-testid="stSidebar"] .stSlider {{
  background: #ffffff !important; color: {PALETTE['text']} !important;
  border: 1px solid {PALETTE['brand2']} !important; border-radius: 10px !important;
}}
[data-testid="stSidebar"] [role="listbox"] *, [data-testid="stSidebar"] [data-baseweb="select"] * {{
  color: {PALETTE['text']} !important;
}}
[data-testid="stSidebar"] svg {{ color: {PALETTE['text']} !important; fill: {PALETTE['text']} !important; }}

div[data-testid="stSlider"] label, div[data-testid="stSlider"] span,
div[data-testid="stTickBarMin"], div[data-testid="stTickBarMax"] {{ color: {PALETTE['text']} !important; }}
div[data-baseweb="slider"] [role="slider"]{{ background:#f59e0b !important; border:2px solid {PALETTE['text']} !important; }}
div[data-baseweb="slider"] .rc-slider-track, div[data-baseweb="slider"] .rc-slider-rail {{ background:#0ea5e9 !important; }}

[data-testid="stSidebar"] .stButton > button {{
  background: linear-gradient(90deg, #ef4444, #f97316);
  color: white; border: 0; padding: .55rem 1rem; border-radius: 10px; font-weight: 600;
  box-shadow: 0 6px 18px rgba(0,0,0,.15);
}}
[data-testid="stSidebar"] .stButton > button:hover {{ filter: brightness(1.05); }}

h1, h2, h3, h4 {{ color: {PALETTE['text']}; }}
input, textarea, .stTextInput > div > div > input {{
  background: {PALETTE['bg1']} !important; color: {PALETTE['text']} !important;
  border: 1px solid {PALETTE['brand']} !important; border-radius: 10px !important;
}}
.stSelectbox, .stNumberInput, .stFileUploader, .stSlider, .stRadio, .stCheckbox label {{ color: {PALETTE['text']} !important; }}

.stButton > button {{
  background: linear-gradient(90deg, {PALETTE['brand']}, {PALETTE['brand2']});
  color: white; border: 0; padding: .55rem 1rem; border-radius: 10px; font-weight: 600;
  box-shadow: 0 6px 18px rgba(0,0,0,.15);
}}
.stButton > button:hover {{ filter: brightness(1.05); }}
[data-testid="baseButton-secondary"] {{ background: {PALETTE['accent']} !important; color: {PALETTE['text']} !important; border: none !important; }}

.streamlit-expanderHeader {{ color: {PALETTE['text']} !important; }}
code, pre {{ background: {PALETTE['bg1']} !important; color: {PALETTE['text']} !important; }}

.chat-bubble-user {{ background: rgba(0,183,255,.12); border: 1px solid {PALETTE['brand2']};
  padding: 12px 14px; border-radius: 12px; margin: 8px 0; color: {PALETTE['text']}; }}
.chat-bubble-assistant {{ background: rgba(255,204,0,.10); border: 1px solid {PALETTE['accent']};
  padding: 12px 14px; border-radius: 12px; margin: 8px 0; color: {PALETTE['text']}; }}

.hero-img {{
  width: 92px; height: 92px; border-radius: 18px; object-fit: cover;
  border: 2px solid {PALETTE['brand2']}; box-shadow: 0 10px 30px rgba(0,0,0,.15);
}}
.hero-title {{ font-size: 26px; font-weight: 800; letter-spacing:.2px; }}
.hero-sub {{ color: {PALETTE['muted']}; font-size: 13px; }}
.hr-soft {{ border-top: 1px solid rgba(0,0,0,.08); margin: .5rem 0 1rem; }}
</style>
"""
st.markdown(THEME_CSS, unsafe_allow_html=True)

# ---------- Hero ----------
# Centered banner: bot image, title and tagline, followed by a soft divider.
# Only module constants are interpolated, so raw HTML is safe here.
hero_html = f"""
    <div style="text-align: center; padding: 12px 6px 4px;">
        <img src="{HERO_IMG}" alt="Bot Image" style="width: 200px; border-radius: 16px; margin-bottom: 10px; box-shadow: 0 10px 30px rgba(0,0,0,.20);" />
        <h1 style="margin-bottom: 0;">✈️ {BOT_NAME}</h1>
        <p style="color: #274c77; font-size: 15px;">Multi-PDF → Chroma → Q&amp;A / Chat / Summary / Quiz (Groq)</p>
    </div>
    <hr class="hr-soft">
    """
st.markdown(hero_html, unsafe_allow_html=True)

# ---------- Session defaults ----------
# Seed st.session_state on the first run only; keys that already exist keep their value.
for _key, _default in (
    ("vs_ready", False),
    ("messages", []),
    ("nav", "📥 Build Index"),
    ("persist_dir", "chroma_db"),
    ("collection", "indigo_docs"),
    ("top_k", 12),
    ("temperature", 0.1),
    ("max_tokens", 2048),
    ("username", "user"),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# ---------- Logger ----------
# Make sure the SQLite interactions table exists before any page logs to it.
init_logger("rag_logs.db")

# ---------- Sidebar ----------
# Option lists for the sidebar widgets.
GROQ_MODEL_OPTIONS = ["llama-3.3-70b-versatile","deepseek-r1-distill-llama-70b","llama-3.1-70b-versatile","llama-3.1-8b-instant"]
NAV_PAGES = ["📥 Build Index", "❓ Q&A", "💬 Document Chat", "📝 Summarize", "🧩 Quiz"]

with st.sidebar:
    st.header("Settings")
    # Widgets live in a form, so edits take effect only when "Apply" is pressed.
    with st.form("settings_form"):
        # NOTE(review): the field is pre-filled from, and written back to, os.environ,
        # which is process-wide — not really "session only". Confirm this is intended.
        groq_key_in = st.text_input(
            "GROQ API Key (session only)",
            type="password",
            value=os.environ.get("GROQ_API_KEY",""),
        )
        model_choice = st.selectbox("Groq model", GROQ_MODEL_OPTIONS, index=0)

        persist_dir = st.text_input("Chroma persist dir", value=st.session_state.persist_dir)
        collection  = st.text_input("Collection name", value=st.session_state.collection)
        top_k       = st.slider("Top-K retrieved chunks", 2, 60, st.session_state.top_k)
        temperature = st.slider("LLM Temperature", 0.0, 1.0, st.session_state.temperature)
        # The captions show the applied (session) value, not the pending slider position.
        st.caption(f"Temp value: **{st.session_state.get('temperature', temperature):.2f}**")
        max_tokens  = st.slider("Max tokens", 256, 4096, st.session_state.max_tokens, step=64)
        st.caption(f"Max tokens value: **{st.session_state.get('max_tokens', max_tokens)}**")

        nav = st.radio("Go to", NAV_PAGES, index=NAV_PAGES.index(st.session_state.nav))

        submitted = st.form_submit_button("Apply")
        if submitted:
            if groq_key_in:
                os.environ["GROQ_API_KEY"] = groq_key_in.strip()
            # rag_core re-reads this variable on each completion call.
            os.environ["GROQ_MODEL_OVERRIDE"] = model_choice

            for _name, _value in (
                ("persist_dir", persist_dir),
                ("collection", collection),
                ("top_k", top_k),
                ("temperature", temperature),
                ("max_tokens", max_tokens),
                ("nav", nav),
            ):
                st.session_state[_name] = _value

    key_status = "✅ set" if os.environ.get("GROQ_API_KEY") else "❌ missing"
    st.caption(f"Key status: {key_status}")
    embed_model_name = os.environ.get("EMBED_MODEL", "thenlper/gte-large")
    st.caption(f"Embeddings: {embed_model_name} (GPU if available)")

# ---------- Active settings ----------
# Pages read the applied (session) values, not the pending form inputs above.
persist_dir, collection, top_k, temperature, max_tokens, active_page = (
    st.session_state[_setting]
    for _setting in ("persist_dir", "collection", "top_k", "temperature", "max_tokens", "nav")
)

def need_vs():
    """Stop rendering the current page with a warning unless an index is marked ready."""
    if st.session_state.vs_ready:
        return
    st.warning("No vector DB loaded yet. Build or Load an index first.")
    st.stop()

# ---------- Pages ----------
if active_page == "📥 Build Index":
    st.subheader("Upload PDFs → Build/Update Chroma Index")

    files = st.file_uploader("Upload one or more PDFs", type=["pdf"], accept_multiple_files=True)

    # Reset toggle: wipe the persist dir before indexing.
    reset_index = st.checkbox("Reset index (rebuild from scratch)", value=False, help="Deletes existing Chroma data in the chosen persist dir before indexing.")

    colA, colB = st.columns(2)
    with colA:
        if st.button("Build / Update Index", use_container_width=True):
            if not files:
                st.error("Please upload at least one PDF")
            else:
                chunks = []
                with st.spinner("Indexing..."):
                    # Uploads go into a private temp directory that is removed afterwards.
                    # The client-supplied name is reduced to its basename so it cannot
                    # escape that directory, and each file gets its own sub-folder so
                    # equal names do not overwrite each other. The basename is kept
                    # because load_pdfs_as_docs stores it as the citation source.
                    with tempfile.TemporaryDirectory(prefix="rag_upload_") as tmp_root:
                        paths = []
                        for n, f in enumerate(files):
                            safe_name = os.path.basename(str(f.name).replace("\\", "/"))
                            if safe_name in ("", ".", ".."):
                                safe_name = f"upload_{n}.pdf"
                            slot_dir = os.path.join(tmp_root, str(n))
                            os.makedirs(slot_dir, exist_ok=True)
                            p = os.path.join(slot_dir, safe_name)
                            with open(p, "wb") as w:
                                w.write(f.getbuffer())
                            paths.append(p)
                        docs = load_pdfs_as_docs(paths)
                    # Chunk settings mirror the rag_core defaults.
                    chunks = chunk_texts(docs, chunk_size=1800, chunk_overlap=150)
                    if chunks:
                        _ = build_or_update_chroma(chunks, persist_dir=persist_dir, collection_name=collection, reset=reset_index)
                        st.session_state.vs_ready = True
                if chunks:
                    st.success(f"Indexed {len(chunks)} chunks → {persist_dir}")
                else:
                    # e.g. scanned PDFs without a text layer
                    st.error("No extractable text found in the uploaded PDFs; nothing was indexed.")

    with colB:
        if st.button("Load Existing Index", use_container_width=True):
            try:
                _ = load_chroma(persist_dir=persist_dir, collection_name=collection)
                st.session_state.vs_ready = True
                st.success("Loaded existing Chroma index.")
            except Exception as e:
                st.error(f"Failed to load: {e}")
elif active_page == "❓ Q&A":
    need_vs()
    st.subheader("Ask a question (grounded in your PDFs)")
    q = st.text_input("Your question", placeholder="e.g., Full procedure to publish a contract workspace")
    detailed = st.checkbox("Detailed (multi-pass map-reduce)", value=True)
    batch_size = st.slider("Batch size (Detailed mode)", 3, 12, 6)

    if st.button("Answer", use_container_width=True):
        with st.spinner("Thinking..."):
            vs = load_chroma(persist_dir=persist_dir, collection_name=collection)
            try:
                if detailed:
                    out = rag_answer_exhaustive(
                        vs, q, k=max(24, top_k*2), batch=batch_size,
                        temperature=0.0, max_tokens=min(1200, max_tokens)
                    )
                    mode = "qna_exhaustive"
                else:
                    out = rag_answer(vs, q, k=top_k, temperature=temperature, max_tokens=max_tokens)
                    mode = "qna"

                st.markdown("### Answer")
                st.write(out["answer"])
                with st.expander("Citations"):
                    for c in out["citations"]:
                        st.write(f"[{c['slot']}] {c['source']} (p.{c['page']})")

                log_interaction(
                    session_id="ui", user=st.session_state.username,
                    question=q, mode=mode,
                    model=os.environ.get("GROQ_MODEL_OVERRIDE","llama-3.3-70b-versatile"),
                    top_k=top_k, max_tokens=max_tokens,
                    temperature=(temperature if mode=="qna" else 0.0),
                    answer=out["answer"], citations=out["citations"], error=None
                )
            except Exception as e:
                st.error(str(e))
                log_interaction(
                    session_id="ui", user=st.session_state.username,
                    question=q, mode="qna" if not detailed else "qna_exhaustive",
                    model=os.environ.get("GROQ_MODEL_OVERRIDE","llama-3.3-70b-versatile"),
                    top_k=top_k, max_tokens=max_tokens,
                    temperature=(temperature if not detailed else 0.0),
                    answer="", citations=[], error=str(e)
                )

elif active_page == "💬 Document Chat":
    need_vs()
    st.subheader("Document Chat")

    user_msg = st.text_input("Message", placeholder="Ask a follow-up question…")
    col1, col2, col3 = st.columns([2,1,1])
    with col1:
        send = st.button("Send", use_container_width=True)
    with col2:
        if st.button("Clear Chat (session)"):
            st.session_state.messages = []
            st.success("Cleared in-memory chat history.")
    with col3:
        chat_history_txt = "\n".join([f"{m['role']}: {m['content']}" for m in st.session_state.messages])
        st.download_button("Download Chat (TXT)", chat_history_txt, file_name="chat_history.txt")

    if send and user_msg.strip():
        st.session_state.messages.append({"role":"user","content":user_msg})
        convo = "\n".join([f"{m['role']}: {m['content']}" for m in st.session_state.messages[-6:]])
        q2 = f"Conversation so far:\n{convo}\n\nUser's latest question: {user_msg}"
        with st.spinner("Thinking..."):
            vs = load_chroma(persist_dir=persist_dir, collection_name=collection)
            try:
                out = rag_answer(vs, q2, k=top_k, temperature=temperature, max_tokens=max_tokens)
                st.session_state.messages.append({"role":"assistant","content":out["answer"]})
                log_interaction(
                    session_id="ui", user=st.session_state.username,
                    question=user_msg, mode="chat",
                    model=os.environ.get("GROQ_MODEL_OVERRIDE","llama-3.3-70b-versatile"),
                    top_k=top_k, max_tokens=max_tokens, temperature=temperature,
                    answer=out["answer"], citations=out["citations"], error=None
                )
            except Exception as e:
                st.error(str(e))
                log_interaction(
                    session_id="ui", user=st.session_state.username,
                    question=user_msg, mode="chat",
                    model=os.environ.get("GROQ_MODEL_OVERRIDE","llama-3.3-70b-versatile"),
                    top_k=top_k, max_tokens=max_tokens, temperature=temperature,
                    answer="", citations=[], error=str(e)
                )

    st.markdown("#### Recent Chat")
    for m in st.session_state.messages[-12:]:
        if m["role"] == "user":
            st.markdown(f"<div class='chat-bubble-user'><b>You:</b> {m['content']}</div>", unsafe_allow_html=True)
        else:
            st.markdown(f"<div class='chat-bubble-assistant'><b>Assistant:</b> {m['content']}</div>", unsafe_allow_html=True)

elif active_page == "📝 Summarize":
    need_vs()
    st.subheader("Summarize (chapter/page-level brief)")
    topic = st.text_input("(Optional) Topic/file hint", value="overview")
    if st.button("Summarize", use_container_width=True):
        with st.spinner("Summarizing..."):
            vs = load_chroma(persist_dir=persist_dir, collection_name=collection)
            s = summarize(vs, topic_hint=topic, k=20, max_tokens=max_tokens)
            st.markdown("### Summary")
            st.write(s)
            log_interaction(
                session_id="ui", user=st.session_state.username,
                question=f"[SUMMARY] {topic}", mode="summary",
                model=os.environ.get("GROQ_MODEL_OVERRIDE","llama-3.3-70b-versatile"),
                top_k=20, max_tokens=max_tokens, temperature=0.2,
                answer=s, citations=[], error=None
            )

elif active_page == "🧩 Quiz":
    need_vs()
    st.subheader("Generate MCQ quiz")
    topic = st.text_input("(Optional) Topic hint", placeholder="e.g., PR→PO flow")
    num = st.slider("Number of questions", 3, 15, 5)
    if st.button("Create Quiz", use_container_width=True):
        with st.spinner("Generating..."):
            vs = load_chroma(persist_dir=persist_dir, collection_name=collection)
            items = quiz(vs, topic_hint=topic, num=num)
            if isinstance(items, list) and items and isinstance(items[0], dict) and "question" in items[0]:
                for i, qx in enumerate(items, start=1):
                    st.markdown(f"**Q{i}. {qx['question']}**")
                    for opt in qx["options"]:
                        st.write(f"- {opt}")
                    st.caption(f"**Answer:** {qx['answer']} — {qx.get('why','')}")
            else:
                st.write("Model returned non-JSON output:")
                st.code(items, language="json")
            log_interaction(
                session_id="ui", user=st.session_state.username,
                question=f"[QUIZ] {topic} (n={num})", mode="quiz",
                model=os.environ.get("GROQ_MODEL_OVERRIDE","llama-3.3-70b-versatile"),
                top_k=top_k, max_tokens=max_tokens, temperature=0.2,
                answer=json.dumps(items, ensure_ascii=False), citations=[], error=None
            )


Overwriting app.py


# **Install Streamlit**

Ensures the UI runs on a known version for consistent behavior.
Avoids surprise breakages from upstream updates.

In [16]:
!pip install -q streamlit==1.35.0


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opentelemetry-proto 1.36.0 requires protobuf<7.0,>=5.0, but you have protobuf 4.25.8 which is incompatible.
ydf 0.13.0 requires protobuf<7.0.0,>=5.29.1, but you have protobuf 4.25.8 which is incompatible.
grpcio-status 1.71.2 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 4.25.8 which is incompatible.[0m[31m
[0m

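# **Set Groq API Key (Environment/Service Prep)**

Exports `GROQ_API_KEY` into the notebook's environment before the app is launched.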

In [17]:
import os
from getpass import getpass

# SECURITY: never hardcode the Groq key in the notebook -- anything typed here is
# saved with the file and shared with it. Any key that was previously committed
# in this cell should be revoked and replaced.
# The key is taken from the environment when already set; otherwise it is
# requested interactively (getpass does not echo the input into the output).
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter GROQ_API_KEY: ").strip()
print("GROQ key set:", bool(os.environ.get("GROQ_API_KEY")))


GROQ key set: True


# **Run App + Public Tunnel (Cloudflared)**

Launches Streamlit headless and opens a public URL via Cloudflared.
Perfect for quick demos, remote reviews, and mobile testing.

In [18]:
import subprocess, time, re, os
from IPython.display import display, HTML  # NEW: pretty banner

workdir = "/content"  # change if app.py is elsewhere

# 1) Start Streamlit
streamlit_proc = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", "8501", "--server.headless", "true"],
    cwd=workdir,
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
)

# 2) Download cloudflared binary
!wget -q -O /content/cloudflared https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
!chmod +x /content/cloudflared

# 3) Start the tunnel and print the URL
tunnel_proc = subprocess.Popen(
    ["/content/cloudflared", "tunnel", "--url", "http://localhost:8501", "--no-autoupdate"],
    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
)

public_url = None
start_time = time.time()
print("Starting Streamlit + Cloudflare tunnel...")

while time.time() - start_time < 120:
    line = tunnel_proc.stdout.readline()
    if not line:
        time.sleep(1); continue
    line = line.strip()
    print(line)
    if "trycloudflare.com" in line:
        m = re.search(r"(https://[a-z0-9-]+\.trycloudflare\.com)", line)
        if m:
            public_url = m.group(1)
            print("\nPublic URL:", public_url)

            # NEW: read a few more lines so your banner lands AFTER the
            # "Registered tunnel connection ..." message (if/when it appears)
            tail_deadline = time.time() + 8  # wait up to 8s for that line
            while time.time() < tail_deadline:
                tail = tunnel_proc.stdout.readline()
                if not tail:
                    time.sleep(0.2); continue
                tail = tail.strip()
                print(tail)
                if "Registered tunnel connection" in tail:
                    break

            # NEW: big, bold, colored, clickable banner
            display(HTML(f"""
            <div style="margin-top:12px; text-align:center;">
              <div style="
                display:inline-block;
                font-size:26px; font-weight:800;
                color:#0B57D0; background:#FFF3CD;
                border:2px solid #FCD34D; border-radius:12px;
                padding:14px 18px; box-shadow:0 2px 10px rgba(0,0,0,0.08);
              ">
                🚀 <span style="color:#111827;">Click to open Streamlit:</span>
                <a href="{public_url}" target="_blank" style="color:#DC2626; text-decoration:none; font-weight:900;">
                  {public_url}
                </a>
              </div>
            </div>
            """))
            break

if not public_url:
    print("\nIf you don't see the URL above, re-run this cell and keep it running.")


Starting Streamlit + Cloudflare tunnel...
2025-08-12T03:32:34Z INF Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
2025-08-12T03:32:34Z INF Requesting new quick Tunnel on trycloudflare.com...
2025-08-12T03:32:37Z INF +--------------------------------------------------------------------------------------------+
2025-08-12T03:32:37Z INF |  Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):  |
2025-08-12T03:32:37Z INF |  https://eng