# ColBERT Chatbot Demo

This notebook builds a local ColBERT index from the files in `docs/` (300-word chunks, 50-word overlap) and chats with it directly from Jupyter—no extra frameworks or environments required.

## Prerequisites
- Install project requirements once (`pip install -r requirements.txt`).
- Place your TXT/PDF/DOCX sources under `docs/`.
- Make sure `OPENAI_API_KEY` is available in your environment (or a `.env` file).

In [None]:
from pathlib import Path

from rag import build_colbert_index

INDEX_NAME = "ats_rag_index"
METADATA_PATH = Path("data/chunk_metadata.json")

# The chunk-metadata file serves as the "index already built" marker: if it is
# present, the build step is skipped and the existing index is re-used.
if METADATA_PATH.exists():
    print(f"Re-using existing index '{INDEX_NAME}' (metadata at {METADATA_PATH}).")
else:
    # No metadata yet: build the index from the docs/ directory.
    stats = build_colbert_index(
        index_name=INDEX_NAME,
        docs_dir="docs",
        overlap=50,
        chunk_size=300,
    )
    summary = (
        f"Built index '{stats['index_name']}' with "
        f"{stats['num_documents']} documents / {stats['num_chunks']} chunks."
    )
    print(summary)

In [None]:
from rag import ColbertChatbot

# Constructor settings gathered in one place so they are easy to tweak.
# NOTE(review): the exact meaning of `k` and `context_window` is defined by
# ColbertChatbot in the `rag` module — confirm there before changing them.
chatbot_settings = {
    "index_name": INDEX_NAME,
    "metadata_path": str(METADATA_PATH),  # handed over as a plain string
    "k": 3,
    "context_window": 6,
}
chatbot = ColbertChatbot(**chatbot_settings)

chatbot.greet()

In [None]:
def ask(question: str):
    """Send *question* to the chatbot, print the answer, and return the response.

    When the response reports that retrieved context was used, the distinct
    sources of that context are printed under a "References:" heading, in the
    order they first appear in ``response.contexts``.

    Args:
        question: The question text, forwarded unchanged to ``chatbot.ask``.

    Returns:
        The response object produced by ``chatbot.ask``.
    """
    response = chatbot.ask(question)
    print(response.answer)
    if response.used_context:
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would list the references in an arbitrary, run-dependent order.
        refs = dict.fromkeys(ctx.source for ctx in response.contexts)
        print("\nReferences:")
        for ref in refs:
            print(f"- {ref}")
    return response

# Example question: ask() prints the answer (plus any references it used) and
# returns the response object.
ask("Summarize the key points from the sample document.")