In [None]:
import os
from generation.generate_answer import RAGGenerator
from indexing.retrieve_chunks import retrieve

In [None]:
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# Token that is later passed to RAGGenerator. An empty value is rejected as
# well as a missing one: a blank `HF_HUB_TOKEN=` entry would satisfy an
# `is not None` check and be handed on unchanged. An explicit raise is used
# instead of `assert` so the check still runs when Python is started with -O.
HF_TOKEN = os.environ.get("HF_HUB_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_HUB_TOKEN not found in environment or .env file")

# Use remote embedding to avoid local model downloads.
# NOTE(review): this is set after `indexing.retrieve_chunks` has already been
# imported in the first cell. If that module reads USE_REMOTE_EMBED at import
# time rather than at call time, the flag must be set before the import — confirm.
os.environ["USE_REMOTE_EMBED"] = "1"

In [None]:
# Model identifiers that can be passed to RAGGenerator.
# Only the first entry is used by the generation cell (it reads MODELS[0]);
# reorder the list to try a different model.
MODELS = [
    "tiiuae/falcon-7b-instruct",  # index 0: the one picked up by default
    "HuggingFaceH4/zephyr-7b-beta",
    "mistralai/Mistral-7B-Instruct-v0.2",
]

In [None]:
# Questions to run through the pipeline; the retrieval cell uses queries[0].
queries = ["How do I enable mixed precision training in PyTorch?"]


In [None]:
# Retrieval settings: ask for TOP_K candidates, then keep the first RERANK_K.
# Despite its name, RERANK_K does not trigger any reranking in this cell; it
# only truncates the list returned by retrieve().
TOP_K = 10
RERANK_K = 3

query = queries[0]

# Dense retrieval (remote embedding). rerank=False skips cross-encoder
# reranking to avoid local downloads.
retrieved = retrieve(query, top_k=TOP_K, rerank=False)

# With reranking off, simply take the leading results in the order returned.
top_chunks = retrieved[:RERANK_K]

# Stop here if nothing came back, instead of sending an empty context on to
# the generation cell.
assert len(top_chunks) > 0, f"retrieve() returned no chunks for query: {query!r}"

# Quick sanity display: how many chunks were kept and their titles.
# .get() is used so a chunk without a "title" shows None rather than raising,
# matching how the results cell reads the same field.
len(top_chunks), [c.get("title") for c in top_chunks]

In [None]:
# Build a generator for the first configured model and answer `query` using
# the chunks selected by the retrieval cell.
model_name = MODELS[0]
rag = RAGGenerator(model_name, HF_TOKEN)

# The results cell reads "model", "prompt", "answer" and "chunks" from `result`.
# NOTE(review): temperature=0.0 presumably requests deterministic decoding —
# confirm against RAGGenerator.generate.
result = rag.generate(query=query, chunks=top_chunks, max_new_tokens=200, temperature=0.0)

In [None]:
# Report which model/endpoint produced the answer. "endpoint" is optional in
# `result`; "text_generation" is printed when it is absent.
print("Model used:", result["model"])
print("Endpoint used:", result.get("endpoint", "text_generation"))

# Show the exact prompt, then the model's answer.
print("==== PROMPT SENT TO MODEL ====\n")
print(result["prompt"])

print("\n==== GENERATED ANSWER ====\n")
print(result["answer"])

# List every chunk that was supplied as context.
print("\n==== CHUNKS USED ====\n")
for idx, chunk in enumerate(result["chunks"], start=1):
    print(f"[Chunk {idx}]")
    # Optional metadata: a missing key prints None instead of raising.
    for label, key in (
        ("Title:", "title"),
        ("Source:", "source"),
        ("Strategy:", "chunk_strategy"),
    ):
        print(label, chunk.get(key))
    # "text" is accessed directly, so a chunk without it raises KeyError.
    print("Text:", chunk["text"])
    print("-" * 60)