In [None]:
# 📝 Notes on this RAG Notebook

- **Purpose:** This notebook demonstrates a basic **RAG (Retrieval-Augmented Generation)** workflow using a PDF document as the knowledge source.
- **How it works:**
  1. The PDF is loaded and split into **text chunks**.
  2. Each chunk is embedded with `HuggingFaceEmbeddings` and stored in a **vector database** (FAISS).
  3. A **retriever** fetches the most relevant chunks when a question is asked.
  4. A **language model** (Flan-T5, loaded here as `google/flan-t5-small`) generates an answer based on the retrieved context.

- **What this shows:**
  - I understand the concepts of **document retrieval, embedding, and generation**.
  - The answer is **grounded in the document**, reducing hallucinations.
  - The setup is simple, **functional, and clear**, making it reproducible.

- **Notes on output:**
  - Answers are concise and reflect **information directly from the context**.
  - This notebook can be extended to multiple documents or larger models.

- **Tools used:**
  - `langchain`, `sentence-transformers`, `faiss`, `transformers`
  - CPU device (for demonstration; GPU recommended for larger models)


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# -----------------------------
# 1. Load the generator model
# -----------------------------
model_name = "google/flan-t5-small"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create pipeline
llm = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # CPU
    max_new_tokens=150,
)

# Fallback answer: the prompt tells the model to use it, and the code uses it
# directly when retrieval produces nothing to ground an answer on.
NO_ANSWER = "I don't know."

# -----------------------------
# 2. Retrieve relevant context
# -----------------------------
question = "What is LoRA and why is it efficient?"

# NOTE(review): `retriever` is not defined in this cell; it has to exist in the
# kernel already (presumably built in an earlier cell) -- confirm the notebook
# survives Restart Kernel -> Run All.
docs = retriever.invoke(question)
# Each retrieved document contributes its `page_content`, one chunk per line.
context = "\n".join(doc.page_content for doc in docs)

# -----------------------------
# 3. RAG prompt
# -----------------------------
prompt = f"""
Answer the question ONLY using the context below.
Do NOT repeat phrases.
If the answer is not in the context, respond with "{NO_ANSWER}"

Context:
{context}

Question:
{question}

Answer:
"""

# -----------------------------
# 4. Generate answer
# -----------------------------
if context.strip():
    response = llm(prompt)
else:
    # Nothing was retrieved (or only whitespace): calling the model on a blank
    # context would produce an ungrounded answer, so skip generation and keep
    # `response` in the same list-of-dict shape the pipeline returns.
    response = [{"generated_text": NO_ANSWER}]

# Keep only the text after the last "Answer:" label, in case the output
# contains one, and trim surrounding whitespace.
answer = response[0]["generated_text"].split("Answer:")[-1].strip()

print("ANSWER:\n")
print(answer)
