In [6]:
# rag_faiss_final.ipynb
import os
import os

# Read the Groq API key from the process environment and stop right away
# when it is absent or empty, before any model client is constructed.
api_key = os.getenv("GROQ_API_KEY")

if api_key is None or api_key == "":
    raise ValueError("Please set the GROQ_API_KEY environment variable before running the notebook")


In [7]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq

In [11]:
def build_rag_pipeline(
    pdf_path="Concept Note.pdf",
    *,
    chunk_size=800,
    chunk_overlap=100,
    k=3,
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    llm_model="llama-3.1-8b-instant",
):
    """Build a retrieval-augmented generation (RAG) chain over one PDF.

    The PDF is loaded, split into overlapping character chunks, embedded,
    and indexed in an in-memory FAISS store. The returned chain retrieves
    the top-``k`` chunks for a question, inserts them into a prompt that
    restricts the model to that context, and sends the prompt to a Groq
    chat model.

    Args:
        pdf_path: Path of the PDF to index.
        chunk_size: Maximum characters per chunk handed to the splitter.
        chunk_overlap: Characters shared between consecutive chunks.
        k: Number of chunks the retriever returns per query.
        embedding_model: Model name passed to ``HuggingFaceEmbeddings``.
        llm_model: Model name passed to ``ChatGroq``.

    Returns:
        A ``(rag_chain, retriever, chunks)`` tuple: the runnable chain
        (invoke it with the question string), the FAISS retriever, and the
        list of chunk documents that were indexed.

    Raises:
        ValueError: If the PDF produced no chunks, so there is nothing to
            index.
    """
    # 1. load pdf
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()

    # 2. split into chunks
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(docs)

    # A PDF without extractable text (e.g. scanned images) yields zero chunks.
    # Fail here with a clear message instead of letting index construction
    # fail later on an empty embedding list.
    if not chunks:
        raise ValueError(
            f"No text chunks could be extracted from {pdf_path!r}; "
            "cannot build a vector index from an empty document."
        )

    # 3. embeddings (local, free)
    emb = HuggingFaceEmbeddings(model_name=embedding_model)

    # 4. FAISS vector DB
    db = FAISS.from_documents(chunks, emb)

    # 5. retriever
    retriever = db.as_retriever(search_kwargs={"k": k})

    # 6. prompt template
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="""
You are an expert assistant. Use ONLY the context below to answer the question.

Context:
{context}

Question:
{question}

Answer:
"""
    )
    # 7. LLM (choose currently supported Groq model)
    llm = ChatGroq(model=llm_model)  # use the model that works for your key

    # 8. build runnable RAG pipeline
    def format_docs(docs):
        # Join the retrieved chunks into one context string, separated by
        # blank lines.
        return "\n\n".join(d.page_content for d in docs)

    rag_chain = (
        {
            # The incoming question is used both as the retrieval query and,
            # unchanged, as the {question} slot of the prompt.
            "context": retriever | RunnableLambda(format_docs),
            "question": RunnablePassthrough()
        }
        | prompt
        | llm
    )

    return rag_chain, retriever, chunks

def ask_question(rag_chain, query):
    """Send ``query`` through ``rag_chain`` and return the reply text.

    Args:
        rag_chain: Object exposing ``invoke(query)`` whose result has a
            ``content`` attribute.
        query: The question passed to the chain.

    Returns:
        The ``content`` attribute of the chain's response.
    """
    return rag_chain.invoke(query).content

def main():
    """Run the demo: build the pipeline, answer sample queries, show a baseline.

    Builds the RAG pipeline over "Concept Note.pdf", prints the chunk count
    and the retriever's ``k``, answers each demo query through the chain, and
    finally prints the answer of the same Groq model called directly, with no
    retrieved context, to a fixed question.
    """
    pipeline, doc_retriever, doc_chunks = build_rag_pipeline("Concept Note.pdf")
    top_k = doc_retriever.search_kwargs.get('k', 3)
    print(f"Loaded {len(doc_chunks)} chunks. Retriever k={top_k}")

    # Demo queries
    demo_queries = ["what is the recipe for pizza "]

    for query in demo_queries:
        print("\n>>> QUERY:", query)
        print("ANSWER:\n", ask_question(pipeline, query))

    # LLM-only baseline: call the model directly, without any retrieval step.
    print("\n--- LLM-only baseline (no retrieval) ---")
    bare_llm = ChatGroq(model="llama-3.1-8b-instant")
    reply = bare_llm.invoke("What is the main purpose of the Concept Note?")
    print("LLM-only answer:\n", reply.content)

    print("\nDone.")

# Entry point: run the demo only when this code executes as the main module.
if __name__ == "__main__":
    main()


Loaded 10 chunks. Retriever k=3

>>> QUERY: what is the recipe for pizza 
ANSWER:
 I don't have information about the recipe for pizza in the provided context. The context only discusses Physics-Informed Neural Networks (PINNs) for climate modeling, specifically a case study on rainfall prediction in the Blue Nile Basin.

--- LLM-only baseline (no retrieval) ---
LLM-only answer:
 A Concept Note is a document that provides an overview of a project, proposal, or idea. Its main purpose is to introduce, explain, and justify a project or idea, highlighting its key components, objectives, and expected outcomes.

Typically, a Concept Note is used in the following contexts:

1. **Grant proposals**: To submit a project idea to a funding organization, highlighting its relevance, feasibility, and potential impact.
2. **Project planning**: To outline the scope, goals, and objectives of a project, ensuring that all stakeholders are aligned and aware of the project's direction.
3. **Research proposa