In [None]:
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader, DirectoryLoader

# Collect every PDF under ../data/pdf (the "**" in the glob also matches files
# in sub-folders) and parse each one with PyMuPDFLoader.
dir_loader = DirectoryLoader(
    path="../data/pdf", glob="**/*.pdf", loader_cls=PyMuPDFLoader, show_progress=False
)

# Load everything up front; the splitting cell below consumes this result.
pdf_documents = dir_loader.load()

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_documents(documents, chunk_size=1000, chunk_overlap=200):
    """Break documents into overlapping chunks suitable for retrieval.

    Args:
        documents: Documents to split; handed unchanged to the splitter.
        chunk_size: Chunk size given to the splitter, measured with ``len``.
        chunk_overlap: Overlap between neighbouring chunks given to the
            splitter, measured the same way.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` for
        ``documents``.
    """
    splitter_config = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        # Coarsest boundary first: paragraph break, line break, space, and
        # finally the empty string.
        "separators": ["\n\n", "\n", " ", ""],
    }
    return RecursiveCharacterTextSplitter(**splitter_config).split_documents(documents)

# Chunk the loaded PDF documents using the default chunk size and overlap.
all_splits = split_documents(pdf_documents)

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Embedding model used for the chunks here and for queries at retrieval time.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


def _stable_chunk_ids(chunks):
    """Return one deterministic id per chunk: source path + position in that source.

    Args:
        chunks: Documents whose ``metadata`` dict may carry a ``"source"`` key.

    Returns:
        A list of id strings, in the same order as ``chunks``. Ids are unique
        within the list because the position counter is kept per source.
    """
    seen_per_source = {}
    ids = []
    for chunk in chunks:
        source = chunk.metadata.get("source", "unknown")
        position = seen_per_source.get(source, 0)
        seen_per_source[source] = position + 1
        ids.append(f"{source}::chunk-{position}")
    return ids


# Why explicit ids: the store is persisted to disk, and without ids every run of
# this cell inserts the same chunks again under fresh random ids. The collection
# then grows with duplicates and top-k retrieval returns repeated passages.
# With stable ids a re-run overwrites the existing entries instead.
# NOTE: a ../chroma_db directory created before this change still contains the
# old randomly-keyed entries; delete it once to get rid of them.
vector_store = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
    ids=_stable_chunk_ids(all_splits),
    persist_directory="../chroma_db"  # Saves to project root
)

## RAG Retriever & LLM Integration


In [None]:
## Create a retriever from the vector store
# Similarity search that returns the 3 closest chunks for each query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Smoke test: run one query and report how many documents came back.
query = "What is deep learning?"
retrieved_docs = retriever.invoke(query)
print(f"Retrieved {len(retrieved_docs)} documents for: '{query}'")

Retrieved 3 documents for: 'What is deep learning?'


In [None]:
## Set up LLM
# Using Ollama (local LLM)
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Build the chat model; on any failure fall back to llm = None so the cells
# below can skip chain creation instead of crashing.
# NOTE(review): constructing ChatOllama may not contact the Ollama server, in
# which case a stopped server only surfaces on the first invoke - confirm.
try:
    llm = ChatOllama(
        model="gpt-oss:20b",  # Change to your installed model (llama3, mistral, etc.)
        temperature=0.7
    )
except Exception as init_error:
    print(f"Ollama initialization failed: {init_error}")
    print("Make sure Ollama is running: ollama serve")
    llm = None
else:
    print("Ollama LLM initialized successfully")


Ollama LLM initialized successfully


In [None]:
## Create RAG Chain
# System instructions: answer only from the supplied context, admit when the
# context is insufficient, and stay concise. Written as explicit literals so
# the line breaks (and the space before the first one) are visible.
system_prompt = (
    "You are an AI assistant that answers questions based on the provided context. \n"
    "If the context doesn't contain the answer, say \"I don't have enough information to answer this question.\"\n"
    "Keep your answers concise and relevant."
)

# Two-message template: the fixed system instructions, then a human turn that
# carries the retrieved context and the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "Context:\n{context}\n\nQuestion: {question}"),
    ]
)

# Assemble the RAG chain by hand; skipped when no LLM is available.
if not llm:
    rag_chain = None
    print("RAG chain not created (LLM not available)")
else:
    def format_docs(docs):
        """Join the page_content of each document, separated by blank lines."""
        return "\n\n".join(document.page_content for document in docs)

    # The incoming question feeds both branches: it is sent through the
    # retriever (whose results are flattened to text) to build "context", and
    # passed through unchanged as "question".
    rag_chain = (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
        | prompt
        | llm
        | StrOutputParser()
    )
    print("RAG chain created successfully")


RAG chain created successfully


In [None]:
if not rag_chain:
    print("Cannot test RAG chain (LLM not available)")
else:
    # Example queries
    test_queries = [
        "What is deep learning?",
        "Explain attention mechanisms",
        "What are transformers?",
    ]

    for query in test_queries:
        print(f"Query: {query}")

        answer = rag_chain.invoke(query)
        print(f"\nAnswer:\n{answer}")

        # The chain only returns the answer text, so the retriever is queried
        # a second time here purely to list where the context came from.
        retrieved = retriever.invoke(query)
        print(f"\nRetrieved Sources ({len(retrieved)} docs):")
        for i, doc in enumerate(retrieved, start=1):
            print(f"  {i}. {doc.metadata.get('source', 'Unknown')}")

        # Visual separator between queries
        print("=" * 60)


Query: What is deep learning?

Answer:
I don't have enough information to answer this question.

Retrieved Sources (3 docs):
  1. ../data/pdf/nlp_llm/Language-Models-are-Few-Shot-Learners.pdf
  2. ../data/pdf/nlp_llm/Language-Models-are-Few-Shot-Learners.pdf
  3. ../data/pdf/nlp_llm/Language-Models-are-Few-Shot-Learners.pdf
Query: Explain attention mechanisms

Answer:
Attention mechanisms let a model “look at” different parts of an input sequence when producing each output token.  
The core idea is to compute, for each query vector **q**, a weighted sum of value vectors **v** where the weights come from a similarity score between **q** and each key vector **k**.

**Key steps**

1. **Linear projections**  
   The input embeddings are projected into three spaces (queries, keys, values) using learned weight matrices. In multi‑head attention this is done *h* times with different projections, producing \(h\) sets of \((q, k, v)\) each of size \(d_k, d_k, d_v\).

2. **Similarity scores**  
 