In [None]:
# =============================================================================
# RAG - Retrieval Augmented Generation (LangChain 1.0+)
# =============================================================================
"""
=====================================================================
Retrieval Augmented Generation (RAG)
=====================================================================

What is RAG?
------------
RAG enhances LLM responses by retrieving relevant documents
and including them as context. This allows LLMs to:
- Answer questions about YOUR data
- Stay up-to-date (no training cutoff limitation)
- Reduce hallucinations with grounded facts

RAG Pipeline:
-------------
    User Query
         │
         ▼
    ┌─────────────────┐
    │   1. EMBED      │  Convert query to vector
    └─────────────────┘
         │
         ▼
    ┌─────────────────┐
    │  2. RETRIEVE    │  Find similar documents
    │  (Vector Store) │
    └─────────────────┘
         │
         ▼
    ┌─────────────────┐
    │  3. AUGMENT     │  Add docs to prompt
    │  (Prompt)       │
    └─────────────────┘
         │
         ▼
    ┌─────────────────┐
    │  4. GENERATE    │  LLM creates response
    │  (Model)        │
    └─────────────────┘
         │
         ▼
    Final Answer

Updated for LangChain 1.0+ (2025-2026)
"""

import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if one exists) into the process
# environment. Values already present in the environment are then read below.
load_dotenv()

# Only the OpenAI key is exercised by the cells in this notebook
# (OpenAIEmbeddings and the "gpt-4o-mini" chat model). The Groq and Google
# keys are treated as optional so that their absence does not block the run.
REQUIRED_API_KEYS = ("OPENAI_API_KEY",)
OPTIONAL_API_KEYS = ("GROQ_API_KEY", "GOOGLE_API_KEY")

# Why not `os.environ[name] = os.getenv(name)`: os.environ only accepts str
# values, so that assignment raises `TypeError: str expected, not NoneType`
# for every unset key, and is a no-op for every key that IS set.
missing_required = [name for name in REQUIRED_API_KEYS if not os.getenv(name)]
if missing_required:
    raise RuntimeError(
        "Missing required API key(s): "
        + ", ".join(missing_required)
        + ". Set them in your environment or in a .env file, then re-run this cell."
    )

missing_optional = [name for name in OPTIONAL_API_KEYS if not os.getenv(name)]
if missing_optional:
    print("Note: optional API key(s) not set: " + ", ".join(missing_optional))

print("‚úÖ Environment configured for RAG examples")

In [None]:
# =============================================================================
# Step 1: Document Loading
# =============================================================================
"""
Document Loaders
----------------
Load data from various sources:
- TextLoader - Plain text files
- PyPDFLoader - PDF documents
- WebBaseLoader - Web pages
- CSVLoader - CSV files
- DirectoryLoader - Multiple files from a folder

Each loader returns Document objects with:
- page_content: The text content
- metadata: Source info, page numbers, etc.
"""

from langchain_core.documents import Document

print("=" * 60, "Step 1: Document Loading", "=" * 60, sep="\n")

# The tutorial uses a handful of in-memory documents instead of reading files.
# A real pipeline would obtain Document objects from a loader (for example
# TextLoader or PyPDFLoader) rather than constructing them by hand.
#
# Each entry is (source, topic, text). The text literals are kept exactly as
# authored, including their embedded line breaks and indentation.
_SAMPLE_DOC_SPECS = [
    (
        "langchain_intro.txt",
        "overview",
        """LangChain is a framework for developing applications powered by 
        large language models (LLMs). It provides tools for prompt management, chains, 
        agents, and memory management. LangChain was created by Harrison Chase in 2022.""",
    ),
    (
        "lcel_guide.txt",
        "lcel",
        """LCEL (LangChain Expression Language) is a declarative way to compose 
        chains using the pipe operator. It supports streaming, async, batch processing, 
        and parallel execution out of the box. LCEL is the recommended way to build chains.""",
    ),
    (
        "rag_overview.txt",
        "rag",
        """RAG (Retrieval Augmented Generation) combines LLMs with external 
        knowledge retrieval. It helps reduce hallucinations and allows LLMs to answer 
        questions about specific documents or data not in their training set.""",
    ),
    (
        "vector_stores.txt",
        "storage",
        """Vector stores like FAISS, Chroma, and Pinecone store document 
        embeddings for fast similarity search. They are essential components of RAG 
        systems, enabling semantic search over large document collections.""",
    ),
    (
        "agents_guide.txt",
        "agents",
        """LangChain agents can use tools to interact with external systems. 
        The create_agent() function in LangChain 1.0 is the standard way to create agents. 
        Agents can search the web, query databases, or call APIs.""",
    ),
]

# Materialise the specs as Document objects carrying source/topic metadata.
sample_documents = [
    Document(page_content=text, metadata={"source": source, "topic": topic})
    for source, topic, text in _SAMPLE_DOC_SPECS
]

print(f"\nüìÑ Loaded {len(sample_documents)} documents")
for position, document in enumerate(sample_documents, start=1):
    meta = document.metadata
    print(f"   {position}. {meta['source']} ({meta['topic']})")

In [None]:
# =============================================================================
# Step 2: Text Splitting
# =============================================================================
"""
Text Splitters
--------------
Documents often need to be split into smaller chunks:
- Fit within context window limits
- More precise retrieval
- Better embedding quality

Common splitters:
- RecursiveCharacterTextSplitter - Best general-purpose
- CharacterTextSplitter - Simple character-based
- TokenTextSplitter - Based on token count

Key parameters:
- chunk_size: Maximum characters per chunk
- chunk_overlap: Characters shared between chunks
"""

from langchain_text_splitters import RecursiveCharacterTextSplitter

print("=" * 60, "Step 2: Text Splitting", "=" * 60, sep="\n")

# Separators listed in priority order: paragraph breaks first, then line
# breaks, sentence ends, single spaces, and finally the empty string.
split_priorities = ["\n\n", "\n", ". ", " ", ""]

text_splitter = RecursiveCharacterTextSplitter(
    separators=split_priorities,
    chunk_size=200,       # upper bound on characters in one chunk
    chunk_overlap=50,     # characters shared by neighbouring chunks
    length_function=len,  # measure chunk size in characters
)

# Break every sample document into chunks.
splits = text_splitter.split_documents(sample_documents)

print(f"\nüìÑ Original: {len(sample_documents)} documents")
print(f"üìã After splitting: {len(splits)} chunks")
print("\nüìù Sample chunks:")

# Preview the first three chunks: their length and their first 80 characters.
for chunk_no, chunk in enumerate(splits[:3], start=1):
    chunk_text = chunk.page_content
    print(f"\n   Chunk {chunk_no} ({len(chunk_text)} chars):")
    print(f"   '{chunk_text[:80]}...'")

In [None]:
# =============================================================================
# Step 3: Embeddings
# =============================================================================
"""
Embeddings
----------
Convert text into numerical vectors that capture semantic meaning.
Similar texts have similar vectors (close in vector space).

Popular embedding models:
- OpenAI: text-embedding-3-small, text-embedding-3-large
- Hugging Face: all-MiniLM-L6-v2, BGE models
- Cohere: embed-english-v3.0
- Google: textembedding-gecko
"""

from langchain_openai import OpenAIEmbeddings

print("=" * 60)
print("Step 3: Creating Embeddings")
print("=" * 60)

# Single source of truth for the embedding model name. It is used both to
# build the client and in the report below, so the printed name cannot drift
# from the model that is actually configured.
EMBEDDING_MODEL = "text-embedding-3-small"  # Fast and cost-effective

# Initialize embeddings model
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

# Embed one short query as a smoke test and to inspect the vector it returns.
sample_text = "What is LangChain?"
sample_embedding = embeddings.embed_query(sample_text)

print(f"\nüìä Embedding model: {EMBEDDING_MODEL}")
print(f"üìè Vector dimension: {len(sample_embedding)}")
print(f"üìù Sample text: '{sample_text}'")
print(f"üî¢ First 5 values: {sample_embedding[:5]}")

In [None]:
# =============================================================================
# Step 4: Vector Store
# =============================================================================
"""
Vector Stores
-------------
Store embeddings for fast similarity search.

Options:
- FAISS - Fast, in-memory, free (great for development)
- Chroma - Persistent, easy to use
- Pinecone - Managed cloud service
- Weaviate - Full-featured, open source
- Qdrant - High performance, filtering support
"""

from langchain_community.vectorstores import FAISS

print("=" * 60, "Step 4: Creating Vector Store", "=" * 60, sep="\n")

# Build a FAISS index from the chunks, embedding them with the model
# configured in the previous step.
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)

print(f"\n‚úÖ Vector store created with {len(splits)} chunks")

# Quick check: fetch the two chunks most similar to a sample query.
query = "What is LCEL?"
similar_docs = vectorstore.similarity_search(query, k=2)

print(f"\nüîç Query: '{query}'")
print("üìÑ Top 2 similar documents:")
for rank, hit in enumerate(similar_docs, start=1):
    origin = hit.metadata.get("source", "unknown")
    print(f"\n   {rank}. {hit.page_content[:100]}...")
    print(f"      Source: {origin}")

In [None]:
# =============================================================================
# Step 5: Retriever
# =============================================================================
"""
Retrievers
----------
Wrap vector stores with a consistent interface.
Retrievers are Runnables - they work with LCEL!

Retriever Types:
- VectorStoreRetriever - Basic similarity search
- MultiQueryRetriever - Generates multiple query variants
- ContextualCompressionRetriever - Compresses retrieved docs
- EnsembleRetriever - Combines multiple retrievers
"""

print("=" * 60, "Step 5: Creating Retriever", "=" * 60, sep="\n")

# Expose the vector store through the retriever interface.
# search_type could be "mmr" instead of "similarity" for more diverse results;
# k=3 asks for the top three documents.
retriever_options = {"k": 3}
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=retriever_options,
)

# The retriever is invoked like any other Runnable.
query = "How do agents work in LangChain?"
retrieved_docs = retriever.invoke(query)

print(f"\nüîç Query: '{query}'")
print(f"üìÑ Retrieved {len(retrieved_docs)} documents:")
for rank, hit in enumerate(retrieved_docs, start=1):
    print(f"\n   {rank}. {hit.page_content[:80]}...")

In [None]:
# =============================================================================
# Step 6: Complete RAG Chain
# =============================================================================
"""
Building the RAG Chain
----------------------
Combine all components using LCEL:

1. Retriever gets relevant documents
2. Prompt formats docs + question
3. Model generates answer
4. Parser extracts text
"""

from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

print("=" * 60, "Step 6: Complete RAG Chain", "=" * 60, sep="\n")

# Chat model used for generation; temperature=0 is passed through to the model.
model = init_chat_model("gpt-4o-mini", model_provider="openai", temperature=0)

# System instructions for the RAG prompt. {context} receives the formatted
# retrieved documents; the user's question is supplied as the human message.
_RAG_SYSTEM_TEMPLATE = """You are a helpful assistant that answers questions based on the 
provided context. If the answer is not in the context, say so.

Context:
{context}"""

rag_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _RAG_SYSTEM_TEMPLATE),
        ("human", "{question}"),
    ]
)

# Helper function to format documents
def format_docs(docs):
    """Combine the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank
        line. An empty iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    blank_line = "\n\n"
    return blank_line.join(contents)

# Context branch: retrieve documents for the incoming question, then flatten
# them into a single string with format_docs.
context_branch = retriever | format_docs

# Full pipeline: the input question feeds both prompt variables ("context"
# via retrieval, "question" unchanged), then prompt -> model -> plain text.
rag_chain = (
    {"context": context_branch, "question": RunnablePassthrough()}
    | rag_prompt
    | model
    | StrOutputParser()
)

print(
    "\n‚úÖ RAG chain built!",
    "\nüìä Chain structure:",
    "   retriever ‚Üí format_docs ‚Üí prompt ‚Üí model ‚Üí parser",
    sep="\n",
)

In [None]:
# =============================================================================
# Step 7: Using the RAG Chain
# =============================================================================
"""
Testing Our RAG System
----------------------
Ask questions that require knowledge from our documents.
"""

print("=" * 60, "Step 7: Testing RAG Chain", "=" * 60, sep="\n")

# Questions to run through the chain. The last one is deliberately about a
# subject the sample documents do not cover.
questions = [
    "What is LCEL and what are its benefits?",
    "Who created LangChain and when?",
    "What are vector stores used for in RAG?",
    "What is quantum computing?",
]

divider = "-" * 40
for question in questions:
    # Print the question before invoking so it is visible while the chain runs.
    print(f"\n‚ùì Question: {question}")
    answer = rag_chain.invoke(question)
    print(f"üí¨ Answer: {answer}", divider, sep="\n")

In [None]:
# =============================================================================
# Advanced: RAG with Sources
# =============================================================================
"""
RAG with Source Attribution
---------------------------
Return both the answer AND the source documents.
This helps users verify the information.
"""

from langchain_core.runnables import RunnableParallel

print("=" * 60)
print("Advanced: RAG with Sources")
print("=" * 60)

# Answer stage. It expects a dict holding the already-retrieved documents
# under "sources" and the user's question under "question". It adds the
# formatted "context" the prompt needs, then runs prompt -> model -> parser.
answer_from_sources = (
    RunnablePassthrough.assign(context=lambda state: format_docs(state["sources"]))
    | rag_prompt
    | model
    | StrOutputParser()
)

# Retrieve ONCE, then derive the answer from those same documents.
# Running `rag_chain` and `retriever` as two parallel branches would call the
# retriever twice per question (rag_chain retrieves internally), doubling the
# embedding/search work. The reported sources would also come from a separate
# retrieval rather than the one the answer was grounded on.
# The result dict contains "sources", "question" and "answer".
rag_chain_with_sources = RunnableParallel(
    {"sources": retriever, "question": RunnablePassthrough()}
).assign(answer=answer_from_sources)

# Test with sources
question = "What is RAG and why is it useful?"
result = rag_chain_with_sources.invoke(question)

print(f"\n‚ùì Question: {question}")
print(f"\nüí¨ Answer: {result['answer']}")
print(f"\nüìö Sources:")
for i, doc in enumerate(result['sources']):
    # Fall back to 'unknown' when a document carries no source metadata.
    source = doc.metadata.get('source', 'unknown')
    print(f"   {i+1}. {source}")

In [None]:
# =============================================================================
# Advanced: Conversational RAG
# =============================================================================
"""
Conversational RAG
------------------
Allow follow-up questions that reference previous context.
Uses chat history to reformulate queries.
"""

from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage

print("=" * 60, "Advanced: Conversational RAG", "=" * 60, sep="\n")

# Instructions for rewriting a follow-up question so that it can be understood
# without the preceding conversation.
_CONTEXTUALIZE_SYSTEM_TEMPLATE = """Given the chat history and latest question, 
reformulate the question to be standalone (understandable without history).
If the question is already standalone, return it as-is."""

# Prompt layout: instructions, then the prior messages, then the new question.
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _CONTEXTUALIZE_SYSTEM_TEMPLATE),
        MessagesPlaceholder("chat_history"),
        ("human", "{question}"),
    ]
)

# prompt -> model -> plain string holding the reformulated question
contextualize_chain = contextualize_prompt | model | StrOutputParser()

# Full conversational RAG
def conversational_rag(question: str, chat_history: list):
    """Answer a question with RAG, taking earlier conversation turns into account.

    When ``chat_history`` is non-empty, the question is first rewritten into a
    standalone form by ``contextualize_chain``. With an empty history the
    question is used unchanged. The resulting question is then answered by
    ``rag_chain``.

    Args:
        question: The user's latest message.
        chat_history: Previous messages of the conversation (may be empty).

    Returns:
        A tuple ``(answer, standalone_question)``, where ``standalone_question``
        is the question that was actually sent to ``rag_chain``.
    """
    standalone_question = question
    if chat_history:
        # Only spend a model call on reformulation when there is history.
        rewrite_input = {"chat_history": chat_history, "question": question}
        standalone_question = contextualize_chain.invoke(rewrite_input)

    return rag_chain.invoke(standalone_question), standalone_question

# Scripted three-turn conversation. The second and third turns only make
# sense in light of the turns before them.
chat_history = []
conversation = [
    "What is LangChain?",
    "Who created it?",
    "What year?",
]

for user_turn in conversation:
    reply, standalone = conversational_rag(user_turn, chat_history)

    print(f"\nüë§ User: {user_turn}")
    # Show the rewritten question only when it differs from what was typed.
    if standalone != user_turn:
        print(f"   (Reformulated: {standalone})")
    print(f"ü§ñ Assistant: {reply}")

    # Record this exchange so the next turn can refer back to it.
    chat_history.append(HumanMessage(content=user_turn))
    chat_history.append(AIMessage(content=reply))

In [None]:
# =============================================================================
# Summary: RAG in LangChain 1.0+
# =============================================================================
"""
=====================================================================
KEY TAKEAWAYS - RAG Pipeline
=====================================================================

1. LOAD DOCUMENTS:
   -----------------
   from langchain_community.document_loaders import TextLoader
   docs = TextLoader("file.txt").load()

2. SPLIT TEXT:
   ------------
   from langchain_text_splitters import RecursiveCharacterTextSplitter
   splitter = RecursiveCharacterTextSplitter(chunk_size=500)
   chunks = splitter.split_documents(docs)

3. CREATE EMBEDDINGS:
   -------------------
   from langchain_openai import OpenAIEmbeddings
   embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

4. CREATE VECTOR STORE:
   ---------------------
   from langchain_community.vectorstores import FAISS
   vectorstore = FAISS.from_documents(chunks, embeddings)

5. CREATE RETRIEVER:
   ------------------
   retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

6. BUILD RAG CHAIN:
   -----------------
   rag_chain = (
       {"context": retriever | format_docs, "question": RunnablePassthrough()}
       | rag_prompt
       | model
       | StrOutputParser()
   )

7. USE THE CHAIN:
   ---------------
   answer = rag_chain.invoke("Your question here")

Common Imports:
---------------
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS, Chroma
from langchain_core.runnables import RunnablePassthrough

=====================================================================
"""

# Closing banner and recap of the pipeline covered in this notebook.
print("=" * 60)
print("RAG Module Complete!")
print("=" * 60)
# The loader list names PyPDFLoader: langchain_community.document_loaders
# provides PyPDFLoader for PDFs and has no class called PDFLoader.
print("""
RAG Pipeline Summary:
---------------------
1. Load ‚Üí 2. Split ‚Üí 3. Embed ‚Üí 4. Store ‚Üí 5. Retrieve ‚Üí 6. Generate

Key Components:
---------------
- Document Loaders (TextLoader, PyPDFLoader, WebBaseLoader)
- Text Splitters (RecursiveCharacterTextSplitter)
- Embeddings (OpenAIEmbeddings, HuggingFaceEmbeddings)
- Vector Stores (FAISS, Chroma, Pinecone)
- Retrievers (as_retriever(), MultiQueryRetriever)

Next: 10-agents.ipynb - Agent patterns with create_agent()
""")