Chain / Retriever / LCEL with LangChain [Advanced RAG Pipeline]

In [None]:
from langchain_community.document_loaders import PyPDFLoader
# Load the PDF (path is relative to the notebook's working directory).
pdf_path = 'attention.pdf'
loader = PyPDFLoader(pdf_path)
docs = loader.load()
docs  # last expression in the cell, so the notebook displays it

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Splitter configuration: chunk_size=1000 with chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# Preview only: show the first five chunks produced from `docs`.
preview_chunks = text_splitter.split_documents(docs)
preview_chunks[:5]

In [None]:
# Split the loaded `docs` into chunks; `documents` is what gets embedded below.
documents = text_splitter.split_documents(docs)
# Display the split result (not the un-split `docs`) so the chunks can be inspected.
documents

In [None]:
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Load variables from a local .env file (if present) before building the
# embedding client, so OPENAI_API_KEY is available even though the LLM-setup
# cell that also calls load_dotenv() comes later in the notebook.
# load_dotenv() leaves variables that are already set untouched.
load_dotenv()

# OpenAIEmbeddings is taken from langchain_openai; the class of the same name
# in langchain_community.embeddings is deprecated.
embedding = OpenAIEmbeddings()

# Only the first 30 chunks are embedded and indexed.
db = FAISS.from_documents(documents[:30], embedding)

In [None]:
# Example question used to probe the vector store ("Attention" spelled
# correctly so the embedded query matches the document's terminology).
query = "What is Attention function"
result = db.similarity_search(query)
# Show the text of the first returned chunk.
result[0].page_content

In [None]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os

# Read variables from a local .env file into the process environment.
load_dotenv()

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises an opaque TypeError when the key is missing. Check first
# and fail with an actionable message instead.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

# Chat model shared by the chains defined in the following cells.
llm = ChatOpenAI(model="gpt-5-nano")
llm

In [15]:
# LCEL-style chain equivalent
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document

# Prompt text; it expects two variables: `context` and `input`.
RAG_PROMPT_TEMPLATE = """
    Answer the following questions based only on the provided context.
    Think step by step before providing a detailed answer.
    I will tip you 1000$ if the user finds the response helpful.

    <context>
    {context}
    </context>

    Question: {input}
    """
prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

# LCEL pipeline: fill the prompt, call the model, parse the reply to a string.
to_text = StrOutputParser()
document_chain = prompt | llm | to_text

In [None]:

"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
"""

# Expose the vector store `db` through the retriever interface for use in a chain.
retriever = db.as_retriever()
retriever

In [17]:
# Build retrieval chain using LCEL (modern approach)
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def format_docs(docs):
    """Join the ``page_content`` of each document into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents in input order, separated by one blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# The chain's single input is routed to both prompt variables: "context" is
# the retriever output turned into text by format_docs, and "input" is the
# same value passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "input": RunnablePassthrough(),
}
answer_parser = StrOutputParser()
retrieval_chain = chain_inputs | prompt | llm | answer_parser

In [None]:
# Run the full retrieval chain on a sample question and display the answer.
question = "An attention function can be described as mapping a query"
response = retrieval_chain.invoke(question)
response