# Hybrid Retriever - Combining Dense and Sparse Retrievers

In [1]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_classic.retrievers import EnsembleRetriever
from langchain_core.documents import Document

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Step 1: Build a small in-memory corpus of sample documents
docs = [
    Document(page_content=text)
    for text in (
        "Langchain helps build LLM applications",
        "Pinecone is a vector database for Semantic Search",
        "The Eiffel Tower is located in Paris",
        "Langchain can be used to develop Agentic AI application",
        "Langchain has many tyoe of retrievers",
    )
]

# Step 2: Dense retriever - embed the corpus with a HuggingFace model,
# index the vectors in FAISS, and expose the index as a retriever
embedding_model = HuggingFaceEmbeddings(model_name="all-miniLM-L6-v2")
dense_vectorstore = FAISS.from_documents(docs, embedding_model)
dense_retriever = dense_vectorstore.as_retriever()

In [3]:
# Step 3: Sparse retriever (BM25) built over the same documents
sparse_retriever = BM25Retriever.from_documents(docs)
sparse_retriever.k = 3  # number of top-scoring documents to retrieve

# Step 4: Combine both retrievers with an EnsembleRetriever;
# the weights are listed in the same order as the retrievers (dense, sparse)
hybrid_retriever = EnsembleRetriever(
    retrievers=[dense_retriever, sparse_retriever],
    weights=[0.7, 0.3],
)

In [4]:
# Display the ensemble's repr to inspect its member retrievers and weights
hybrid_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000243DA10F7D0>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x00000243DEDED8D0>, k=3)], weights=[0.7, 0.3])

In [5]:
# Step 5: Run a query through the hybrid retriever
query = "How can I build applications using LLMs?"
results = hybrid_retriever.invoke(query)

# Step 6: Print each returned document, numbered from 1 in the order returned
for i, doc in enumerate(results):
    print(f"Doc {i+1}: \n {doc.page_content}")

Doc 1: 
 Langchain helps build LLM applications
Doc 2: 
 Langchain can be used to develop Agentic AI application
Doc 3: 
 Langchain has many tyoe of retrievers
Doc 4: 
 Pinecone is a vector database for Semantic Search


# RAG pipeline with Hybrid Retriever

In [6]:
from langchain_classic.chat_models import init_chat_model
from langchain_classic.prompts import PromptTemplate
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains.retrieval import create_retrieval_chain


In [7]:
# Prompt for the RAG chain. The template exposes two variables:
#   {context} - receives the retrieved documents
#   {input}   - receives the user's question
# The instruction restricts the model to the supplied context.
prompt = PromptTemplate.from_template("""
                                      Answer the question strictly based on the context below. Do not add any information other than what is present in the context
                                      
                                      Context:
                                      {context}

                                      Question:
                                      {input}
                                      """)

# Chat model addressed as "<provider>:<model>" (Groq-hosted gpt-oss-20b here)
llm = init_chat_model("groq:openai/gpt-oss-20b", temperature=1)


In [8]:
# Document chain: formats the retrieved documents into the prompt and calls the LLM
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG chain: retrieve with the hybrid retriever, then answer via the document chain
rag_chain = create_retrieval_chain(
    retriever=hybrid_retriever,
    combine_docs_chain=document_chain,
)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000243DA10F7D0>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x00000243DEDED8D0>, k=3)], weights=[0.7, 0.3]), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n                                      Answer the question strictly based on the context below. Do not add any information pther 

In [9]:
# Ask the RAG chain a question; the chain reads the question from the "input" key
query = {"input": "How can I build an app using LLMs?"}
response = rag_chain.invoke(query)

# The generated answer is returned under "answer"
print("Answer:\n", response["answer"])

# The documents that were passed to the model are returned under "context"
print("\n Source documents:")
for i, doc in enumerate(response["context"]):
    print(f"Doc {i+1}: \n {doc.page_content}")

Answer:
 You can build an app that uses LLMs by following these steps:

1. **Use Langchain** – It is a framework that helps you create LLM‑based applications.
2. **Add agentic capability** – Langchain can be used to develop agentic AI applications, letting the LLM act autonomously or in a workflow.
3. **Incorporate retrievers** – Langchain provides various types of retrievers to fetch relevant information for the LLM.
4. **Store and search vectors** – Use Pinecone, a vector database, for semantic search to retrieve the most relevant documents or data for the LLM.

 Source documents:
Doc 1: 
 Langchain helps build LLM applications
Doc 2: 
 Langchain can be used to develop Agentic AI application
Doc 3: 
 Langchain has many tyoe of retrievers
Doc 4: 
 Pinecone is a vector database for Semantic Search


In [10]:
# Debug: compare each retriever's configured top-k with the number of
# documents it actually returns.
print("sparse k:", getattr(sparse_retriever, "k", None))
# The dense (vector-store) retriever has no `k` attribute, so probing it with
# getattr always yields None. Its top-k is configured through `search_kwargs`;
# when no "k" is set there, the vector store's own default applies.
print("dense k:", dense_retriever.search_kwargs.get("k", "not set (vector store default)"))

q = "How can I build applications using LLMs?"
print("dense results:", len(dense_retriever.invoke(q)))
print("sparse results:", len(sparse_retriever.invoke(q)))
print("hybrid results:", len(hybrid_retriever.invoke(q)))

# Check how many context documents the full RAG chain passes along.
# NOTE: this invokes the LLM as well, not just the retrievers.
resp = rag_chain.invoke({"input": q})
print("rag response context len:", len(resp.get("context", [])))

sparse k: 3
dense k: None
dense results: 4
sparse results: 3
hybrid results: 4
rag response context len: 4
