# Hybrid Search Strategies
#### Combining Dense and Sparse matrix for better context retrieval from vectorstore

In [1]:
print("All okay!")

All okay!


In [None]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_classic.retrievers import EnsembleRetriever
from langchain_core.documents import Document

In [16]:
# Step 1: Sample documents
docs = [
    Document(page_content="LangChain helps build LLM applications."),
    Document(page_content="Pinecone is a vector database for semantic search."),
    Document(page_content="The Eiffel Tower is located in Paris"),
    Document(page_content="LangChain can be used to develop Agentic AI applications."),
    Document(page_content="LangChain has many types of retrievers.")
]

# Step 2: Dense retriever (FAISS + HuggingFace)
embedding_model=HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")
dense_vectorstore=FAISS.from_documents(docs,embedding_model)
dense_retriever=dense_vectorstore.as_retriever()
dense_retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x141665e10>, search_kwargs={})

In [19]:
# Step 3: Sparse retriever (BM25)
sparse_retriever=BM25Retriever.from_documents(docs)
sparse_retriever.k=3

print(sparse_retriever)

# Step 4: Combine Dense and Sparse retriever into a Hybrid retriever
hybrid_retriever=EnsembleRetriever(
    retrievers=[dense_retriever,sparse_retriever],
    weight=[0.7,0.3] ### alpha - hyperparamater
)
print(hybrid_retriever)

vectorizer=<rank_bm25.BM25Okapi object at 0x1048925d0> k=3
retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x141665e10>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x1048925d0>, k=3)] weights=[0.5, 0.5]


In [20]:
# Step 5: Query and get the results
query = "How can I build an application using LLM?"
results = hybrid_retriever.invoke(query)

# Step 6: Print results
for i, doc in enumerate(results):
    print(f"\n Document {i+1}: {doc.page_content}")


 Document 1: LangChain helps build LLM applications.

 Document 2: LangChain can be used to develop Agentic AI applications.

 Document 3: LangChain has many types of retrievers.

 Document 4: Pinecone is a vector database for semantic search.


#### RAG Pipeline with hybrid retriever

In [22]:
from langchain_classic.chat_models import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains.retrieval import create_retrieval_chain


In [60]:
# Step 7: Prompt Template
prompt = PromptTemplate.from_template(
    """ Answer the question based on the context below. 
    
    Context:
    {context}

    Question: {input}

    Answer:"""
)
# Step 8: LLM
llm = ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0.2)

In [61]:
### Creat stuff document chain
document_chain=create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain=create_retrieval_chain(retriever=hybrid_retriever, combine_docs_chain=document_chain)

In [30]:
# Step 9: Test the RAG Chain
query={"input":"How can I build apps using LLM?"}
response=rag_chain.invoke(query)

# Step 10: Output
print(f"\nAnswer: {response['answer']}")
print("\nSource Documents:")
for i, doc in enumerate(response['context']):
    print(f"\nDoc {i+1}: {doc.page_content}")


Answer: You can build apps using LLMs by leveraging frameworks like LangChain, which helps develop LLM applications and agentic AI systems. Additionally, integrating tools such as various retrievers for data retrieval and using vector databases like Pinecone for semantic search can enhance your application's capabilities.

Source Documents:

Doc 1: LangChain helps build LLM applications.

Doc 2: LangChain can be used to develop Agentic AI applications.

Doc 3: LangChain has many types of retrievers.

Doc 4: Pinecone is a vector database for semantic search.
