## Hybrid Retriever- Combining Dense And Sparse Retriever

In [1]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain.schema import Document



In [19]:
# Step 1: Sample documents
docs = [
    Document(page_content="‡∫ß‡∫¥‡∫ó‡∫∞‡∫ç‡∫≤‡∫™‡∫≤‡∫î‡∫Ñ‡∫≠‡∫°‡∫û‡∫¥‡∫ß‡ªÄ‡∫ï‡∫µ‡ªÅ‡∫°‡ªà‡∫ô‡∫™‡∫≤‡∫Ç‡∫≤‡∫ß‡∫¥‡∫ä‡∫≤‡∫ó‡∫µ‡ªà‡∫™‡∫≥‡∫Ñ‡∫±‡∫ô‡ªÉ‡∫ô‡∫ç‡∫∏‡∫Å‡∫õ‡∫±‡∫î‡∫à‡∫∏‡∫ö‡∫±‡∫ô."),
    Document(page_content="‡∫Å‡∫≤‡∫ô‡ªÄ‡∫Ç‡∫ª‡ªâ‡∫≤‡ªÉ‡∫à‡∫ß‡∫¥‡∫ó‡∫∞‡∫ç‡∫≤‡∫™‡∫≤‡∫î‡∫Ñ‡∫≠‡∫°‡∫û‡∫¥‡∫ß‡ªÄ‡∫ï‡∫µ‡∫à‡∫∞‡∫ä‡ªà‡∫ß‡∫ç‡ªÉ‡∫´‡ªâ‡∫ô‡∫±‡∫Å‡∫™‡∫∂‡∫Å‡∫™‡∫≤‡∫°‡∫µ‡∫ó‡∫±‡∫Å‡∫™‡∫∞‡∫ó‡∫µ‡ªà‡∫à‡∫≥‡ªÄ‡∫õ‡∫±‡∫ô."),
    Document(page_content="‡∫ô‡∫±‡∫Å‡∫™‡∫∂‡∫Å‡∫™‡∫≤‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫´‡∫≤‡∫ß‡∫Ω‡∫Å‡ªÄ‡∫Æ‡∫±‡∫î‡∫á‡∫≤‡∫ô‡∫ó‡∫≥‡ªÑ‡∫î‡ªâ‡∫á‡ªà‡∫≤‡∫ç‡∫Ç‡∫∂‡ªâ‡∫ô‡∫ñ‡ªâ‡∫≤‡∫°‡∫µ‡∫Ñ‡∫ß‡∫≤‡∫°‡∫Æ‡∫π‡ªâ‡∫î‡ªâ‡∫≤‡∫ô‡∫ô‡∫µ‡ªâ."),
    Document(page_content="‡∫õ‡∫±‡∫ô‡∫ç‡∫≤‡∫õ‡∫∞‡∫î‡∫¥‡∫î (Artificial Intelligence), ‡∫´‡∫º‡∫∑ AI, ‡ªÅ‡∫°‡ªà‡∫ô‡∫•‡∫∞‡∫ö‡∫ª‡∫ö‡∫Ñ‡∫≠‡∫°‡∫û‡∫¥‡∫ß‡ªÄ‡∫ï‡∫µ‡∫ó‡∫µ‡ªà‡∫ñ‡∫∑‡∫Å‡∫™‡ªâ‡∫≤‡∫á‡∫Ç‡∫∂‡ªâ‡∫ô‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫Æ‡∫Ω‡∫ô‡∫Æ‡∫π‡ªâ, ‡∫Ñ‡∫¥‡∫î, ‡ªÅ‡∫•‡∫∞‡ªÅ‡∫Å‡ªâ‡ªÑ‡∫Ç‡∫ö‡∫±‡∫ô‡∫´‡∫≤‡ªÑ‡∫î‡ªâ‡∫Ñ‡∫∑‡∫Å‡∫±‡∫ö‡∫°‡∫∞‡∫ô‡∫∏‡∫î."),
    Document(page_content="‡∫õ‡∫±‡∫ô‡∫ç‡∫≤‡∫õ‡∫∞‡∫î‡∫¥‡∫î‡ªÅ‡∫°‡ªà‡∫ô‡ªÄ‡∫ï‡∫±‡∫Å‡ªÇ‡∫ô‡ªÇ‡∫•‡∫ä‡∫µ‡∫ó‡∫µ‡ªà‡∫Å‡∫≥‡∫•‡∫±‡∫á‡∫û‡∫±‡∫î‡∫ó‡∫∞‡∫ô‡∫≤‡∫¢‡ªà‡∫≤‡∫á‡ªÑ‡∫ß‡∫ß‡∫≤."),
    Document(page_content="AI ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫ô‡∫≥‡∫°‡∫≤‡ªÉ‡∫ä‡ªâ‡ªÉ‡∫ô‡∫´‡∫º‡∫≤‡∫ç‡∫Ç‡∫∞‡ªÅ‡ªú‡∫á‡∫Å‡∫≤‡∫ô."),
    Document(page_content="‡∫´‡∫º‡∫≤‡∫ç‡∫ö‡ªç‡∫•‡∫¥‡∫™‡∫±‡∫î‡ªÉ‡∫´‡∫ç‡ªà‡∫Å‡∫≥‡∫•‡∫±‡∫á‡∫•‡∫ª‡∫á‡∫ó‡∫∂‡∫ô‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫û‡∫±‡∫î‡∫ó‡∫∞‡∫ô‡∫≤ AI."),
    Document(page_content="‡∫™‡∫õ‡∫õ ‡∫•‡∫≤‡∫ß‡ªÅ‡∫°‡ªà‡∫ô‡∫õ‡∫∞‡ªÄ‡∫ó‡∫î‡∫ó‡∫µ‡ªà‡∫°‡∫µ‡∫ß‡∫±‡∫î‡∫ó‡∫∞‡∫ô‡∫∞‡∫ó‡∫≥‡∫≠‡∫±‡∫ô‡∫ç‡∫≤‡∫ß‡∫ô‡∫≤‡∫ô."),
    Document(page_content="‡∫õ‡∫∞‡∫ä‡∫≤‡∫ä‡∫ª‡∫ô‡∫•‡∫≤‡∫ß‡∫°‡∫µ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÄ‡∫õ‡∫±‡∫ô‡∫Å‡∫±‡∫ô‡ªÄ‡∫≠‡∫á‡ªÅ‡∫•‡∫∞‡∫°‡∫µ‡ªÉ‡∫à‡∫°‡∫±‡∫Å‡∫á‡∫≤‡∫°."),
    Document(page_content="‡∫Ñ‡∫ª‡∫ô‡∫•‡∫≤‡∫ß‡∫Ñ‡∫ª‡∫ô‡∫à‡∫¥‡∫á‡ªÉ‡∫à.")
] 

# Step 2: Dense Retriever (FAISS + HuggingFace)
embedding_model = HuggingFaceEmbeddings(model_name="D:\\model\\BAAI-bge-m3")
dense_vectorstore = FAISS.from_documents(docs, embedding_model)
dense_retriever = dense_vectorstore.as_retriever(search_kwargs={"k": 3})

In [20]:
### Sparse Retriever(BM25)
sparse_retriever=BM25Retriever.from_documents(docs)
sparse_retriever.k=3 ##top- k documents to retriever

## step 4 : Combine with Ensemble Retriever
hybrid_retriever=EnsembleRetriever(
    retrievers=[dense_retriever,sparse_retriever],
    weight=[0.7,0.3]
)


In [21]:
hybrid_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001D3536CD590>, search_kwargs={'k': 3}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000001D356B95220>, k=3)], weights=[0.5, 0.5])

In [22]:
# Step 5: Query and get results
query = "AI ‡∫Ñ‡∫∑‡∫ç‡∫±‡∫á ‡ªÅ‡∫•‡∫∞ ‡∫™‡∫≥‡∫Ñ‡∫±‡∫ô‡ªÅ‡∫ô‡∫ß‡ªÉ‡∫î"
results = hybrid_retriever.invoke(query) 

# Step 6: Print results
for i, doc in enumerate(results):
    print(f"\nüîπ Document {i+1}:\n{doc.page_content}")


üîπ Document 1:
AI ‡∫™‡∫≤‡∫°‡∫≤‡∫î‡∫ô‡∫≥‡∫°‡∫≤‡ªÉ‡∫ä‡ªâ‡ªÉ‡∫ô‡∫´‡∫º‡∫≤‡∫ç‡∫Ç‡∫∞‡ªÅ‡ªú‡∫á‡∫Å‡∫≤‡∫ô.

üîπ Document 2:
‡∫õ‡∫±‡∫ô‡∫ç‡∫≤‡∫õ‡∫∞‡∫î‡∫¥‡∫î (Artificial Intelligence), ‡∫´‡∫º‡∫∑ AI, ‡ªÅ‡∫°‡ªà‡∫ô‡∫•‡∫∞‡∫ö‡∫ª‡∫ö‡∫Ñ‡∫≠‡∫°‡∫û‡∫¥‡∫ß‡ªÄ‡∫ï‡∫µ‡∫ó‡∫µ‡ªà‡∫ñ‡∫∑‡∫Å‡∫™‡ªâ‡∫≤‡∫á‡∫Ç‡∫∂‡ªâ‡∫ô‡ªÄ‡∫û‡∫∑‡ªà‡∫≠‡∫Æ‡∫Ω‡∫ô‡∫Æ‡∫π‡ªâ, ‡∫Ñ‡∫¥‡∫î, ‡ªÅ‡∫•‡∫∞‡ªÅ‡∫Å‡ªâ‡ªÑ‡∫Ç‡∫ö‡∫±‡∫ô‡∫´‡∫≤‡ªÑ‡∫î‡ªâ‡∫Ñ‡∫∑‡∫Å‡∫±‡∫ö‡∫°‡∫∞‡∫ô‡∫∏‡∫î.

üîπ Document 3:
‡∫Ñ‡∫ª‡∫ô‡∫•‡∫≤‡∫ß‡∫Ñ‡∫ª‡∫ô‡∫à‡∫¥‡∫á‡ªÉ‡∫à.

üîπ Document 4:
‡∫´‡∫º‡∫≤‡∫ç‡∫ö‡ªç‡∫•‡∫¥‡∫™‡∫±‡∫î‡ªÉ‡∫´‡∫ç‡ªà‡∫Å‡∫≥‡∫•‡∫±‡∫á‡∫•‡∫ª‡∫á‡∫ó‡∫∂‡∫ô‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫û‡∫±‡∫î‡∫ó‡∫∞‡∫ô‡∫≤ AI.

üîπ Document 5:
‡∫õ‡∫∞‡∫ä‡∫≤‡∫ä‡∫ª‡∫ô‡∫•‡∫≤‡∫ß‡∫°‡∫µ‡∫Ñ‡∫ß‡∫≤‡∫°‡ªÄ‡∫õ‡∫±‡∫ô‡∫Å‡∫±‡∫ô‡ªÄ‡∫≠‡∫á‡ªÅ‡∫•‡∫∞‡∫°‡∫µ‡ªÉ‡∫à‡∫°‡∫±‡∫Å‡∫á‡∫≤‡∫°.


### RAG Pipeline with hybrid retriever

In [23]:
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

In [24]:
# Step 5: Prompt Template
prompt = PromptTemplate.from_template("""
Answer the question based on the context below.
Important: Response in Lao Language only.
Context:
{context}

Question: {input}
""")

## step 6-llm
llm=init_chat_model("groq:meta-llama/llama-4-maverick-17b-128e-instruct",temperature=0.4)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001D3536CE5D0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001D3536CEFD0>, model_name='meta-llama/llama-4-maverick-17b-128e-instruct', temperature=0.4, model_kwargs={}, groq_api_key=SecretStr('**********'))

In [25]:
### Create stuff Docuemnt Chain
document_chain=create_stuff_documents_chain(llm=llm,prompt=prompt)

## create Full rAg chain
rag_chain=create_retrieval_chain(retriever=hybrid_retriever,combine_docs_chain=document_chain)
rag_chain


RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001D3536CD590>, search_kwargs={'k': 3}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000001D356B95220>, k=3)], weights=[0.5, 0.5]), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the question based on the context below.\nImportant: Response in Lao Language only.\nContext:\n{context}\n\nQuestio

In [26]:
# Step 9: Ask a question
query = {"input": "‡∫Ç‡ªâ‡∫≠‡∫ç‡∫à‡∫≥‡ªÄ‡∫õ‡∫±‡∫ô‡∫ï‡ªâ‡∫≠‡∫á‡∫Æ‡∫Ω‡∫ô ‡∫ö‡∫±‡∫ô‡∫ç‡∫≤‡∫õ‡∫∞‡∫î‡∫¥‡∫î‡∫ö‡ªç‡ªà"}
response = rag_chain.invoke(query)

# Step 10: Output
print("‚úÖ Answer:\n", response["answer"])

print("\nüìÑ Source Documents:")
for i, doc in enumerate(response["context"]):
    print(f"\nDoc {i+1}: {doc.page_content}")

‚úÖ Answer:
 ‡∫ó‡ªà‡∫≤‡∫ô‡∫Ñ‡∫ß‡∫ô‡∫Æ‡∫Ω‡∫ô‡∫Æ‡∫π‡ªâ‡∫Å‡ªà‡∫Ω‡∫ß‡∫Å‡∫±‡∫ö‡∫õ‡∫±‡∫ô‡∫ç‡∫≤‡∫õ‡∫∞‡∫î‡∫¥‡∫î‡ªÄ‡∫û‡∫≤‡∫∞‡∫°‡∫±‡∫ô‡ªÅ‡∫°‡ªà‡∫ô‡ªÄ‡∫ï‡∫±‡∫Å‡ªÇ‡∫ô‡ªÇ‡∫•‡∫ä‡∫µ‡∫ó‡∫µ‡ªà‡∫Å‡∫≥‡∫•‡∫±‡∫á‡∫û‡∫±‡∫î‡∫ó‡∫∞‡∫ô‡∫≤‡∫¢‡ªà‡∫≤‡∫á‡ªÑ‡∫ß‡∫ß‡∫≤ ‡ªÅ‡∫•‡∫∞ ‡∫°‡∫±‡∫ô‡∫à‡∫∞‡∫ä‡ªà‡∫ß‡∫ç‡ªÉ‡∫´‡ªâ‡∫ó‡ªà‡∫≤‡∫ô‡∫°‡∫µ‡∫ó‡∫±‡∫Å‡∫™‡∫∞‡∫ó‡∫µ‡ªà‡∫à‡∫≥‡ªÄ‡∫õ‡∫±‡∫ô‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫õ‡∫∞‡∫Å‡∫≠‡∫ö‡∫≠‡∫≤‡∫ä‡∫µ‡∫ö‡ªÉ‡∫ô‡∫≠‡∫∞‡∫ô‡∫≤‡∫Ñ‡∫ª‡∫î. ‡∫Å‡∫≤‡∫ô‡ªÄ‡∫Ç‡∫ª‡ªâ‡∫≤‡ªÉ‡∫à‡∫ß‡∫¥‡∫ó‡∫∞‡∫ç‡∫≤‡∫™‡∫≤‡∫î‡∫Ñ‡∫≠‡∫°‡∫û‡∫¥‡∫ß‡ªÄ‡∫ï‡∫µ ‡ªÅ‡∫•‡∫∞ ‡∫õ‡∫±‡∫ô‡∫ç‡∫≤‡∫õ‡∫∞‡∫î‡∫¥‡∫î‡∫à‡∫∞‡∫ä‡ªà‡∫ß‡∫ç‡ªÉ‡∫´‡ªâ‡∫ó‡ªà‡∫≤‡∫ô‡∫™‡∫≤‡∫°‡∫≤‡∫î‡ªÅ‡∫Å‡ªâ‡ªÑ‡∫Ç‡∫ö‡∫±‡∫ô‡∫´‡∫≤ ‡ªÅ‡∫•‡∫∞ ‡∫õ‡∫∞‡∫Å‡∫≠‡∫ö‡∫™‡ªà‡∫ß‡∫ô‡ªÉ‡∫ô‡∫Å‡∫≤‡∫ô‡∫û‡∫±‡∫î‡∫ó‡∫∞‡∫ô‡∫≤‡∫õ‡∫∞‡ªÄ‡∫ó‡∫î‡∫Ç‡∫≠‡∫á‡∫ó‡ªà‡∫≤‡∫ô.

üìÑ Source Documents:

Doc 1: ‡∫õ‡∫±‡∫ô‡∫ç‡∫≤‡∫õ‡∫∞‡∫î‡∫¥‡∫î‡ªÅ‡∫°‡ªà‡∫ô‡ªÄ‡∫ï‡∫±‡∫Å‡ªÇ‡∫ô‡ªÇ‡∫•‡∫ä‡∫µ‡∫ó‡∫µ‡ªà‡∫Å‡∫≥‡∫•‡∫±‡∫á‡∫û‡∫±‡∫î‡∫ó‡∫∞‡∫ô‡∫≤‡∫¢‡ªà‡∫≤‡∫á‡ªÑ‡∫ß‡∫ß‡∫≤.

Doc 2: ‡∫Ñ‡∫ª‡∫ô‡∫•‡∫≤‡∫ß‡∫Ñ‡∫ª‡∫ô‡∫à‡∫¥‡∫á‡ªÉ‡∫à.

Doc 3: ‡∫õ‡∫±‡∫ô‡∫ç‡∫≤‡∫õ‡∫∞‡∫î‡∫¥‡