In [None]:
import os
from dotenv import load_dotenv
from langchain.retrievers import BM25Retriever
# from langchain_community.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain.retrievers import EnsembleRetriever

In [None]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Google API key used by the
# Gemini embedding/chat clients below — confirm.
load_dotenv()

![Diagram](./images/Hybrid_Rag.png)

# What is Hybrid RAG?

So far, we've relied on semantic search (using vector embeddings) to find documents that are conceptually similar to our query. This is powerful, but sometimes you need the precision of a good old-fashioned keyword search (also known as lexical search). Hybrid RAG combines both methods to get the best of both worlds.

# Set up Retrievers

A hybrid RAG system uses multiple retrievers to fetch documents. In this tutorial, we will use a combination of a vector-based (semantic) retriever and a keyword-based retriever.

We'll start by loading an existing ChromaDB collection that contains our documents and their embeddings. 

In [None]:
# Open the existing persisted Chroma collection (nothing is ingested here).

# Define the embeddings model.
# NOTE(review): presumably this has to be the same model that was used when
# the collection was populated, otherwise query vectors are not comparable
# with the stored ones — confirm.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

vector_store = Chroma(
    collection_name="2_metadata_filtering_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_db",  # Directory where the Chroma data is persisted locally
)


### Vector search retriever

This retriever performs a semantic search. It finds documents that are conceptually similar to the query, even if they don't share the exact same keywords. We configure it to return the top 3 most similar documents.

In [None]:

# Expose the vector store as a retriever; k=3 asks for the top 3 matches per query.
chroma_retriever = vector_store.as_retriever(search_kwargs={"k": 3})


### Keyword search retriever

Next, we set up a `BM25Retriever`. BM25 is a popular information-retrieval algorithm that ranks documents by how often the query terms appear in each document, weighted by how rare those terms are across the whole collection and normalized for document length. This is a "sparse" retrieval method because it relies on matching keywords.

In [None]:
# Reuse the documents already stored in the vector store instead of scraping again.
records = vector_store.get()

# Rebuild Document objects from the stored text/metadata pairs. A record that
# was saved without metadata comes back as None, which Document does not
# accept, so fall back to an empty dict in that case.
docs = [
    Document(page_content=text, metadata=meta or {})
    for text, meta in zip(records["documents"], records["metadatas"])
]


In [None]:
# Build the keyword (BM25) retriever over the same documents, configured to
# return the top 3 results per query.
bm25_retriever = BM25Retriever.from_documents(docs, k=3)


# Retrieval & Generation

With our retrievers in place, we can now define the generation part of our RAG pipeline.

In [None]:
# Configure the chat model used for answer generation.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# Prompt template. {context} receives the retrieved text and {question} the
# user's query. The instructions restrict the model to the supplied context,
# cap the answer at three sentences, and require a fixed sign-off.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

rag_prompt_template = PromptTemplate.from_template(template)

### Re-ranking with Reciprocal Rank Fusion

We will retrieve documents from both of our retrievers and then use a re-ranking algorithm to create a final, unified list of the most relevant documents.

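Reciprocal Rank Fusion (RRF) is a method that combines multiple ranked lists into a single, more robust list. Each document's score is the sum, over every list it appears in, of the reciprocal of its rank plus a constant $k$:

$$\text{RRF}(d) = \sum_{r \in R} \frac{1}{k + \text{rank}_r(d)}$$

Here $R$ is the set of ranked lists, $\text{rank}_r(d)$ is the 1-based position of document $d$ in list $r$, and $k$ (60 by default below) dampens the influence of any single top-ranked result. The formula gives more weight to documents that appear higher up in the rankings, and to documents that appear in more than one list.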

In [None]:
# Re-Ranking algorithm
def reciprocal_rank_fusion(retrieved_lists, k=60):
    """
    Merge several ranked result lists into one ranking via Reciprocal Rank Fusion.

    Every occurrence of a document at zero-based position ``rank`` in a list
    adds ``1 / (k + rank + 1)`` to that document's score. Documents are
    identified by their ``page_content``; when the same content shows up more
    than once, the first Document object encountered is the one returned.

    Args:
        retrieved_lists: An iterable of ranked lists of Document objects
                         (anything exposing ``page_content``).
        k: Constant added to the rank in the RRF denominator. Default is 60.

    Returns:
        A tuple ``(reranked_docs, scores)``: the unique documents ordered by
        descending fused score, and the matching scores in the same order.
        Documents with equal scores keep their first-seen order.
    """
    first_seen = {}  # page_content -> first Document carrying that content
    rrf_totals = {}  # page_content -> accumulated RRF score

    for ranking in retrieved_lists:
        for rank, candidate in enumerate(ranking):
            key = candidate.page_content
            first_seen.setdefault(key, candidate)
            rrf_totals[key] = rrf_totals.get(key, 0) + 1 / (k + rank + 1)

    # sorted() is stable, so tied scores stay in first-seen order
    ordered_keys = sorted(rrf_totals, key=rrf_totals.get, reverse=True)

    reranked_docs = [first_seen[key] for key in ordered_keys]
    scores = [rrf_totals[key] for key in ordered_keys]
    return reranked_docs, scores

In [None]:
# Sample query that is sent to both retrievers below.
user_question = "what is the Economic Futures Program?"

In [None]:
# Run the same question through both retrievers independently
chroma_docs = chroma_retriever.invoke(user_question)  # semantic (vector) matches
bm25_docs = bm25_retriever.invoke(user_question)  # keyword (BM25) matches


In [None]:
# Let's inspect the results from each retriever

# Semantic (vector) results
for doc in chroma_docs:
    print('-- chroma --', doc.page_content)

# Visual separator between the two result sets
print("-"*100)

# Keyword (BM25) results
for doc in bm25_docs:
    print('-- bm25 --',doc.page_content)


In [None]:
# Rerank the results using Reciprocal Rank Fusion.
# The function returns the fused, de-duplicated documents and their scores
# in matching order.
reranked_docs, scores = reciprocal_rank_fusion([chroma_docs, bm25_docs])
for doc in reranked_docs:
    print('-- reranked --',doc.page_content)

print('scores: ', scores)

In [None]:
# Generate the answer from the manually fused (RRF) results

# Join the reranked documents' text, separated by blank lines, to form the prompt context
docs_content = "\n\n".join(doc.page_content for doc in reranked_docs)
prompt = rag_prompt_template.invoke({"question": user_question, "context": docs_content})
response = llm.invoke(prompt)

# Display the generated response text
response.content

# Ensemble Retriever

LangChain provides a convenient `EnsembleRetriever` that automates the process of combining and re-ranking results from multiple retrievers. It uses a weighted version of Reciprocal Rank Fusion internally.

In [None]:
# Same question as in the manual RRF walkthrough above.
user_question = "what is the Economic Futures Program?"

We instantiate the EnsembleRetriever with our two retrievers and assign them weights. In this case, we'll give them equal importance.

In [None]:
# Create EnsembleRetriever (hybrid): combines the vector and BM25 retrievers
ensemble_retriever = EnsembleRetriever(
    retrievers=[chroma_retriever, bm25_retriever],
    weights=[0.5, 0.5]  # one weight per retriever, in the same order; adjust as needed
)

# Fetch the fused results for the question in a single call
retreived_docs = ensemble_retriever.invoke(user_question)

# Inspect the retrieved docs
for doc in retreived_docs:
    print('-- ensemble --',doc.page_content)

Now, we can directly use the documents retrieved by the EnsembleRetriever to generate our final answer.

In [None]:
# Generate the answer from the documents returned by the EnsembleRetriever

# Build the context from the ensemble results (not the earlier manual-RRF
# `reranked_docs`), joining the page text with blank lines.
docs_content = "\n\n".join(doc.page_content for doc in retreived_docs)

# Pass the joined text as the context. Passing the raw Document list instead
# would put its Python repr (metadata included) into the prompt.
prompt = rag_prompt_template.invoke({"question": user_question, "context": docs_content})
response = llm.invoke(prompt)

# Display the generated response text
response.content