# ReRanking

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

# load_dotenv() has already copied any .env entries into the process environment.
# os.environ only accepts str values, so assigning the result of os.getenv() directly
# raises TypeError when the key is missing; copy it only when it is actually set.
groq_api_key = os.getenv("GROQ_API_KEY")
if groq_api_key:
    os.environ["GROQ_API_KEY"] = groq_api_key
else:
    import warnings

    warnings.warn(
        "GROQ_API_KEY is not set; add it to your .env file or environment "
        "before creating the Groq chat model.",
        stacklevel=2,
    )

In [2]:
from langchain_groq import ChatGroq

# Chat model shared by the whole notebook: rerank_documents uses it to score documents
# and the RetrievalQA chains use it to generate answers.
llm = ChatGroq(model="llama3-8b-8192", max_tokens=1000)

## Vector DB

In [3]:
def replace_t_with_space(list_of_documents):
    """
    Swap every tab character in each document's text for a single space.

    The documents are edited in place, and the very same list object that was
    passed in is handed back to the caller.

    Args:
        list_of_documents: A list of document objects, each exposing a
            'page_content' string attribute.
    Return:
        The same list, whose documents no longer contain tab characters.
    """
    tab, blank = '\t', " "
    for item in list_of_documents:
        # Splitting on tabs and re-joining with spaces is equivalent to a replace.
        item.page_content = blank.join(item.page_content.split(tab))
    return list_of_documents

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
#from helper_functions import Helpers

class Data_Ingestion_Pipe:
    """
    A pipeline that showcases the ingestion of document data into a vector store.

    Attributes:
        file_path: Location of the PDF file to ingest.
    """
    def __init__(self, file_path: str = r"D:\My Files\RAG-Techniques\RAG.pdf"):
        self.file_path = file_path

    async def encode_pdf(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        """
        Load the PDF at ``self.file_path``, split it into chunks, embed the chunks
        and store them in a FAISS vector store.

        Args:
            chunk_size: Size of each chunk the document is split into.
            chunk_overlap: Amount of overlap shared by consecutive chunks.

        Return:
            A FAISS vector store containing the encoded document chunks.

        Raises:
            FileNotFoundError: If loading the PDF fails because the file is missing.
        """
        # Load the pdf file
        try:
            loader = PyPDFLoader(self.file_path)
            docs = await loader.aload()
        except FileNotFoundError as e:
            # Only BaseException subclasses can be raised; raising a plain string
            # would itself fail with TypeError and hide the real problem.
            raise FileNotFoundError(
                f"Could not load PDF at {self.file_path!r}: {e}"
            ) from e

        # Split the loaded pages into overlapping chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        doc_chunks = text_splitter.split_documents(documents=docs)

        # Tabs are replaced with spaces before embedding
        cleaned_texts = replace_t_with_space(doc_chunks)

        # Embedding model used to encode every chunk
        embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # Build the vector store from the cleaned chunks
        faiss_vstore = await FAISS.afrom_documents(cleaned_texts, embedding=embedding)
        return faiss_vstore

In [5]:
import nest_asyncio
# Must run before asyncio.run below.
# NOTE(review): presumably needed because the notebook kernel already has a running
# event loop, which plain asyncio.run would refuse to nest inside — confirm.
nest_asyncio.apply()
import asyncio

# Run the async ingestion pipeline to completion with its default PDF path and chunk
# settings; the resulting FAISS store is reused by every retrieval cell below.
vectorstore = asyncio.run(Data_Ingestion_Pipe().encode_pdf())

  from .autonotebook import tqdm as notebook_tqdm


## Reranking Function

In [6]:
from pydantic import BaseModel, Field

# Structured-output schema for the LLM reranker: rerank_documents binds it to the model
# with with_structured_output and reads relevance_score from each response.
class Ranking(BaseModel):
    # Defaults to 0.0 when no score is supplied.
    relevance_score: float = Field(default=0.0, description="Relevance score of the document to the query")

In [7]:
from typing_extensions import List
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate

async def rerank_documents(query: str, documents: List[Document], top_k: int = 3) -> List[Document]:
    """
    Rerank the documents based on their relevance to the query using a language model.

    Each document is scored individually by the LLM (one call per document, issued
    sequentially) and the highest-scoring documents are returned. Documents with
    equal scores keep their original relative order.

    Args:
        query (str): The query to rerank the documents against.
        documents (List[Document]): The list of documents to rerank.
        top_k (int): The number of top documents to return.
    Returns:
        List[Document]: Up to ``top_k`` documents, most relevant first. Empty when
        ``documents`` is empty or ``top_k`` is not positive.
    """
    # Nothing to score or nothing requested: skip the LLM calls entirely.
    if not documents or top_k <= 0:
        return []

    prompt_template = PromptTemplate(
        # Must name the placeholders actually used in the template below.
        input_variables=["query", "doc"],
        template="""On a scale of 1-10, rate the relevance of the following document to the query. Consider the specific context and intent of the query, not just keyword matches.
        Query: {query}
        Document: {doc}
        Relevance Score:"""
    )
    llm_chain = prompt_template | llm.with_structured_output(Ranking)

    scored_documents = []
    for doc in documents:
        response = await llm_chain.ainvoke({"query": query, "doc": doc.page_content})
        try:
            score = float(response.relevance_score)
        except (AttributeError, TypeError, ValueError):
            # The chain may not yield a usable Ranking (e.g. a None response or a
            # non-numeric score); treat that document as least relevant.
            score = 0.0
        scored_documents.append((doc, score))

    reranked = sorted(scored_documents, key=lambda pair: pair[1], reverse=True)
    return [doc for doc, _ in reranked[:top_k]]

## Test Case

In [8]:
query = "What is the purpose of the RAG technique in AI?"
# Baseline: the 5 chunks returned by plain vector similarity search, before reranking.
initial_docs = vectorstore.similarity_search(query, k=5)
# NOTE(review): rerank_documents is async and is not awaited here, so reranked_docs holds a
# coroutine object rather than a list of documents; a later cell executes it with asyncio.run.
reranked_docs = rerank_documents(query, initial_docs)

In [9]:
# Preview the first three similarity-search hits (first 200 characters of each).
print("Top initial documents:")
for i, doc in enumerate(initial_docs[:3]):
    heading = f"\nDocument {i+1}:"
    preview = doc.page_content[:200] + "..."
    print(heading, preview, sep="\n")

Top initial documents:

Document 1:
ﬂexibility to adjust the number of retrieved documents at test time, which can affect performance and
runtime. Figure 3 (left) shows that retrieving more documents at test time monotonically improves
...

Document 2:
Broader Impact
This work offers several positive societal beneﬁts over previous work: the fact that it is more
strongly grounded in real factual knowledge (in this case Wikipedia) makes it “hallucinat...

Document 3:
2 Methods
We explore RAG models, which use the input sequencex to retrieve text documents z and use them
as additional context when generating the target sequence y . As shown in Figure 1, our models
...


In [10]:
print(f"Query: {query}\n")
print("Top reranked documents:")
# reranked_docs is the coroutine created by calling rerank_documents without awaiting it;
# asyncio.run executes it here and returns the reranked documents.
# NOTE(review): a coroutine object can only be run once, so re-running this cell requires
# re-creating reranked_docs first.
rerank_response = asyncio.run(reranked_docs)

Query: What is the purpose of the RAG technique in AI?

Top reranked documents:


In [12]:
# Preview each reranked document (first 200 characters).
for i, doc in enumerate(rerank_response):
    heading = f"\nDocument {i+1}:"
    preview = doc.page_content[:200] + "..."
    print(heading, preview, sep="\n")


Document 1:
blob/master/examples/rag/README.md and an interactive demo of a RAG model can be found
at https://huggingface.co/rag/
2https://github.com/pytorch/fairseq
3https://github.com/huggingface/transformers
1...

Document 2:
2 Methods
We explore RAG models, which use the input sequencex to retrieve text documents z and use them
as additional context when generating the target sequence y . As shown in Figure 1, our models
...

Document 3:
ﬂexibility to adjust the number of retrieved documents at test time, which can affect performance and
runtime. Figure 3 (left) shows that retrieving more documents at test time monotonically improves
...


## Custom Retriever based on ReRanking

In [13]:
from langchain_core.retrievers import BaseRetriever
from typing_extensions import Any

class CustomRetriever(BaseRetriever, BaseModel):
    """
    Retriever that pulls candidates from a vector store and reranks them with
    the LLM-based ``rerank_documents`` scorer.

    The retrieval hooks are the underscore-prefixed methods that BaseRetriever
    expects subclasses to implement; overriding the public
    ``get_relevant_documents`` directly is deprecated and triggers a warning when
    the class is defined.
    """

    vectorstore: Any = Field(description="vectorstore for initial retrieval")
    # Defaults reproduce the previously hard-coded behaviour (10 candidates, top 3 kept).
    k: int = Field(default=10, description="Number of documents to retrieve initially")
    rerank_top_k: int = Field(default=3, description="Number of documents to return after reranking")

    class Config:
        arbitrary_types_allowed = True

    def _get_relevant_documents(self, query: str, *, run_manager: Any = None) -> List[Document]:
        """
        Synchronously retrieve ``k`` candidates and return the ``rerank_top_k`` best.

        Args:
            query: The user query.
            run_manager: Callback manager supplied by BaseRetriever; unused here.
        Returns:
            The reranked documents, most relevant first.
        """
        initial_docs = self.vectorstore.similarity_search(query, k=self.k)
        # rerank_documents is a coroutine function, so it is driven to completion here.
        # NOTE(review): asyncio.run fails inside an already running event loop unless
        # nest_asyncio.apply() has been called, as this notebook does earlier.
        return asyncio.run(rerank_documents(query, initial_docs, top_k=self.rerank_top_k))

    async def _aget_relevant_documents(self, query: str, *, run_manager: Any = None) -> List[Document]:
        """
        Async counterpart of ``_get_relevant_documents`` that awaits the reranker
        directly instead of starting a nested event loop.

        Args:
            query: The user query.
            run_manager: Callback manager supplied by BaseRetriever; unused here.
        Returns:
            The reranked documents, most relevant first.
        """
        # assumes the vector store exposes asimilarity_search, as LangChain
        # VectorStore implementations such as FAISS do — TODO confirm for other stores
        initial_docs = await self.vectorstore.asimilarity_search(query, k=self.k)
        return await rerank_documents(query, initial_docs, top_k=self.rerank_top_k)

  class CustomRetriever(BaseRetriever, BaseModel):


In [14]:
# Retriever over the FAISS store that reranks its hits with the LLM-based rerank_documents scorer.
custom_retriever = CustomRetriever(vectorstore=vectorstore)

In [15]:
from langchain.chains import RetrievalQA

# Question-answering chain over the LLM-reranked retriever; the retrieved documents are
# returned alongside the answer so they can be inspected below.
qa_chain_options = dict(
    llm=llm,
    chain_type="stuff",
    retriever=custom_retriever,
    return_source_documents=True,
)
qa_chain = RetrievalQA.from_chain_type(**qa_chain_options)

In [16]:
# Ask the question through the reranking QA chain and show the generated answer.
# The source documents themselves are listed by the following cell.
result = qa_chain.invoke({"query": query})
answer_text = result['result']

print(f"\nQuestion: {query}")
print(f"Answer: {answer_text}")
print("\nRelevant source documents:")


Question: What is the purpose of the RAG technique in AI?
Answer: According to the provided context, the purpose of the RAG (Regressive Attention Generative) technique is to retrieve text documents (z) based on the input sequence (x) and use them as additional context when generating the target sequence (y). The technique combines a retriever (DPR) and a generator to model the probability distribution over the target sequence. The retriever retrieves relevant documents based on the input sequence, and the generator uses these documents to generate the target sequence.

Relevant source documents:
{'query': 'What is the purpose of the RAG technique in AI?', 'result': 'According to the provided context, the purpose of the RAG (Regressive Attention Generative) technique is to retrieve text documents (z) based on the input sequence (x) and use them as additional context when generating the target sequence (y). The technique combines a retriever (DPR) and a generator to model the probabilit

In [17]:
# Preview each source document the chain used (first 200 characters).
source_documents = result["source_documents"]
for i, doc in enumerate(source_documents):
    heading = f"\nDocument {i+1}:"
    preview = doc.page_content[:200] + "..."
    print(heading, preview, sep="\n")


Document 1:
2 Methods
We explore RAG models, which use the input sequencex to retrieve text documents z and use them
as additional context when generating the target sequence y . As shown in Figure 1, our models
...

Document 2:
blob/master/examples/rag/README.md and an interactive demo of a RAG model can be found
at https://huggingface.co/rag/
2https://github.com/pytorch/fairseq
3https://github.com/huggingface/transformers
1...

Document 3:
before marginalizing, and repeating the process with the following output token, Formally, we deﬁne:
pRAG-Token(y|x) ≈
N∏
i
∑
z∈top-k(p(·|x))
pη(z|x)pθ(yi|x,z,y 1:i−1)
Finally, we note that RAG can be...


## Cross Encoder

In [18]:
from sentence_transformers import CrossEncoder

# Cross-encoder model passed to CrossEncoderRetriever, which calls its predict() on
# [query, document text] pairs to obtain reranking scores.
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

class CrossEncoderRetriever(BaseRetriever, BaseModel):
    """
    Retriever that fetches ``k`` candidates from a vector store and reranks them
    with a cross-encoder, returning the ``rerank_top_k`` best.

    Only the synchronous ``_get_relevant_documents`` hook is implemented. The
    async path is deliberately left to BaseRetriever, whose default
    ``_aget_relevant_documents`` runs the synchronous hook in an executor, so
    async callers get results instead of a NotImplementedError.
    """

    vectorstore: Any = Field(description="Vector store for initial retrieval")
    cross_encoder: Any = Field(description="Cross-encoder model for reranking")
    k: int = Field(default=5, description="Number of documents to retrieve initially")
    rerank_top_k: int = Field(default=3, description="Number of documents to return after reranking")

    class Config:
        arbitrary_types_allowed = True

    def _get_relevant_documents(self, query: str, *, run_manager: Any = None) -> List[Document]:
        """
        Retrieve candidates by vector similarity and rerank them with the cross-encoder.

        Args:
            query: The user query.
            run_manager: Callback manager supplied by BaseRetriever; unused here.
        Returns:
            Up to ``rerank_top_k`` documents, highest cross-encoder score first.
        """
        # Initial retrieval
        initial_docs = self.vectorstore.similarity_search(query, k=self.k)
        if not initial_docs:
            # Nothing to score; avoid calling the model with an empty batch.
            return []

        # Prepare [query, document text] pairs for the cross-encoder
        pairs = [[query, doc.page_content] for doc in initial_docs]

        # Get cross-encoder scores, one per pair
        scores = self.cross_encoder.predict(pairs)

        # Sort documents by score, best first; float() normalises numpy scalars
        scored_docs = sorted(
            zip(initial_docs, (float(score) for score in scores)),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Return top reranked documents
        return [doc for doc, _ in scored_docs[:self.rerank_top_k]]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
  class CrossEncoderRetriever(BaseRetriever, BaseModel):
  class CrossEncoderRetriever(BaseRetriever, BaseModel):


In [21]:
# Create the cross-encoder retriever
cross_encoder_retriever = CrossEncoderRetriever(
    vectorstore=vectorstore,
    cross_encoder=cross_encoder,
    k=10,  # Retrieve 10 documents initially
    rerank_top_k=5  # Return top 5 after reranking
)

# Create the RetrievalQA chain with the cross-encoder retriever.
# NOTE: this rebinds qa_chain, query and result from the earlier LLM-reranking cells.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=cross_encoder_retriever,
    return_source_documents=True
)

# Example query
query = "What is RAG in AI?"
# Use .invoke(), as the earlier QA cell does; calling the chain object directly is deprecated.
result = qa_chain.invoke({"query": query})

print(f"\nQuestion: {query}")
print(f"Answer: {result['result']}")
print("\nRelevant source documents:")
# Preview each source document the chain used (first 200 characters).
for i, doc in enumerate(result["source_documents"]):
    print(f"\nDocument {i+1}:")
    print(doc.page_content[:200] + "...")


Question: What is RAG in AI?
Answer: RAG stands for Retrieval-Augmented Generation, which is a type of artificial intelligence (AI) model that combines the strengths of both retrieval-based and generation-based language models.

Relevant source documents:

Document 1:
Broader Impact
This work offers several positive societal beneﬁts over previous work: the fact that it is more
strongly grounded in real factual knowledge (in this case Wikipedia) makes it “hallucinat...

Document 2:
to a lesser extent, including that it might be used to generate abuse, faked or misleading content in
the news or on social media; to impersonate others; or to automate the production of spam/phishing...

Document 3:
2 Methods
We explore RAG models, which use the input sequencex to retrieve text documents z and use them
as additional context when generating the target sequence y . As shown in Figure 1, our models
...

Document 4:
in 71% of cases, and a gold article is present in the top 10 retrieved articles