# Self-RAG

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

#Helper functions

In [2]:
def replace_t_with_space(list_of_documents):
    """
        Replace all the tab ('\t') keys with white space in the page content of list of documents.

        Args:
            list_of_documents: A list of document obj, each with 'page_content' attribute.
        Return:
            The modified list of documents with tab characters replaced by white spaces
    """
    for doc in list_of_documents:
        doc.page_content = doc.page_content.replace('\t', " ")
    return list_of_documents

#Database

In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
#from helper_functions import Helpers

class Data_Ingestion_Pipe:
    """
    A pipeline that showcases the ingestion of documet data into vectorstore
    """
    def __init__(self, file_path: str = r"D:\My Files\RAG-Techniques\RAG.pdf"):
        self.file_path = file_path
        #self.helper_func = Helpers()
        #self.embed_provider = Embedding_Provider()

    
    def encode_pdf(self, chunk_size: int =1000, chunk_overlap: int = 200):
        """
        Set of setps to stores the pdf documents in vectorestore in the form of embeddings
        Args:
            file_path: denotes the location of the file
            chunk_size : denote the size of each chunk the document to be split into
            chunk_overlap: connecting words in each chunk

        Return:
            A FAISS vector store containing the encoded book content.
        """
        #loads the pdf file
        try:
            loader = PyPDFLoader(self.file_path)
            docs = loader.load()
        except FileNotFoundError as e:
            raise f"Error occured: {e}"
        # split the doc file into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size = chunk_size, chunk_overlap = chunk_overlap
        )
        doc_chunks = text_splitter.split_documents(documents=docs)

        cleaned_texts = replace_t_with_space(doc_chunks)
        #embeddings
        embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        #vector db
        faiss_vstore = FAISS.from_documents(cleaned_texts, embedding=embedding)
        return faiss_vstore

#Retriever Function

In [4]:
def doc_retriever():
    """
    retrieves top k similar documents
    """
    obj = Data_Ingestion_Pipe()
    vstore = obj.encode_pdf()
    retriever = vstore.as_retriever(search_kwargs={"k": 3})
    return retriever

#LLM 

In [5]:
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama3-8b-8192",max_tokens=1000)

#Prompt Templates

In [6]:
from pydantic import BaseModel, Field

class RetrievalResponse(BaseModel):
    response: str = Field(..., title="Check if the retrieval is relevant", description="Output 'yes' or 'no' only")

In [7]:
from langchain_core.prompts import PromptTemplate

retrieval_prompt = PromptTemplate(
    input_variables=["query"],
    template="Given the query '{query}', determine if retrieval is neccessary. Output only 'yes' or 'no' "
)

In [8]:
class RelevnaceResponse(BaseModel):
    response: str = Field(..., title="Determine if the context is relevant", description="Output only 'Relevent' or 'Irrelevant'")

In [9]:
relevance_prompt = PromptTemplate(
    input_variables=["query", "context"],
    template="Given the response '{query}', and context '{context}', determine if the context is relevent. Output only 'Relevent' or 'Irrelevant'"
)

In [10]:
class GenerateResponse(BaseModel):
    response: str = Field(..., title="Generate response", description="The generated response")

In [11]:
generation_prompt = PromptTemplate(
    input_variables=["query", "context"],
    template="Given the query '{query}' and context '{context}', generate ther response."
)

In [12]:
class SupportResponse(BaseModel):
    response: str = Field(..., title="Determine if response is supported", description="Output 'Fully Supported', 'Partially Supported' and 'Not supported'")

In [13]:
support_prompt = PromptTemplate(
    input_variables=["response", "context"],
    template="Given the response '{response}' and the context '{context}', determine if the response is supported by context. Output only 'Fully Supported', 'Partially Supported' or 'Not Supported'"
)

In [14]:
class UtilityResponse(BaseModel):
    response: str = Field(..., title="Utility rating", description="Rate the utility of response from 1 to 5")

In [15]:
utility_prompt = PromptTemplate(
    input_variables=["query", "response"],
    template="Given the query '{query}', and the response '{response}', rate the utility of response from 1 to 5"
)

#LLM Chain

In [16]:
retrieval_chain = retrieval_prompt | llm.with_structured_output(RetrievalResponse)
relevance_chain = relevance_prompt | llm.with_structured_output(RelevnaceResponse)
generation_chain = generation_prompt | llm.with_structured_output(GenerateResponse)
support_chain = support_prompt | llm.with_structured_output(SupportResponse)
utility_chain = utility_prompt | llm.with_structured_output(UtilityResponse)

#Self-RAG Flow

In [17]:
def self_rag(query, vectorstore, top_k=3):
    print(f"\nProcessing query: {query}")
    
    # Step 1: Determine if retrieval is necessary
    print("Step 1: Determining if retrieval is necessary...")
    input_data = {"query": query}
    retrieval_decision = retrieval_chain.invoke(input_data).response.strip().lower()
    print(f"Retrieval decision: {retrieval_decision}")
    
    if retrieval_decision == 'yes':
        # Step 2: Retrieve relevant documents
        print("Step 2: Retrieving relevant documents...")
        docs = vectorstore.similarity_search(query, k=top_k)
        contexts = [doc.page_content for doc in docs]
        print(f"Retrieved {len(contexts)} documents")
        
        # Step 3: Evaluate relevance of retrieved documents
        print("Step 3: Evaluating relevance of retrieved documents...")
        relevant_contexts = []
        for i, context in enumerate(contexts):
            input_data = {"query": query, "context": context}
            relevance = relevance_chain.invoke(input_data).response.strip().lower()
            print(f"Document {i+1} relevance: {relevance}")
            if relevance == 'relevant':
                relevant_contexts.append(context)
        
        print(f"Number of relevant contexts: {len(relevant_contexts)}")
        
        # If no relevant contexts found, generate without retrieval
        if not relevant_contexts:
            print("No relevant contexts found. Generating without retrieval...")
            input_data = {"query": query, "context": "No relevant context found."}
            return generation_chain.invoke(input_data).response
        
        # Step 4: Generate response using relevant contexts
        print("Step 4: Generating responses using relevant contexts...")
        responses = []
        for i, context in enumerate(relevant_contexts):
            print(f"Generating response for context {i+1}...")
            input_data = {"query": query, "context": context}
            response = generation_chain.invoke(input_data).response
            
            # Step 5: Assess support
            print(f"Step 5: Assessing support for response {i+1}...")
            input_data = {"response": response, "context": context}
            support = support_chain.invoke(input_data).response.strip().lower()
            print(f"Support assessment: {support}")
            
            # Step 6: Evaluate utility
            print(f"Step 6: Evaluating utility for response {i+1}...")
            input_data = {"query": query, "response": response}
            utility = int(utility_chain.invoke(input_data).response)
            print(f"Utility score: {utility}")
            
            responses.append((response, support, utility))
        
        # Select the best response based on support and utility
        print("Selecting the best response...")
        best_response = max(responses, key=lambda x: (x[1] == 'fully supported', x[2]))
        print(f"Best response support: {best_response[1]}, utility: {best_response[2]}")
        return best_response[0]
    else:
        # Generate without retrieval
        print("Generating without retrieval...")
        input_data = {"query": query, "context": "No retrieval necessary."}
        return generation_chain.invoke(input_data).response


In [18]:
query = "What is the impact of climate change on the environment?"
vectorstore = Data_Ingestion_Pipe().encode_pdf()
response = self_rag(query, vectorstore)

print("\nFinal response:")
print(response)

  from .autonotebook import tqdm as notebook_tqdm



Processing query: What is the impact of climate change on the environment?
Step 1: Determining if retrieval is necessary...
Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: irrelevant
Document 2 relevance: irrelevant
Document 3 relevance: relevant
Number of relevant contexts: 1
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Selecting the best response...
Best response support: fully supported, utility: 4

Final response:
The impact of climate change on the environment is a pressing concern that requires immediate attention. Rising global temperatures are causing melting of polar ice caps, sea-level rise, and extreme weather events, which have devastating effects on ecosystems and human s

In [19]:
query = "What are Different types of rag?"
response = self_rag(query, vectorstore)

print("\nFinal response:")
print(response)


Processing query: What are Different types of rag?
Step 1: Determining if retrieval is necessary...
Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevent
Document 2 relevance: relevent
Document 3 relevance: relevant
Number of relevant contexts: 1
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: partially supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Selecting the best response...
Best response support: partially supported, utility: 4

Final response:
RAG models are classified into two main categories: Graph-based RAG and Sequence-based RAG. Graph-based RAG models, such as Graph-RAG, use graph neural networks to retrieve relevant text passages and incorporate them into the target sequence. Sequence-based RAG models, on the other hand, ut