<a href="https://colab.research.google.com/github/DeependraChaddha/RAG_Projects/blob/main/RAG_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

First, an environment is set up; then different methods of query translation are demonstrated. The methods demonstrated are Multi-Query, RAG-Fusion, Decomposition, Step Back and HyDE.

## Setting up Environment

Installing Packages

In [None]:
# Install LangChain, its community/OpenAI integrations, the hub client, tiktoken and the Chroma vector store used below.
!pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain

Setting up Langsmith

In [None]:
import os

# Turn on LangSmith tracing and point the client at the hosted endpoint.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
# Replace the placeholder string with your own LangSmith API key.
# The value must be a string; never commit a real key to version control.
os.environ['LANGCHAIN_API_KEY'] = '<YOUR_LANGSMITH_API_KEY>'

OpenAI api key

In [None]:
# Replace the placeholder string with your own OpenAI API key.
# The value must be a string; never commit a real key to version control.
os.environ['OPENAI_API_KEY'] = '<YOUR_OPENAI_API_KEY>'

## Multi-Query

Indexing

In [None]:
##### INDEXING #####
# Step 1 - LOAD: fetch the blog post, parsing only the elements whose CSS
# class is one of post-content / post-title / post-header.
import bs4
from langchain_community.document_loaders import WebBaseLoader

content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),  # link of blog
    bs_kwargs=dict(parse_only=content_filter),
)
blog_docs = loader.load()

# Step 2 - SPLIT: cut the loaded documents into overlapping chunks, with
# lengths measured through the tiktoken encoder (size 300, overlap 50).
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(blog_docs)

# Step 3 - INDEX: embed the chunks with OpenAIEmbeddings, store them in a
# Chroma vector store and expose that store as a retriever.
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
retriever = vectorstore.as_retriever()

Prompt

In [None]:
from langchain.prompts import ChatPromptTemplate

# Multi-Query: ask the LLM for several rephrasings of the user question so that
# retrieval is run from different perspectives.
template="""You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

# Each generated query is later sent to the retriever independently, which
# widens the search over the indexed documents.
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# prompt -> LLM -> plain string -> list of queries (one per line of output)
generate_queries = (
    prompt_perspectives
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

In [None]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
  """Return the de-duplicated union of several lists of retrieved documents.

  Args:
    documents: A list of document lists (one inner list per query).

  Returns:
    A flat list in which each distinct document appears once, ordered by
    first appearance.
  """
  # Serialise every document to a string so duplicates can be detected by
  # plain string equality.
  flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
  # dict.fromkeys removes duplicates while keeping a deterministic order
  # (a set would return the documents in arbitrary order).
  unique_documents = list(dict.fromkeys(flattened_docs))
  # Turn the serialised strings back into document objects.
  return [loads(doc) for doc in unique_documents]
# Retrieve: generate the query variants, run the retriever once per variant
# (retriever.map()), then merge the per-query results into one unique list.
question = "What is Task Decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question": question})
len(docs)


In [None]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# RAG: answer the question using the documents returned by the multi-query
# retrieval chain as context.
template= """Answer the following question based on this context:

{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(temperature=0)

# Inputs of the prompt: the context comes from the retrieval chain, the
# question is passed through unchanged from the invoke() payload.
rag_inputs = {
    "context": retrieval_chain,
    "question": itemgetter("question"),
}
final_rag_chain = rag_inputs | prompt | llm | StrOutputParser()
final_rag_chain.invoke({"question": question})

## RAG Fusion

This is mostly similar to Multi-Query, except that after retrieving documents for the multiple queries, the documents are given a score and re-ranked.

Prompt

In [None]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion prompt: turns a single input query into several related search
# queries (the template asks for 4).
template="""You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# prompt -> LLM -> plain string -> list of queries (one per line of output)
generate_queries = (
    prompt_rag_fusion
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | (lambda text: text.split("\n"))
)

In [None]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
  """Fuse several ranked document lists with Reciprocal Rank Fusion (RRF).

  Every document receives 1 / (rank + k) for each list it appears in, where
  rank is its zero-based position in that list; the contributions are summed.

  Args:
    results: A list of ranked document lists (one inner list per query).
    k: Constant of the RRF formula. Should be positive: with k=0 the
      first-ranked document (rank 0) would cause a division by zero.

  Returns:
    A list of (document, fused_score) tuples sorted by score, highest first.
  """
  # Fused score per document, keyed by the serialised document so identical
  # documents coming from different lists share one entry.
  fused_scores = {}

  for docs in results:
    for rank, doc in enumerate(docs):
      doc_str = dumps(doc)
      # Start from 0 the first time a document is seen, then accumulate.
      fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

  # Highest fused score first; deserialise the documents on the way out.
  ranked_items = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
  reranked_results = [(loads(doc_str), score) for doc_str, score in ranked_items]
  return reranked_results

# Chain: generate the queries, retrieve once per query, then fuse the
# per-query rankings into a single scored list.
retrieval_chain_rag_fusion = (
    generate_queries
    | retriever.map()
    | reciprocal_rank_fusion
)
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

In [None]:
from operator import itemgetter  # imported here so the cell does not rely on an earlier one
from langchain_core.runnables import RunnablePassthrough

# RAG: answer the question using the fused, re-ranked retrieval results as
# context.
template= """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion,
     "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

## Decomposition

This approach breaks the query down into smaller sub-questions. Retrieval is first done on these smaller, simpler queries; the generated answers are then given, together with the actual question, as part of the prompt to get a better final answer.

In [None]:
from langchain.prompts import ChatPromptTemplate

# Decomposition prompt: asks the LLM to break the input question into
# sub-questions (the template asks for 3).
template="""You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)


In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM shared by the decomposition cells
llm = ChatOpenAI(temperature=0)

# Chain: prompt -> LLM -> plain string -> list of sub-questions (one per line)
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda text: text.split("\n"))
)

# Run the decomposition on the question
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question": question})

In [None]:
# Display the list of sub-questions produced by generate_queries_decomposition.
questions

There are 2 ways of using the answers to the decomposed questions:
- a) Answer Recursively
- b) Answer Individually

Answer Recursively

In [None]:
# Prompt for recursive answering: besides the retrieved context, it receives
# the question + answer pairs accumulated from the earlier sub-questions.
template= """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question:

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""
decomposition_prompt = ChatPromptTemplate.from_template(template)

In [None]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
  """Format one question and its answer as a single text block.

  Args:
    question: The question text.
    answer: The answer text.

  Returns:
    The string "Question:<question> \\n Answer:<answer>" with leading and
    trailing whitespace stripped.
  """
  return f"Question:{question} \n Answer:{answer}\n\n".strip()

# Specify the LLM used to answer the sub-questions
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# The chain does not depend on the loop variable, so it is built once instead
# of being rebuilt for every sub-question.
rag_chain = (
    {"context": itemgetter("question") | retriever,   # documents retrieved for the sub-question
     "question": itemgetter("question"),              # the sub-question itself
     "q_a_pairs": itemgetter("q_a_pairs")}            # Q+A pairs answered so far
    | decomposition_prompt   # fill the prompt template with the three inputs
    | llm                    # send the filled prompt to the LLM
    | StrOutputParser()      # extract the answer as a plain string
)

# Accumulates the formatted question + answer pairs; each sub-question is
# answered with the pairs of all previous sub-questions as background.
q_a_pairs = ""

for q in questions:
  answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
  q_a_pair = format_qa_pair(q, answer)
  q_a_pairs = q_a_pairs + "\n---\n" + q_a_pair

In [None]:
# Display the answer produced for the last sub-question processed by the loop above.
answer

Answer Individually

In [None]:
# Answering each sub-question individually

from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# RAG prompt pulled from the LangChain hub
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question, prompt, sub_question_generator_chain):
  """Decompose a question and answer every sub-question with RAG.

  Uses the notebook-level `retriever` and `llm`.

  Args:
    question: The original question to decompose.
    prompt: Prompt template taking "context" and "question" inputs.
    sub_question_generator_chain: Chain that, invoked with
      {"question": ...}, returns an iterable of sub-questions.

  Returns:
    A tuple (rag_results, sub_questions): the list of answers, in the same
    order as the sub-questions, and the sub-questions themselves.
  """
  # 1. Decompose the question into sub-questions
  sub_questions = sub_question_generator_chain.invoke({"question": question})

  # 2. The answering chain is the same for every sub-question: build it once
  answer_chain = prompt | llm | StrOutputParser()

  # 3. Retrieve and answer for each sub-question
  rag_results = []
  for sub_question in sub_questions:
    # invoke() is the current retriever entry point (get_relevant_documents is deprecated)
    retrieved_docs = retriever.invoke(sub_question)
    answer = answer_chain.invoke({"context": retrieved_docs,
                                  "question": sub_question})
    rag_results.append(answer)

  # 4. Return the answers and the sub-questions
  return rag_results, sub_questions

# Backward-compatible alias for the originally misspelled function name
retieve_and_rag = retrieve_and_rag

# The same sub-question generator chain is used in both approaches
answers, questions = retrieve_and_rag(question,
                                      prompt_rag,
                                      generate_queries_decomposition)

In [None]:
#Make function to format Q & A pairs
def format_qa_pairs(questions, answers):
  """Format parallel questions and answers as one numbered text block.

  Questions and answers are paired positionally with zip, so extra items in
  the longer input are ignored. Numbering starts at 1.

  Args:
    questions: Iterable of question texts.
    answers: Iterable of answer texts.

  Returns:
    The concatenated "Question i: ... \\n Answer i: ..." entries, separated by
    blank lines, with leading and trailing whitespace stripped.
  """
  entries = [
      f"Question {i}: {question} \n Answer {i}: {answer}\n\n"
      for i, (question, answer) in enumerate(zip(questions, answers), start=1)
  ]
  return "".join(entries).strip()

# Context for the final answer: the sub-questions with their individual answers
context = format_qa_pairs(questions, answers)


# Prompt asking the LLM to synthesize one answer from the Q+A pairs
template ="""Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Final chain: fill the prompt, call the LLM, return the answer as a string
final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

# Pass the formatted Q+A pairs and the original question to the final chain
final_rag_chain.invoke({"context": context, "question": question})

## Step Back

This method first asks a more abstract ("step-back") version of the question, and uses what is retrieved for it as additional context, so that the LLM can give a better answer to the actual question.

In [None]:
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

# Examples of (question -> step-back question) pairs
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]

# Each example becomes a human message (the input) followed by an AI message
# (the output).
example_prompt = ChatPromptTemplate.from_messages(
    [("human", "{input}"),
     ("ai", "{output}")]
)
# Few-shot block built from the examples; the chat variant is required because
# it is embedded inside a chat prompt below.
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
# Full step-back prompt: system instruction, the few-shot examples, then the
# new question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        # Examples
        few_shot_prompt,
        # New question
        ("user", "{question}"),
    ]
)

In [None]:
# Chain that turns a question into its step-back version:
# prompt -> LLM -> plain string
generate_queries_step_back = (
    prompt
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
)
question = "What is task decomposition for LLM agents?"
# Generate the step-back question for the question above
generate_queries_step_back.invoke({"question": question})

In [None]:
# Imported here so this cell does not depend on an import made in another section
from langchain_core.runnables import RunnableLambda

# Response prompt: answers the original question using both the normal and the
# step-back context.
response_prompt_template="""You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

chain = (
    {
        # Normal context: documents retrieved with the original question
        "normal_context": RunnableLambda(lambda x: x["question"]) | retriever,
        # Step-back context: documents retrieved with the generated step-back question
        "step_back_context": generate_queries_step_back | retriever,
        # Pass the original question through unchanged
        "question": lambda x: x["question"],
    }
    | response_prompt
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
)

chain.invoke({"question": question})

## HyDE (Hypothetical Document Embedding)

In this method, a hypothetical document is generated from the query. The hypothetical document, instead of the raw query, is then embedded and used to search the vector database, which gives the retrieval step additional context.

In [None]:
# Imports
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# HyDE document generation: the prompt asks the LLM to write a passage that
# answers the question.
template="""Please write a scientific paper passage to answer the question
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

# Chain: prompt -> LLM -> the hypothetical passage as a plain string
hyde_llm = ChatOpenAI(temperature=0)
generate_docs_for_retrieval = prompt_hyde | hyde_llm | StrOutputParser()

# Run
question = "What is task decomposition for LLM agents?"
generate_docs_for_retrieval.invoke({"question": question})

In [None]:
# Retrieve: generate the hypothetical passage and use it as the query sent to
# the retriever.
retrieval_chain = generate_docs_for_retrieval | retriever
retrieved_docs = retrieval_chain.invoke({"question": question})
retrieved_docs

In [None]:
# RAG: answer the original question using the documents retrieved through the
# hypothetical passage.
template="""Answer the following question based on this context:

{context}

Question: {question}"""

# Build the prompt from the template
prompt = ChatPromptTemplate.from_template(template)


# Full chain: prompt -> LLM -> plain string.
# StrOutputParser must be instantiated; piping the class itself would call its
# constructor on the LLM output instead of parsing it.
final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)


# Use the retrieved documents as context and the original question as the query
final_rag_chain.invoke({"context": retrieved_docs, "question": question})