# RAG from scratch: Query Transformations

Query Transformations mean that our approach focuses on re-writing and/or modifying questions for retrieval.

In [1]:
!pip install -q langchain_community tiktoken langchainhub chromadb langchain


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
from access import Access

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = Access.LANGCHAIN_API_KEY

In multi query, we are going to have 1 question, based on that, we will feed it to an ai model to make multiple queries to gather data from the vectorstore and retrieve the correct documents, by feeding these documents into the LLM, we can get the most accurate answer.

In [3]:
import bs4
from langchain_classic import hub
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import ChatOllama, OllamaEmbeddings

# === Indexing ===
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

docs = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed
vectorstore = Chroma.from_documents(documents=splits,
                                    embedding=OllamaEmbeddings(model="mxbai-embed-large"))

retriever = vectorstore.as_retriever()

USER_AGENT environment variable not set, consider setting it to identify your requests.


## Prompt

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser

# Prompt
template = """
You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}
"""

prompt_perspectives = ChatPromptTemplate.from_template(template)

generate_queries = (
    prompt_perspectives
    | ChatOllama(model="llama3.1", temperature=0)
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='\nYou are an AI language model assistant. Your task is to generate five \ndifferent versions of the given user question to retrieve relevant documents from a vector \ndatabase. By generating multiple perspectives on the user question, your goal is to help\nthe user overcome some of the limitations of the distance-based similarity search. \nProvide these alternative questions separated by newlines. Original question: {question}\n'), additional_kwargs={})])
| ChatOllama(model='llama3.1', temperature=0.0)
| StrOutputParser()
| RunnableLambda(...)

In [12]:
def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    seen = set()
    uniq_docs = []
    
    for sublist in documents:
        for doc in sublist:
            key = (doc.page_content, tuple(sorted(doc.metadata.items())))
            if key not in seen:
                seen.add(key)
                uniq_docs.append(doc)
                
    return uniq_docs
    # # Flatten list of lists, and convert each Document to string
    # flattened_docs = [Serializable(doc) for sublist in documents for doc in sublist]
    
    # # Get unique documents
    # unique_docs = list(set(flattened_docs))
    
    # # Return
    # return [loads(doc) for doc in unique_docs]

question = "What is the definition of reward hacking?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
len(docs)


16

In [13]:
docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2024-11-28-reward-hacking/'}, page_content='According to their experiments, LLMs are sensitive to the position of responses and suffer from positional bias (i.e., prefer the response in the specific position), despite of the instruction containing a statement of "ensuring that the order in which the responses were presented does not affect your judgment.". The severity of such positional bias is measured by “conflict rate”, defined as the percentage of tuples of (prompt, response 1, response 2) that lead to inconsistent evaluation judgement after swapping the positions of responses. Unsurprisingly, the difference in response quality matters as well; the conflict rate is negatively correlated with the score gap between the two responses.'),
 Document(metadata={'source': 'https://lilianweng.github.io/posts/2024-11-28-reward-hacking/'}, page_content='Multiple evidence calibration (MEC): The evaluator model is asked to provi

In [16]:
from operator import itemgetter
from langchain_ollama import ChatOllama
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOllama(model="llama3.1", temperature=0)

final_rag_chain = (
    {"context": retrieval_chain,
     "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'Reward hacking refers to the possibility of an agent gaming the reward function to achieve high rewards through undesired behavior, as proposed by Amodei et al. in their seminal paper "Concrete Problems in AI Safety" (2016). It involves exploiting the task specification or finding "holes" in the design of the reward function to achieve higher proxy rewards but lower true rewards.'