
<h1 style="text-align: center;">Query Transformation</h1>

# 0. Setting Up The Env

## 0.1 Install

In [1]:

!uv pip install bs4 langchainhub langchain_community tiktoken langchain-openai langchainhub chromadb langchain

[2mUsing Python 3.13.3 environment at: D:\01 Work\10-New-Learnings\.venv[0m
[2mAudited [1m8 packages[0m [2min 77ms[0m[0m


## 0.2 Import

In [2]:
import os
from dotenv import load_dotenv

In [3]:
import tiktoken

In [4]:
import bs4

# LangChain core
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Hub
from langsmith import Client
# Loaders & Vector DBs 
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma

# OpenAI 
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


## 0.3 Constants

In [5]:

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

In [6]:

load_dotenv(override=True)


openai_api_key = os.getenv('OPENAI_API_KEY')
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')

In [7]:
if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")

if langchain_api_key:
    print(f"Langchain API Key exists and begins {langchain_api_key[:8]}")
else:
    print("Langchain API Key not set")

OpenAI API Key exists and begins sk-proj-
Langchain API Key exists and begins lsv2_pt_


In [8]:
os.environ['OPENAI_API_KEY'] = openai_api_key
os.environ['LANGCHAIN_API_KEY'] = langchain_api_key

# 1. Multi Query

## 1.1 Indexing

In [9]:
# Load blog

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

In [10]:
# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)

In [11]:
# Index
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=OpenAIEmbeddings())

retriever = vectorstore.as_retriever()

In [12]:
from langchain_core.prompts import ChatPromptTemplate


## 1.2 Multi Query

In [13]:
# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""

prompt_perspectives = ChatPromptTemplate.from_template(template)


generate_queries = (
    prompt_perspectives 
    | ChatOpenAI(temperature=0) 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

## 1.3 Retrieve

In [16]:
from langchain_core.load import dumps, loads


In [14]:

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

In [17]:


question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
len(docs)

  return [loads(doc) for doc in unique_docs]


7

## 1.4 RAG

In [18]:
from operator import itemgetter

In [19]:
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(temperature=0)

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. This process allows the agent to plan ahead and think step by step, transforming big tasks into multiple manageable tasks. Task decomposition can be done through various techniques such as Chain of Thought (CoT) and Tree of Thoughts, as well as using task-specific instructions or human inputs.'

# End