#### **Retrieval Mechanisms**

In [18]:
# Loading the required libraries / packages 
from dotenv import load_dotenv
load_dotenv()
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings,HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq
from pydantic import BaseModel,Field
from typing import List
from langchain_core.runnables import RunnableLambda,RunnablePassthrough
from operator import itemgetter
from langchain.load import loads,dumps
from langchain.retrievers.document_compressors import FlashrankRerank
from flashrank import Ranker, RerankRequest
from langchain.retrievers import EnsembleRetriever,BM25Retriever,ContextualCompressionRetriever
from langchain_community.document_transformers import LongContextReorder
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.schema import Document
import bs4


In [2]:
# Setting the environment variables
os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")

##### **1. Re-Ranking**

In [3]:
# loading the blog from the web
loader = WebBaseLoader(web_path="https://lilianweng.github.io/posts/2023-06-23-agent/",
                       bs_kwargs=dict(parse_only=bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))),)
blog_docs = loader.load()

- Use from_tiktoken_encoder when working with OpenAI models or other token-based models to ensure chunking is token-aware.
- Use the default RecursiveCharacterTextSplitter when dealing with character-based chunking, such as for basic text processing where token limits are not a concern.

In [4]:
# split the text and store it into vectorstore
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300,chunk_overlap=50)
splitted_docs = text_splitter.split_documents(blog_docs)

# Load the embeddings
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

# Store the embeddings and splitted docs into vectorstore
vectorstore = Chroma.from_documents(documents=splitted_docs,embedding=hf_embeddings)

# Invoke the retriever to retrieve the documents from the vectorstore.
retriever = vectorstore.as_retriever()

  hf_embeddings = HuggingFaceBgeEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [5]:
retriever.get_relevant_documents("What are AI agents ?")

  retriever.get_relevant_documents("What are AI agents ?")


[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 13. The generative agent architecture. (Image source: Park et al. 2023)\nThis fun simulation results in emergent social behavior, such as information diffusion, relationship memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g. host a party and invite many others).\nProof-of-Concept Examples#\nAutoGPT has drawn a lot of attention into the possibility of setting up autonomous agents with LLM as the main controller. It has quite a lot of reliability issues given the natural language interface, but nevertheless a cool proof-of-concept demo. A lot of code in AutoGPT is about format parsing.\nHere is the system message used by AutoGPT, where {{...}} are user inputs:\nYou are {{ai-name}}, {{user-provided AI bot description}}.\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and

In [6]:
# define the template and create the chatprompttemplate
template = """
You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):
"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [7]:
# create class to answer the questions
class GenerateQuestions(BaseModel):
    """ Structured the generated questions"""
    questions : List[str] = Field(...,description="List of generated questions")

In [8]:
# create the chain with structure output
generate_querries  = (
    prompt_rag_fusion
    | ChatGroq(model="llama-3.3-70b-versatile",temperature=0).with_structured_output(GenerateQuestions)
)

In [9]:
def call_generate_querries(question_dict:dict):
    question = question_dict["question"]
    questions_lst = generate_querries.invoke(question).questions
    return questions_lst

In [10]:
question = "What is task decomposition for LLM agents?"
question_dict = {"question": question}
call_generate_querries(question_dict)

['What is task decomposition?',
 'Task decomposition for LLM agents',
 'How to apply task decomposition to LLM agents',
 'Benefits of task decomposition for LLM agents']

In [11]:
def reciprocal_rank_fusion(results:list[list],k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents and an optional parameter k used in the RRF formula """
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
                
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

question = "What is task decomposition for LLM agents?"
retrieval_chain_rag_fusion = (RunnablePassthrough() 
                              | RunnableLambda(call_generate_querries)
                              | retriever.map() 
                              | reciprocal_rank_fusion)
docs = retrieval_chain_rag_fusion.invoke({"question": question})
docs

  (loads(doc), score)


[(Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first searc

In [12]:
# creation of final RAG chain

template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatGroq(model="llama-3.3-70b-versatile",temperature=0)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Task decomposition for LLM (Large Language Model) agents refers to the process of breaking down complex tasks into smaller, more manageable subgoals or steps. This is a crucial component of planning in LLM-powered autonomous agent systems, as it enables the agent to efficiently handle complex tasks.\n\nTask decomposition can be achieved through various methods, including:\n\n1. Chain of Thought (CoT): This involves instructing the model to "think step by step" to decompose hard tasks into smaller and simpler steps.\n2. Tree of Thoughts: This extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure.\n3. Simple prompting: The model can be prompted with simple questions like "Steps for XYZ" or "What are the subgoals for achieving XYZ?"\n4. Task-specific instructions: The model can be provided with task-specific instructions, such as "Write a story outline" for writing a novel.\n5. Human inputs: Task decomposition can also be done with human inputs

##### **Ensemble Retriever**

An Ensemble Retriever is a combination of multiple retrievers. Instead of relying on a single retrieval mechanism, it aggregates results from multiple retrievers to improve accuracy and recall.

How It Works
- Uses multiple retrievers (e.g., VectorStoreRetriever, BM25Retriever, MultiQueryRetriever).
- Aggregates the results based on different strategies like union, intersection, or weighted combination.
- Helps improve search accuracy by leveraging the strengths of different retrieval methods.

In [13]:
# This is dense retriever
dense_retreiver = retriever

# bm25 retriever
bm25_retriever = BM25Retriever.from_documents(splitted_docs)

# ensemble retreiver
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, dense_retreiver], weights=[0.5, 0.5])

In [14]:
# query the retriever
question = "What is task decompostion ?"
ensemble_retriever.invoke(question)

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='The AI assistant can parse user input to several tasks: [{"task": task, "id", task_id, "dep": dependency_task_ids, "args": {"text": text, "image": URL, "audio": URL, "video": URL}}]. The "dep" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag "-task_id" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.\n\n(2) Model selection: LLM distributes th

##### **Long Context Reorder**

**Long Context Reorder (LCR)** is a technique used to rearrange retrieved documents to improve their relevance before passing them to an LLM. This helps when processing long documents where ordering affects comprehension.

How it Works?
- Retrieve Documents: Fetch multiple documents using a retriever.
- Query Relevance: Rank documents based on closeness to the query.
- Chronological Order: Order documents based on timestamps.
- Semantic Clustering: Group similar documents together.
- Pass to LLM: After reordering, feed them into the language model.

In [15]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create a retriever
retriever = Chroma.from_documents(splitted_docs, embedding=embeddings).as_retriever(
    search_kwargs={"k": 10}
)
query = "What is task decomposition"

# Get relevant documents ordered by relevance score
docs = retriever.invoke(query)
docs

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search

"Now the documents are semantically grouped, increasing the chances of similar documents being positioned together. Depending on the reordering logic, key documents may appear earlier in the reordered list."

In [16]:
# perform the reordering using the LongContextReorder
reordering = LongContextReorder()
reordered_docs = reordering.transform_documents(docs)
reordered_docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 2.  Examples of reasoning trajectories for knowledge-intensive tasks (e.g. HotpotQA, FEVER) and decision-making tasks (e.g. AlfWorld Env, WebShop). (Image source: Yao et al. 2023).\nIn both experiments on knowledge-intensive tasks and decision-making tasks, ReAct works better than the Act-only baseline where Thought: … step is removed.\nReflexion (Shinn & Labash 2023) is a framework to equip agents with dynamic memory and self-reflection capabilities to improve reasoning skills. Reflexion has a standard RL setup, in which the reward model provides a simple binary reward and the action space follows the setup in ReAct where the task-specific action space is augmented with language to enable complex reasoning steps. After each action $a_t$, the agent computes a heuristic $h_t$ and optionally may decide to reset the environment to start a new trial depending on the self-reflection res

##### **Self-Querying / MetaData Retriever**

A Self-Querying Retriever is an advanced retrieval method in LangChain that allows an LLM to generate structured queries dynamically based on user input. Instead of just retrieving documents based on simple keyword search, it understands the query, extracts filters or metadata, and fetches relevant documents accordingly.

How it works ?
- User Query → The LLM analyzes the query and extracts relevant search parameters.
- Structured Query Generation → Converts the query into:
    - A filter (e.g., retrieving documents by date, category, or topic).
    - A search term (vector or keyword-based retrieval).
- Document Retrieval → Fetches documents based on these structured filters.

When to Use Self-Querying Retriever?
- ✔ If your dataset has structured metadata (e.g., date, author, category).
- ✔ When users need complex filtering (e.g., "Find all AI papers from 2023").
- ✔ For advanced search capabilities in RAG applications and LLM-powered assistants.

In [24]:
docs = [
    Document(
        page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose",
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    Document(
        page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
        metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
    ),
    Document(
        page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    Document(
        page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them",
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    Document(
        page_content="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
    Document(
        page_content="Three men walk into the Zone, three men walk out of the Zone",
        metadata={
            "year": 1979,
            "director": "Andrei Tarkovsky",
            "genre": "science fiction",
            "rating": 9.9,
        },
    ),

    Document(
        page_content="A young man navigates love and friendship over the course of several years while backpacking through Europe.",
        metadata={"year": 2013, "director": "Ayan Mukerji", "rating": 7.9, "genre": "romance"},
    ),
    Document(
        page_content="A gritty cop hunts down a ruthless gangster in the heart of Mumbai, blurring the line between good and evil.",
        metadata={"year": 2012, "director": "Anurag Kashyap", "rating": 8.2, "genre": "crime"},
    ),
    Document(
        page_content="A man with short-term memory loss seeks revenge on the people who wronged him, using tattoos as clues.",
        metadata={"year": 2008, "director": "A.R. Murugadoss", "rating": 7.5, "genre": "thriller"},
    ),
    Document(
        page_content="A determined teacher transforms the life of a dyslexic child, unlocking his hidden potential.",
        metadata={"year": 2007, "director": "Aamir Khan", "rating": 8.4, "genre": "drama"},
    ),
    Document(
        page_content="A rebellious young man joins an underground radio show and sparks a revolution through storytelling.",
        metadata={"year": 2006, "director": "Rakeysh Omprakash Mehra", "rating": 8.3, "genre": "drama"},
    ),
    Document(
        page_content="A deaf and mute girl finds an unlikely mentor in a surly old man who helps her find her voice.",
        metadata={"year": 2005, "director": "Sanjay Leela Bhansali", "rating": 8.2, "genre": "drama"},
    ),
    Document(
        page_content="A struggling actor disguises himself as a woman to land a role in a television soap opera.",
        metadata={"year": 2012, "director": "Kamal Haasan", "rating": 7.7, "genre": "comedy"},
    ),
    Document(
        page_content="A gangster's rise and fall in the Mumbai underworld is told through the eyes of his closest associate.",
        metadata={"year": 1998, "director": "Ram Gopal Varma", "rating": 8.1, "genre": "crime"},
    ),
    Document(
        page_content="Two slackers get caught in a web of mistaken identities and hilarious coincidences.",
        metadata={"year": 2000, "director": "Rajkumar Santoshi", "rating": 8.1, "genre": "comedy"},
    ),
    Document(
        page_content="A father fights the system to prove that his autistic son is not a terrorist, but a misunderstood soul.",
        metadata={"year": 2010, "director": "Karan Johar", "rating": 7.9, "genre": "drama"},
    ),
]
vectorstore = Chroma.from_documents(docs, embeddings)

In [25]:
# define the metadata fields 
metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the movie",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="year",
        description="The year the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating", description="A 1-10 rating for the movie", type="float"
    ),
]
document_content_description = "Brief summary of a movie"
llm = ChatGroq(model="llama-3.3-70b-versatile",temperature=0)
retriever = SelfQueryRetriever.from_llm(llm, vectorstore, document_content_description, metadata_field_info, verbose=True)

In [21]:
# This example only specifies a relevant query
retriever.invoke("What are some movies about dinosaurs")

[Document(metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),
 Document(metadata={'genre': 'animated', 'year': 1995}, page_content='Toys come alive and have a blast doing so'),
 Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 7. Comparison of AD, ED, source policy and RL^2 on environments that require memory and exploration. Only binary reward is assigned. The source policies are trained with A3C for "dark" environments and DQN for watermaze.(Image source: Laskin et al. 2023)\nComponent Two: Memory#\n(Big thank you to ChatGPT for helping me draft this section. I’ve learned a lot about the human brain and data structure for fast MIPS in my conversations with ChatGPT.)\nTypes of Memory#\nMemory can be defined as the processes used to acquire, store, retain, and later retrieve information. There are several types of memory in human br

In [22]:
# This example only specifies a filter
retriever.invoke("I want to watch a movie rated higher than 8.5")

[Document(metadata={'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'rating': 9.9, 'year': 1979}, page_content='Three men walk into the Zone, three men walk out of the Zone'),
 Document(metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]

In [23]:
# This example specifies a query and a filter
retriever.invoke("Has Greta Gerwig directed any movies about women")

[Document(metadata={'director': 'Greta Gerwig', 'rating': 8.3, 'year': 2019}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them')]

In [35]:
# This example gives the movies of karan johar
retriever.invoke("movies directed by ram gopal varma")

[Document(metadata={'director': 'Ram Gopal Varma', 'genre': 'crime', 'rating': 8.1, 'year': 1998}, page_content="A gangster's rise and fall in the Mumbai underworld is told through the eyes of his closest associate.")]

In [29]:
# This example gives the romantic movies release after 2006
retriever.invoke("Romantic movies release after 2006")

[Document(metadata={'director': 'Christopher Nolan', 'rating': 8.2, 'year': 2010}, page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...'),
 Document(metadata={'director': 'Christopher Nolan', 'rating': 8.2, 'year': 2010}, page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...'),
 Document(metadata={'director': 'Ayan Mukerji', 'genre': 'romance', 'rating': 7.9, 'year': 2013}, page_content='A young man navigates love and friendship over the course of several years while backpacking through Europe.'),
 Document(metadata={'director': 'Greta Gerwig', 'rating': 8.3, 'year': 2019}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them')]