# Building a Multi-Document Agentic RAG

## Helper functions

In [None]:
import os
from dotenv import load_dotenv, find_dotenv

# These helpers expect to find a .env file.
# The format of that file is (without the leading comment marker):
# API_KEYNAME=AStringThatIsTheLongAPIKeyFromSomeService
def load_env():
    """Load the variables from the .env file located by ``find_dotenv()``."""
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)

def get_openai_api_key():
    """Return the ``OPENAI_API_KEY`` environment variable.

    The .env file is loaded first via ``load_env()``. The result is ``None``
    when the variable is not set.
    """
    load_env()
    return os.getenv("OPENAI_API_KEY")


In [1]:
# Read the OpenAI key through the helper defined in the previous cell.
# NOTE(review): this variable is not referenced again in the visible cells;
# presumably the OpenAI client picks the key up from the environment — confirm.
OPENAI_API_KEY = get_openai_api_key()

In [2]:
# Patch asyncio so that event loops can be nested.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop and the summary query engine below is built with
# use_async=True — confirm.
import nest_asyncio
nest_asyncio.apply()

In [None]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, SummaryIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.tools import FunctionTool, QueryEngineTool
from llama_index.core.vector_stores import MetadataFilters, FilterCondition
from typing import List, Optional

def get_doc_tools(
    file_path: str,
    name: str,
    chunk_size: int = 1024,
    similarity_top_k: int = 2,
) -> "tuple[FunctionTool, QueryEngineTool]":
    """Build a vector-query tool and a summary tool for a single document.

    Args:
        file_path: Path of the document to load.
        name: Short identifier used to name the tools
            (``vector_tool_{name}`` and ``summary_tool_{name}``) and in the
            summary tool's description.
        chunk_size: Value passed as ``chunk_size`` to ``SentenceSplitter``.
            Defaults to 1024.
        similarity_top_k: Value passed as ``similarity_top_k`` to the vector
            query engine. Defaults to 2.

    Returns:
        A ``(vector_query_tool, summary_tool)`` pair.
    """

    # Load the document, split it into nodes, and index the nodes. The same
    # nodes are reused for the summary index further down.
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=chunk_size)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)

    # NOTE(review): FunctionTool.from_defaults() below is given no explicit
    # description, so it presumably derives one from this function's signature
    # and docstring. Both are therefore left untouched (including the `-> str`
    # annotation, although the query engine's response object is returned
    # as-is) — confirm before rewording.
    def vector_query(
        query: str, 
        page_numbers: Optional[List[str]] = None
    ) -> str:
        """Use to answer questions over a given paper.
    
        Useful if you have specific questions over the paper.
        Always leave page_numbers as None UNLESS there is a specific page you want to search for.
    
        Args:
            query (str): the string query to be embedded.
            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE 
                if we want to perform a vector search
                over all pages. Otherwise, filter by the set of specified pages.
        
        """

        # One filter per requested page on the "page_label" metadata key,
        # combined with OR. With no pages requested the filter list is empty.
        page_numbers = page_numbers or []
        metadata_dicts = [
            {"key": "page_label", "value": p} for p in page_numbers
        ]

        query_engine = vector_index.as_query_engine(
            similarity_top_k=similarity_top_k,
            filters=MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.OR
            )
        )
        return query_engine.query(query)

    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}",
        fn=vector_query
    )

    # Summary tool: a tree-summarize query engine over the same nodes.
    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )
    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(
            f"Useful for summarization questions related to {name}"
        ),
    )

    return vector_query_tool, summary_tool


## Set up an agent over 3 papers

In [3]:
# Download URLs of the three papers. `urls` and `papers` are parallel lists:
# the entry at index i of `urls` is saved under the file name at index i of
# `papers` (see the commented-out download loop later in the notebook).
urls = [
    "https://openreview.net/pdf?id=VtmBAGCN7o",
    "https://openreview.net/pdf?id=6PmJoRfdaK",
    "https://openreview.net/pdf?id=hSyW5go0v8",
]

# Local PDF file names; they are passed as file paths to get_doc_tools().
papers = [
    "metagpt.pdf",
    "longlora.pdf",
    "selfrag.pdf",
]

In [4]:
from pathlib import Path

# Map each paper file name to its [vector tool, summary tool] pair.
paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")
    # The file stem (e.g. "metagpt" for "metagpt.pdf") becomes the tool-name suffix.
    vector_tool, summary_tool = get_doc_tools(paper, Path(paper).stem)
    paper_to_tools_dict[paper] = [vector_tool, summary_tool]

Getting tools for paper: metagpt.pdf
Getting tools for paper: longlora.pdf
Getting tools for paper: selfrag.pdf


In [5]:
# Flatten the per-paper tool lists into a single list, following `papers` order.
initial_tools = [
    tool
    for pdf_name in papers
    for tool in paper_to_tools_dict[pdf_name]
]

In [6]:
from llama_index.llms.openai import OpenAI

# LLM shared by both agents built in this notebook.
llm = OpenAI(model="gpt-3.5-turbo")

In [7]:
# Sanity check: two tools per paper, so three papers give six tools.
len(initial_tools)

6

In [8]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

# Build an agent that is handed every tool in `initial_tools` up front.
# verbose=True produces the "=== Calling Function ===" trace shown in the outputs.
agent_worker = FunctionCallingAgentWorker.from_tools(
    initial_tools, 
    llm=llm, 
    verbose=True
)
agent = AgentRunner(agent_worker)

In [9]:
# Two-part question about a single paper. The two adjacent string literals
# are concatenated into one query string.
response = agent.query(
    "Tell me about the evaluation dataset used in LongLoRA, "
    "and then tell me about the evaluation results"
)

Added user message to memory: Tell me about the evaluation dataset used in LongLoRA, and then tell me about the evaluation results
=== Calling Function ===
Calling function: vector_tool_longlora with args: {"query": "evaluation dataset"}
=== Function Output ===
PG19 test split
=== Calling Function ===
Calling function: vector_tool_longlora with args: {"query": "evaluation results"}
=== Function Output ===
The evaluation results show that the models achieve better perplexity with longer context sizes. Increasing the context window size leads to improved perplexity values. Additionally, the models are fine-tuned on different context lengths, such as 100k, 65536, and 32768, and achieve promising results on these extremely large settings. However, there is some perplexity degradation observed on small context sizes for the extended models, which is a known limitation of Position Interpolation.
=== LLM Response ===
The evaluation dataset used in LongLoRA is the PG19 test split. 

Regarding 

In [32]:
# Same query as the previous cell.
# NOTE(review): the execution count (32) is higher than that of every later
# cell, so this cell was presumably re-run after `agent` had been rebound to
# the retrieval-based agent defined further down — confirm.
response = agent.query(
    "Tell me about the evaluation dataset used in LongLoRA, "
    "and then tell me about the evaluation results"
)

Added user message to memory: Tell me about the evaluation dataset used in LongLoRA, and then tell me about the evaluation results
=== Calling Function ===
Calling function: summary_tool_longlora with args: {"input": "evaluation dataset"}
=== Function Output ===
The evaluation dataset used in the experiments described in the provided context is PG19.
=== Calling Function ===
Calling function: summary_tool_longlora with args: {"input": "evaluation results"}
=== Function Output ===
The evaluation results demonstrate that the proposed methods, such as LongLoRA, S2-Attn, and LoRA+, achieve comparable performance to full attention or full fine-tuning baselines, while being more efficient in terms of computational cost. Different attention patterns were analyzed, showing varying impacts on model performance during fine-tuning. The models also showed improved perplexity scores as the evaluation context length increased and achieved state-of-the-art performance on various benchmarks, showcasin

In [10]:
# Question that spans two papers; the final answer is printed as a string.
response = agent.query("Give me a summary of both Self-RAG and LongLoRA")
print(str(response))

Added user message to memory: Give me a summary of both Self-RAG and LongLoRA
=== Calling Function ===
Calling function: summary_tool_selfrag with args: {"input": "Self-RAG"}
=== Function Output ===
Self-RAG is a framework that enhances the quality and factuality of a large language model through retrieval and self-reflection. It involves training a single arbitrary language model to adaptively retrieve passages on-demand, generate text informed by retrieved passages, and reflect on its own generations using special tokens called reflection tokens. This framework improves the generation quality and factuality of the language model without compromising its original creativity and versatility.
=== Calling Function ===
Calling function: summary_tool_longlora with args: {"input": "LongLoRA"}
=== Function Output ===
LongLoRA is an efficient method for extending the context length of Large Language Models (LLMs) while minimizing computational resources compared to full fine-tuning. It combin

### Set up an agent over 11 papers

In [18]:
# Download URLs of the eleven papers. `urls` and `papers` are parallel lists:
# the entry at index i of `urls` is saved under the file name at index i of
# `papers` (see the commented-out download loop below).
# This rebinds the three-paper `urls` / `papers` defined earlier.
urls = [
    "https://openreview.net/pdf?id=VtmBAGCN7o",
    "https://openreview.net/pdf?id=6PmJoRfdaK",
    "https://openreview.net/pdf?id=LzPWWPAdY4",
    "https://openreview.net/pdf?id=VTF8yNQM66",
    "https://openreview.net/pdf?id=hSyW5go0v8",
    "https://openreview.net/pdf?id=9WD9KwssyT",
    "https://openreview.net/pdf?id=yV6fD7LYkF",
    "https://openreview.net/pdf?id=hnrB5YHoYu",
    "https://openreview.net/pdf?id=WbWtOYIzIK",
    "https://openreview.net/pdf?id=c5pwL0Soay",
    "https://openreview.net/pdf?id=TpD2aG1h0D"
]

# Local PDF file names; they are passed as file paths to get_doc_tools().
papers = [
    "metagpt.pdf",
    "longlora.pdf",
    "loftq.pdf",
    "swebench.pdf",
    "selfrag.pdf",
    "zipformer.pdf",
    "values.pdf",
    "finetune_fair_diffusion.pdf",
    "knowledge_card.pdf",
    "metra.pdf",
    "vr_mcl.pdf"
]

To download these papers, uncomment and run the code below:


    #for url, paper in zip(urls, papers):
         #!wget "{url}" -O "{paper}"
    

In [19]:
# Rebuild the paper -> [vector tool, summary tool] mapping for all eleven papers.
# `Path` comes from the `from pathlib import Path` import in an earlier cell.
paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")
    # The file stem (e.g. "metagpt" for "metagpt.pdf") becomes the tool-name suffix.
    vector_tool, summary_tool = get_doc_tools(paper, Path(paper).stem)
    paper_to_tools_dict[paper] = [vector_tool, summary_tool]

Getting tools for paper: metagpt.pdf
Getting tools for paper: longlora.pdf
Getting tools for paper: loftq.pdf
Getting tools for paper: swebench.pdf
Getting tools for paper: selfrag.pdf
Getting tools for paper: zipformer.pdf
Getting tools for paper: values.pdf
Getting tools for paper: finetune_fair_diffusion.pdf
Getting tools for paper: knowledge_card.pdf
Getting tools for paper: metra.pdf
Getting tools for paper: vr_mcl.pdf


In [20]:
# Display the mapping to inspect the tool objects created for each paper.
paper_to_tools_dict

{'metagpt.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x7f8f78fc05d0>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78f43a50>],
 'longlora.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x7f8f5c3d4910>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f5c3ef750>],
 'loftq.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x7f8f78d9b390>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78d9b750>],
 'swebench.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x7f8f5c4e78d0>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78b87f10>],
 'selfrag.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x7f8f758a7550>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f758aa190>],
 'zipformer.pdf': [<llama_index.core.tools.function_tool.FunctionTool at 0x7f8f78d99310>,
  <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f758a1010>],
 'values.pdf':

### Extend the Agent with Tool Retrieval

In [21]:
# Flatten the per-paper tool lists into a single list, following `papers` order.
all_tools = [
    tool
    for pdf_name in papers
    for tool in paper_to_tools_dict[pdf_name]
]
all_tools

[<llama_index.core.tools.function_tool.FunctionTool at 0x7f8f78fc05d0>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78f43a50>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x7f8f5c3d4910>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f5c3ef750>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x7f8f78d9b390>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78d9b750>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x7f8f5c4e78d0>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78b87f10>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x7f8f758a7550>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f758aa190>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x7f8f78d99310>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f758a1010>,
 <llama_index.core.tools.function_tool.FunctionTool at 0x7f8f5c3fbbd0>,
 <llama_index.core.tools.query_engine.QueryEngineToo

In [22]:
# define an "object" index and retriever over these tools
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

# Index the tool objects themselves, backed by a VectorStoreIndex, so that
# relevant tools can be retrieved for a given query.
obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)

In [23]:
# Retriever over the tool index, configured with similarity_top_k=3.
obj_retriever = obj_index.as_retriever(similarity_top_k=3)

In [24]:
# Try the retriever on its own: which tools does it select for this question?
tools = obj_retriever.retrieve(
    "Tell me about the eval dataset used in MetaGPT and SWE-Bench"
)

In [25]:
# Display the retrieved tool objects.
tools

[<llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78f43a50>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78d29550>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x7f8f78b87f10>]

In [26]:
# Inspect the name and description of the third retrieved tool.
tools[2].metadata

ToolMetadata(description='Useful for summarization questions related to swebench', name='summary_tool_swebench', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)

In [28]:
# Inspect the name and description of the first retrieved tool.
tools[0].metadata

ToolMetadata(description='Useful for summarization questions related to metagpt', name='summary_tool_metagpt', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)

In [29]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

# Rebuild the agent. Instead of a fixed tool list it is given `obj_retriever`
# as `tool_retriever` (presumably so tools are looked up per query — confirm).
# This rebinds `agent_worker` and `agent` from the three-paper section.
agent_worker = FunctionCallingAgentWorker.from_tools(
    tool_retriever=obj_retriever,
    llm=llm, 
    # The backslash-newline pairs inside the literal are line continuations,
    # so the prompt starts with a single space and ends with one newline.
    system_prompt=""" \
You are an agent designed to answer queries over a set of given papers.
Please always use the tools provided to answer a question. Do not rely on prior knowledge.\

""",
    verbose=True
)
agent = AgentRunner(agent_worker)

In [30]:
# Cross-paper comparison question; the final answer is printed as a string.
response = agent.query(
    "Tell me about the evaluation dataset used "
    "in MetaGPT and compare it against SWE-Bench"
)
print(str(response))

Added user message to memory: Tell me about the evaluation dataset used in MetaGPT and compare it against SWE-Bench
=== Calling Function ===
Calling function: summary_tool_metagpt with args: {"input": "evaluation dataset used in MetaGPT"}
=== Function Output ===
The evaluation dataset used in MetaGPT includes HumanEval, MBPP, and SoftwareDev.
=== Calling Function ===
Calling function: summary_tool_swebench with args: {"input": "comparison of evaluation dataset in MetaGPT and SWE-Bench"}
=== Function Output ===
The evaluation dataset in MetaGPT involves tasks related to generating text based on prompts across various domains, while SWE-Bench's dataset focuses on software engineering tasks derived from real GitHub issues and pull requests in Python repositories. MetaGPT's dataset includes language understanding, generation, and reasoning tasks, whereas SWE-Bench's dataset comprises challenges like patch generation, reasoning over long contexts, and navigating codebases. The tasks in SWE-

In [31]:
# Comparison across the two LoRA-related papers. The response is stored but
# not printed in this cell.
response = agent.query(
    "Compare and contrast the LoRA papers (LongLoRA, LoftQ). "
    "Analyze the approach in each paper first. "
)

Added user message to memory: Compare and contrast the LoRA papers (LongLoRA, LoftQ). Analyze the approach in each paper first. 
=== Calling Function ===
Calling function: summary_tool_longlora with args: {"input": "Approach in LongLoRA"}
=== Function Output ===
The approach in LongLoRA involves efficiently extending the context windows of pre-trained large language models (LLMs) using shifted sparse attention (S2-Attn) during fine-tuning. This method allows for training models with longer context lengths while maintaining the original attention architecture during inference. By combining low-rank adaptation (LoRA) with S2-Attn, LongLoRA achieves significant computational savings compared to traditional methods, emphasizing the importance of learnable embedding and normalization layers for effective and efficient long context fine-tuning.
=== Calling Function ===
Calling function: summary_tool_loftq with args: {"input": "Approach in LoftQ"}
=== Function Output ===
The approach in LoftQ