# This is multi files agent
<hr>

In [1]:
import os

from dotenv import load_dotenv
load_dotenv()

import nest_asyncio
nest_asyncio.apply()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

In [9]:
papers = [
    "data/three/metagpt.pdf",
    "data/three/longlora.pdf",
    "data/three/selfrag.pdf",
]

<hr>

### Chunk and embedding

In [4]:
from llama_index.core import SimpleDirectoryReader
documents = SimpleDirectoryReader("data/three").load_data()

In [5]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [6]:
from llama_index.core import VectorStoreIndex

vector_index = VectorStoreIndex(nodes)

<hr>

### Tools
- vector tool

In [7]:
from typing import List
from llama_index.core.vector_stores import FilterCondition
from llama_index.core.tools import FunctionTool
from llama_index.core.vector_stores import MetadataFilters

def vector_query(
    query : str,
    page_numbers: List[str],
) -> str:
    """Performs a vector search over an index.
    
    query (str): the string query to embeded.
    page_numbers (List[str]): Filter by set of pages. Leave BLANK if we want to search
    over all pages. Otherwise, filter by the set of sepcified pages.
    """

    metadata_dicts = [
        {"key": "page_label", "value": p}
        for p in page_numbers
    ]

    query_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR
        )
    )
    response = query_engine.query(query)
    return response

vector_query_tool = FunctionTool.from_defaults(
    name="vector_query",
    fn=vector_query,
)

- summary tool

In [8]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

summary_index = SummaryIndex(nodes)
summary_query_engine = summary_index.as_query_engine(
    response_mode='tree_summarize',
    use_async=True,
)
summary_tool = QueryEngineTool.from_defaults(
    name="summary_tool",
    query_engine=summary_query_engine,
    description=(
        'Useful if you want to get a summary of the ragchecker.'
    )
)

### Search by the tools

In [10]:
from pathlib import Path

paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")
    paper_to_tools_dict[paper] = [vector_query_tool, summary_tool]

Getting tools for paper: data/three/metagpt.pdf
Getting tools for paper: data/three/longlora.pdf
Getting tools for paper: data/three/selfrag.pdf


put into a flat list

In [11]:
initial_tools = [t for paper in papers for t in paper_to_tools_dict[paper]]

### LLM Model Setting
<hr>

In [17]:
from llama_index.llms.openai import OpenAI

llm = OpenAI(model='gpt-3.5-turbo',temperature=0)

- each paper has TWO tools

In [18]:
len(initial_tools)

6

### Overall angent worker
<hr>

In [19]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

agent_worker = FunctionCallingAgentWorker.from_tools(
    initial_tools,
    llm=llm,
    verbose=True,
)

agent = AgentRunner(agent_worker)

Test with quetsion

In [20]:
response = agent.query(
    "Tell me about the evaluation dataset used in LongLoRA,"
    " and then tell me the evaluation results."
)

Added user message to memory: Tell me about the evaluation dataset used in LongLoRA, and then tell me the evaluation results.
=== Calling Function ===
Calling function: vector_query with args: {"query": "evaluation dataset used in LongLoRA", "page_numbers": ["7"]}
=== Function Output ===
The evaluation dataset used in LongLoRA is the book corpus dataset PG19 and the cleaned Arxiv Math proof-pile dataset.
=== Calling Function ===
Calling function: vector_query with args: {"query": "evaluation results of LongLoRA", "page_numbers": ["7"]}
=== Function Output ===
LongLoRA achieves promising results on extremely large settings, with some perplexity degradation observed on small context sizes for the extended models. It is noted that LongLoRA performs well in retrieval tasks on long contexts, showing comparable performance to state-of-the-art models with lower fine-tuning costs.
=== LLM Response ===
The evaluation dataset used in LongLoRA consists of the book corpus dataset PG19 and the clea

In [21]:
response = agent.query(
    "Gvie me a summary of both Self-RAG and LongLoRA."
)
print(str(response))

Added user message to memory: Gvie me a summary of both Self-RAG and LongLoRA.
=== Calling Function ===
Calling function: summary_tool with args: {"input": "Self-RAG"}
=== Function Output ===
Self-RAG is a framework that enhances the quality and factuality of large language models through retrieval on demand and self-reflection. It trains a language model to learn to retrieve, generate, and critique text passages and its own generation using special tokens called reflection tokens. This method allows for tailored behaviors at test time by leveraging reflection tokens, leading to significant improvements in performance, factuality, and citation accuracy compared to other models.
=== Calling Function ===
Calling function: summary_tool with args: {"input": "LongLoRA"}
=== Function Output ===
LongLoRA is an efficient fine-tuning approach that extends the context sizes of pre-trained large language models with limited computation cost. It combines improved LoRA with shifted sparse attention

The retrival tools for multiple docs
[Watch it here](https://www.bilibili.com/video/BV14t421u7AM/?spm_id_from=333.788&vd_source=674819236ef139055aebeaab34e08502)