In [1]:
# Fetch the OpenAI API key through the local `helper` module and bind it to
# OPENAI_API_KEY.
# NOTE(review): what get_openai_api_key() does besides returning the key
# (e.g. reading a .env file or touching os.environ) is not visible here — confirm.
from helper import get_openai_api_key
OPENAI_API_KEY = get_openai_api_key()

In [2]:
# Patch asyncio so an event loop can be re-entered from inside the loop that
# Jupyter is already running.
# NOTE(review): presumably required by the llama_index calls made later in the
# notebook — confirm which call actually needs it.
import nest_asyncio
nest_asyncio.apply()

In [3]:
# Local PDF filenames of the papers the agent will be able to query.
papers = ["metagpt.pdf", "longlora.pdf", "selfrag.pdf"]

# OpenReview download links for the papers.
# NOTE(review): presumably listed in the same order as `papers`; nothing in the
# visible cells downloads from these links, so the PDFs are assumed to already
# exist in the working directory — confirm.
urls = [
    "https://openreview.net/pdf?id=VtmBAGCN7o",
    "https://openreview.net/pdf?id=6PmJoRfdaK",
    "https://openreview.net/pdf?id=hSyW5go0v8",
]

In [4]:
from utils import get_doc_tools
from pathlib import Path

# Map each PDF filename to its [vector tool, summary tool] pair.
paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")
    # The filename without its extension (e.g. "selfrag") is handed to
    # get_doc_tools as the name for this paper's tools.
    tool_name = Path(paper).stem
    vector_tool, summary_tool = get_doc_tools(paper, tool_name)
    paper_to_tools_dict[paper] = [vector_tool, summary_tool]

Getting tools for paper: metagpt.pdf
Getting tools for paper: longlora.pdf
Getting tools for paper: selfrag.pdf


In [5]:
# Flatten the per-paper tool pairs into one list, keeping the order of `papers`
# (vector tool first, then summary tool, for each paper).
initial_tools = []
for paper in papers:
    initial_tools.extend(paper_to_tools_dict[paper])

In [6]:
from llama_index.llms.openai import OpenAI

# Chat model that drives the agent's tool selection.
# The key fetched in the first cell is passed explicitly so the client does not
# silently depend on whatever OPENAI_API_KEY happens to be in the process
# environment.
# NOTE(review): if the helper returned None, the client is expected to fall
# back to the environment variable as before — confirm against the
# llama_index OpenAI constructor.
llm = OpenAI(model="gpt-3.5-turbo", api_key=OPENAI_API_KEY)

In [7]:
# Sanity check: each of the 3 papers contributes a vector tool and a summary
# tool, so 6 tools are expected here.
len(initial_tools)

6

In [8]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

# Build a function-calling worker over every paper tool. verbose=True makes the
# agent print each tool call and its output while it runs.
agent_worker = FunctionCallingAgentWorker.from_tools(initial_tools, llm=llm, verbose=True)

# The runner wraps the worker and is the object queried in the following cells.
agent = AgentRunner(agent_worker)

In [10]:
# Two-part question about a single paper: the agent has to pick the Self-RAG
# tools out of all registered tools and call them once per sub-question.
selfrag_question = (
    "Tell me about the self-rag training used in selfrag, "
    "and then tell me about the self-rag inference"
)
response = agent.query(selfrag_question)

Added user message to memory: Tell me about the self-rag training used in selfrag, and then tell me about the self-rag inference
=== Calling Function ===
Calling function: vector_tool_selfrag with args: {"query": "self-rag training"}
=== Function Output ===
SELF-RAG significantly reduces training costs compared to PPO. It learns to generate special tokens to evaluate its own prediction after each generated segment, enabling the use of a soft re-ranking mechanism or hard constraints at inference.
=== Calling Function ===
Calling function: vector_tool_selfrag with args: {"query": "self-rag inference"}
=== Function Output ===
SELF-RAG's inference phase involves generating reflection tokens to self-evaluate its own output, allowing the model to tailor its behavior to different task requirements. This self-evaluation process enables the model to control its behavior during inference, ensuring that it can adjust its output based on the specific demands of the task at hand.
=== LLM Response =