# Lesson 2: Tool Calling


## SETUP


In [8]:
from dotenv import load_dotenv, find_dotenv
import os

# Find a .env file (via find_dotenv) and load its variables into the environment.
_ = load_dotenv(find_dotenv())
# Read the key from the environment; the result is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [9]:
import nest_asyncio

# Patch asyncio so event loops can be nested; presumably needed because the
# notebook kernel already runs a loop and a later cell requests async
# execution (use_async=True on the summary query engine).
nest_asyncio.apply()

## DEFINE SIMPLE TOOL


In [10]:
from llama_index.core.tools import FunctionTool


def add(x: int, y: int) -> int:
    """Adds two integers together."""
    total = x + y
    return total


def mystery(x: int, y: int) -> int:
    """Mystery function that operates on top of two numbers."""
    # Implementation detail (kept out of the docstring): square of the sum.
    combined = x + y
    return combined * combined


# Wrap both plain functions as tools in one pass; no explicit name is given,
# so each tool is identified by its function (see the "Calling function: mystery"
# line in the output of the next cell).
add_tool, mystery_tool = (
    FunctionTool.from_defaults(fn=tool_fn) for tool_fn in (add, mystery)
)

In [48]:
from llama_index.llms.openai import OpenAI

# No api_key is passed here — presumably the client falls back to the
# OPENAI_API_KEY environment variable loaded from .env (TODO confirm).
llm_llamaindex = OpenAI(model="gpt-3.5-turbo")
# Offer both tools and let the model decide which one to call for the prompt.
response = llm_llamaindex.predict_and_call(
    [add_tool, mystery_tool],
    # Alternative prompt that should route to the add tool instead:
    # "What is the output of adding 2 and 9",
    "Tell me the output of the mystery function on 2 and 9",
    verbose=True,
)

response

=== Calling Function ===
Calling function: mystery with args: {"x": 2, "y": 9}
=== Function Output ===
121


AgentChatResponse(response='121', sources=[ToolOutput(content='121', tool_name='mystery', raw_input={'args': (), 'kwargs': {'x': 2, 'y': 9}}, raw_output=121, is_error=False)], source_nodes=[], is_dummy_stream=False)

## DEFINE AN AUTO-RETRIEVAL TOOL


In [19]:
# load the PDF as a list of documents
pdf_path = "../files/metagpt.pdf"  # relative path; resolved against the kernel's working directory

from llama_index.legacy import SimpleDirectoryReader

documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()

# number of loaded documents (presumably one per PDF page — TODO confirm)
len(documents)

29

In [44]:
# split the documents into chunks ("nodes") for indexing
from llama_index.legacy.node_parser import SentenceSplitter

# chunk_size=1024 caps the size of each chunk (presumably measured in tokens — TODO confirm)
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)
# number of resulting nodes
len(nodes)

34

In [51]:
# build a vector index over the nodes, then expose it as a query engine
from llama_index.legacy import VectorStoreIndex

vector_index = VectorStoreIndex(nodes)
# similarity_top_k=2: each query retrieves the 2 most similar nodes
query_engine = vector_index.as_query_engine(similarity_top_k=2)

In [59]:
# Ask an unfiltered question, then show the metadata of every node that was
# retrieved to answer it.
response = query_engine.query("What are some high-level results of MetaGPT?")

for source_node in response.source_nodes:
    print(source_node.metadata)

{'page_label': '7', 'file_name': 'metagpt.pdf', 'file_path': '../files/metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-06-10', 'last_modified_date': '2024-06-08', 'last_accessed_date': '2024-06-22'}
{'page_label': '23', 'file_name': 'metagpt.pdf', 'file_path': '../files/metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-06-10', 'last_modified_date': '2024-06-08', 'last_accessed_date': '2024-06-22'}


In [62]:
# view available metadata filters
# (presumably any key shown here, e.g. page_label, can be used as a filter "key")
nodes[0].metadata

{'page_label': '1',
 'file_name': 'metagpt.pdf',
 'file_path': '../files/metagpt.pdf',
 'file_type': 'application/pdf',
 'file_size': 16911937,
 'creation_date': '2024-06-10',
 'last_modified_date': '2024-06-08',
 'last_accessed_date': '2024-06-22'}

In [63]:
# build a query engine that is restricted by a metadata filter
from llama_index.legacy.vector_stores import MetadataFilters

# restrict the search to nodes whose page_label is "2"
query_engine_w_filters = vector_index.as_query_engine(
    similarity_top_k=2,
    filters=MetadataFilters.from_dicts([{"key": "page_label", "value": "2"}]),
)

In [64]:
# Run the question through the page-filtered engine and list the metadata of
# the nodes it retrieved.
response_using_filters = query_engine_w_filters.query(
    "What are some high-level results of MetaGPT?"
)

for source_node in response_using_filters.source_nodes:
    print(source_node.metadata)

{'page_label': '2', 'file_name': 'metagpt.pdf', 'file_path': '../files/metagpt.pdf', 'file_type': 'application/pdf', 'file_size': 16911937, 'creation_date': '2024-06-10', 'last_modified_date': '2024-06-08', 'last_accessed_date': '2024-06-22'}


### define the auto-retrieval tool


In [71]:
# function to query the vector engine and return the response
from typing import List, Optional
from llama_index.legacy.vector_stores import FilterCondition  # to combine diff filters


def vector_query(query: str, page_numbers: Optional[List[str]] = None) -> str:
    """Perform a vector search over an index.

    query (str): the string query to be embedded.
    page_numbers (Optional[List[str]]): Filter by set of pages. Leave as None (or an empty list) if we want to perform a vector search over all pages. Else, filter by the set of specified pages.
    """
    # NOTE: FunctionTool.from_defaults builds the tool description the LLM
    # reads from this signature and docstring, so the docstring is written for
    # the model. Giving page_numbers a default makes the argument optional,
    # which is what the "leave blank" instruction requires.

    # treat a missing page list like an empty one, i.e. no page restriction
    # (an empty filter list is expected to leave the search unfiltered — TODO confirm)
    page_numbers = page_numbers or []

    # one page_label filter per requested page
    metadata_dicts = [{"key": "page_label", "value": page} for page in page_numbers]

    # combine the per-page filters with OR so a node on any requested page matches
    filtered_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR,
        ),
    )

    # Despite the `-> str` annotation, the response object itself is returned
    # (not its text), so callers can still inspect its metadata.
    return filtered_engine.query(query)


# Expose vector_query to the LLM under an explicit tool name.
tool_vector_query = FunctionTool.from_defaults(
    fn=vector_query,
    name="tool_vector_query",
)

In [73]:
# LLM used for tool selection, with the API key passed explicitly and temperature 0
llm_llamaindex = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    api_key=OPENAI_API_KEY,
)

# the question names a page; the model is expected to pass it on as page_numbers
query = "What are the high-level results of MetaGPT as described on page 2?"

response = llm_llamaindex.predict_and_call(
    [tool_vector_query],  # only the vector tool is offered here
    query,
    verbose=True,
)

=== Calling Function ===
Calling function: tool_vector_query with args: {"query": "high-level results of MetaGPT", "page_numbers": ["2"]}
=== Function Output ===
MetaGPT achieves a new state-of-the-art (SoTA) in code generation benchmarks with 85.9% and 87.7% in Pass@1. It stands out in handling higher levels of software complexity and offering extensive functionality, demonstrating a 100% task completion rate in experimental evaluations.


In [108]:
from pprint import pprint as pp

# Pretty-print the metadata carried by each tool output of the last call.
for tool_output in response.sources:
    pp(tool_output.raw_output.metadata)

{'604e1c0a-926a-4b60-82a5-12820ce0b679': {'creation_date': '2024-06-10',
                                          'file_name': 'metagpt.pdf',
                                          'file_path': '../files/metagpt.pdf',
                                          'file_size': 16911937,
                                          'file_type': 'application/pdf',
                                          'last_accessed_date': '2024-06-22',
                                          'last_modified_date': '2024-06-08',
                                          'page_label': '2'}}


## MORE TOOLS

In [109]:
from llama_index.legacy import SummaryIndex
from llama_index.legacy.tools import QueryEngineTool

In [110]:
# build a summary index over all nodes and expose it as a query engine
# NOTE(review): as far as I can tell SummaryIndex does not compute embeddings at build time — TODO confirm
summary_index = SummaryIndex(nodes)
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,  # async execution; presumably why nest_asyncio.apply() is called during setup
)

In [112]:
# wrap the summary query engine as a named tool with a description of when to use it
tool_summary = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    name="tool_summary",
    description="Useful if you want to get a summary of MetaGPT",
)

In [115]:
# question that names a specific page, so the page-filtered vector tool is the expected choice
query = "What are the MetaGPT comparisons with ChatDev described on page 8?"
query

'What are the MetaGPT comparisons with ChatDev described on page 8?'

In [116]:
# offer both tools; the model picks one for the query (here it chose tool_vector_query with page 8)
response = llm_llamaindex.predict_and_call(
    [tool_vector_query, tool_summary],
    query,
    verbose=True,
)

=== Calling Function ===
Calling function: tool_vector_query with args: {"query": "MetaGPT comparisons with ChatDev", "page_numbers": ["8"]}
=== Function Output ===
MetaGPT outperforms ChatDev on the SoftwareDev dataset in various aspects. For example, MetaGPT achieves a higher score in executability, takes less time for execution, uses more tokens but requires fewer tokens to generate one line of code compared to ChatDev. Additionally, MetaGPT surpasses ChatDev in code statistic metrics and human revision cost, showcasing the advantages of utilizing SOPs in collaborative efforts between multiple agents.


In [120]:
# NOTE(review): `response` is reassigned by the summary call further down. This cell was
# executed (In [120]) after that call (In [119]), so the saved output shows the summary
# answer, not the page-8 comparison. Re-run the cells in order to refresh it.
response.response

'The paper does not discuss the impact of climate change on biodiversity and ecosystems.'

In [118]:
from pprint import pprint as pp

# Pretty-print the metadata carried by each tool output of the last call.
for tool_output in response.sources:
    pp(tool_output.raw_output.metadata)

{'649836fa-0944-449c-8674-a53b34d9f066': {'creation_date': '2024-06-10',
                                          'file_name': 'metagpt.pdf',
                                          'file_path': '../files/metagpt.pdf',
                                          'file_size': 16911937,
                                          'file_type': 'application/pdf',
                                          'last_accessed_date': '2024-06-22',
                                          'last_modified_date': '2024-06-08',
                                          'page_label': '8'}}


In [119]:
# Open-ended summary request; expected to route to tool_summary rather than
# the page-filtered vector tool. Uses llm_llamaindex, the LLM object defined
# in this notebook (there is no variable named `llm`).
response = llm_llamaindex.predict_and_call(
    [tool_vector_query, tool_summary],
    "What is a summary of the paper?",
    verbose=True,
)

=== Calling Function ===
Calling function: tool_summary with args: {"input": "The paper discusses the impact of climate change on biodiversity and ecosystems."}
=== Function Output ===
The paper does not discuss the impact of climate change on biodiversity and ecosystems.
