In [1]:
# Install/upgrade the listed packages into the kernel's environment
# (-U upgrades already-installed packages, -q keeps pip's output quiet).
# NOTE(review): later cells import `llama_index`, which is not installed here --
# presumably it is already present in the environment; confirm.
# NOTE(review): `helper` is not imported by any cell visible here -- confirm this
# PyPI package is actually the one intended.
%pip install helper nest_asyncio python-dotenv -U -q

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import nest_asyncio
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment first,
# so that the lookup below can see them.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# Do not print or display the key: cell outputs are saved with the notebook.

# Patch asyncio so event loops can be nested inside the notebook's running loop.
nest_asyncio.apply()

In [4]:
from typing import List, Optional

from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.tools import FunctionTool
from llama_index.core.vector_stores import FilterCondition
from llama_index.core.vector_stores import MetadataFilters
from llama_index.llms.openai import OpenAI


# Read the source PDF into document objects.
pdf_reader = SimpleDirectoryReader(input_files=["metagpt.pdf"])
documents = pdf_reader.load_data()

# Split the documents into nodes using a chunk size of 1024.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents=documents)

# Build the vector index that `vector_query` searches.
vector_index = VectorStoreIndex(nodes=nodes)

def vector_query(
    query: str,
    page_numbers: Optional[List[str]] = None
) -> str:
    """Perform a vector search over an index.

    query (str): the string query to be embedded.
    page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE
        (or pass an empty list) if we want to perform a vector search over all
        pages. Otherwise, filter by the set of specified pages.

    Returns whatever the query engine's `query` call produces for `query`.
    """
    # Treat an omitted / None argument the same as "no page filter" so that
    # callers (including the LLM) really can leave the argument out.
    page_numbers = page_numbers or []

    # One filter per requested page, matched against the "page_label"
    # metadata key.
    metadata_dicts = [
        {"key": "page_label", "value": p} for p in page_numbers
    ]

    # OR the page filters together: a node matches if it is on any listed page.
    query_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR
        )
    )
    # NOTE(review): the engine's response is returned as-is; the `-> str`
    # annotation is kept so the tool signature stays unchanged -- confirm
    # whether callers expect a plain string.
    response = query_engine.query(query)
    return response
    

# Wrap `vector_query` as a tool the agent can call by the name "vector_tool".
vector_tool = FunctionTool.from_defaults(fn=vector_query, name="vector_tool")


In [5]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

# Summary index built over the same nodes as the vector index.
summary_index = SummaryIndex(nodes)

# Query engine configured with the "tree_summarize" response mode and async use.
summary_query_engine = summary_index.as_query_engine(
    use_async=True,
    response_mode="tree_summarize",
)

# Expose the summary engine to the agent as "summary_tool".
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    name="summary_tool",
    description="Useful if you want to get a summary of MetaGPT",
)

In [6]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner


# LLM used by the agent; temperature 0 is set explicitly.
llm = OpenAI(temperature=0, model="gpt-4-1106-preview")

# Worker that is given both tools; verbose=True is what produces the
# "=== Calling Function ===" trace in the cell outputs.
agent_worker = FunctionCallingAgentWorker.from_tools(
    tools=[vector_tool, summary_tool],
    llm=llm,
    verbose=True,
)

# Runner through which the notebook issues `query` / `chat` calls.
agent = AgentRunner(agent_worker)

In [7]:
# Two-part question: the agent has to cover both the roles and how they communicate.
response = agent.query(
    "Tell me about the agent roles in MetaGPT, and then "
    "how they communicate with each other."
)

Added user message to memory: Tell me about the agent roles in MetaGPT, and then how they communicate with each other.
=== Calling Function ===
Calling function: summary_tool with args: {"input": "agent roles in MetaGPT"}
=== Function Output ===
The agent roles in MetaGPT include Product Manager, Architect, Project Manager, Engineer, and QA Engineer. The Product Manager is responsible for creating the Product Requirement Document and conducting competitive analysis. The Architect is in charge of designing the system architecture, interface definitions, and technical specifications. The Project Manager breaks down the project into tasks and assigns them to Engineers. Engineers develop the code based on the provided specifications. The QA Engineer reviews the code, creates unit tests, and ensures the software's quality. Each role is essential in the collaborative software development process within MetaGPT.
=== Calling Function ===
Calling function: summary_tool with args: {"input": "how

In [9]:
# Ask about the evaluation datasets and the results for one of them.
# Adjacent string literals are concatenated verbatim, so the first piece must
# end with a space; otherwise the prompt reads "used,and".
response = agent.chat(
    "Tell me about the evaluation datasets used, "
    "and also mention the results obtained for one of the datasets."
)

Added user message to memory: Tell me about the evaluation datasets used,and also mention the results obtained for one of the datasets.
=== Calling Function ===
Calling function: summary_tool with args: {"input": "evaluation datasets used in MetaGPT"}
=== Function Output ===
The evaluation datasets used in MetaGPT include HumanEval, MBPP, and a self-generated SoftwareDev dataset. The HumanEval dataset consists of 164 handwritten programming tasks, while the MBPP dataset comprises 427 Python tasks. The SoftwareDev dataset includes 70 representative software development tasks covering various scopes such as mini-games, image processing algorithms, and data visualization. These datasets were utilized to evaluate the performance of MetaGPT in code generation tasks.
=== Calling Function ===
Calling function: vector_tool with args: {"query": "results obtained for one of the evaluation datasets in MetaGPT", "page_numbers": []}
=== Function Output ===
The results obtained for one of the evalua