In [1]:
import openai
import os
import random
from dotenv import load_dotenv
# Pull variables from a local .env file into the process environment, then
# hand the OpenAI key (None when the variable is unset) to the client library.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [2]:

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

# One shared GPT-4 client: kept in `llm` for explicit use in later cells and
# registered as the LlamaIndex-wide default.
Settings.llm = llm = OpenAI(model="gpt-4")

# Default embedding model used when building vector indexes.
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
)

In [3]:
from llama_index.core import SimpleDirectoryReader
# Cities covered by this notebook; each title doubles as the file stem under
# data/wiki/ and as the name of the tool built for that city.
wiki_titles = [
    "Toronto",
    "Seattle",
    "Chicago",
    "Boston",
    "Houston",
]

In [5]:
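# One-time setup (disabled): uncomment and run this cell once to download the
# Wikipedia extract for each title into data/wiki/<title>.txt. The next cell
# reads those files, so they must exist before it runs.
# from pathlib import Path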

# import requests

# for title in wiki_titles:
#     response = requests.get(
#         "https://en.wikipedia.org/w/api.php",
#         params={
#             "action": "query",
#             "format": "json",
#             "titles": title,
#             "prop": "extracts",
#             # 'exintro': True,
#             "explaintext": True,
#         },
#     ).json()
#     page = next(iter(response["query"]["pages"].values()))
#     wiki_text = page["extract"]

#     data_path = Path("data/wiki")
#     if not data_path.exists():
#         Path.mkdir(data_path)

#     with open(data_path / f"{title}.txt", "w") as fp:
#         fp.write(wiki_text)

In [4]:
# Load each city's text file into a list of documents, keyed by city title.
city_docs = {
    title: SimpleDirectoryReader(input_files=[f"data/wiki/{title}.txt"]).load_data()
    for title in wiki_titles
}

In [5]:
from llama_index.core import VectorStoreIndex
from llama_index.agent.openai import OpenAIAgent
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core import VectorStoreIndex

def _make_city_tool(title):
    """Build a query-engine tool over the documents loaded for one city.

    A vector index is created from ``city_docs[title]``, turned into a query
    engine that uses the notebook-level ``llm``, and wrapped in a
    ``QueryEngineTool`` named after the city.
    """
    engine = VectorStoreIndex.from_documents(city_docs[title]).as_query_engine(
        llm=llm
    )
    return QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(
            name=title,
            description=f"Useful for questions related to {title}",
        ),
    )


# Tool dictionary: city title -> tool answering questions about that city.
tool_dict = {title: _make_city_tool(title) for title in wiki_titles}

In [6]:
# Sanity-check the tool dictionary, using the Toronto tool as the sample.
toronto_tool = tool_dict["Toronto"]

print(len(tool_dict))
print(tool_dict.keys())
print(toronto_tool.metadata.description)
# Query the underlying engine directly to confirm the index answers.
print(toronto_tool.query_engine.query("Where is Toronto?"))
print(list(tool_dict.values()))

5
dict_keys(['Toronto', 'Seattle', 'Chicago', 'Boston', 'Houston'])
Useful for questions related to Toronto
Toronto is located in the Canadian province of Ontario, at the western end of Lake Ontario.
[<llama_index.core.tools.query_engine.QueryEngineTool object at 0x17e75f440>, <llama_index.core.tools.query_engine.QueryEngineTool object at 0x17f825d60>, <llama_index.core.tools.query_engine.QueryEngineTool object at 0x17f825b50>, <llama_index.core.tools.query_engine.QueryEngineTool object at 0x17f8257f0>, <llama_index.core.tools.query_engine.QueryEngineTool object at 0x17f06dc40>]


In [7]:
# define an "object" index and retriever over these tools
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

# Index the city tools themselves so that the tool for a task can be looked
# up by similarity.
all_city_tools = list(tool_dict.values())
tool_index = ObjectIndex.from_objects(all_city_tools, index_cls=VectorStoreIndex)

# Only the single best-matching tool is returned per lookup.
tool_retriever = tool_index.as_retriever(similarity_top_k=1)

In [14]:
from llama_index.core.llms import ChatMessage
from llama_index.core import ChatPromptTemplate
from typing import List

# User-side prompt asking the LLM to draft a system prompt for `{task}`.
# Written as adjacent literals so the trailing spaces in the text are visible.
GEN_SYS_PROMPT_STR = (
    "Task information is given below. \n"
    "\n"
    "Given the task, please generate a system prompt for an OpenAI-powered bot to solve this task: \n"
    "{task} "
)

# Fixed system framing followed by the templated user request.
gen_sys_prompt_messages = [
    ChatMessage(
        content="You are helping to build a system prompt for another bot.",
        role="system",
    ),
    ChatMessage(content=GEN_SYS_PROMPT_STR, role="user"),
]

GEN_SYS_PROMPT_TMPL = ChatPromptTemplate(gen_sys_prompt_messages)


# Holds the most recently built agent under the key "agent".
agent_cache = dict()

from llama_index.core.agent import ReActAgent

def create_system_prompt(task: str):
    """Create system prompt for another agent given an input task.

    Args:
        task: Description of the task the new agent should solve. It is
            substituted into ``GEN_SYS_PROMPT_TMPL``.

    Returns:
        The content of the LLM's chat reply, i.e. the generated system prompt.
    """
    # The model is selected with `model=`, as everywhere else in this
    # notebook; the previous `llm="gpt-4"` keyword did not select the model.
    llm = OpenAI(model="gpt-4")
    fmt_messages = GEN_SYS_PROMPT_TMPL.format_messages(task=task)
    response = llm.chat(fmt_messages)
    return response.message.content


def get_tools(task: str):
    """Return the names of the tools retrieved as relevant for ``task``."""
    tool_names = []
    for tool in tool_retriever.retrieve(task):
        tool_names.append(tool.metadata.name)
    return tool_names


def create_agent(system_prompt: str, tool_names: List[str]):
    """Create an agent given a system prompt and an input set of tools.

    Args:
        system_prompt: Instructions for the new agent. Passed to the ReAct
            agent as its ``context`` so the prompt actually reaches the agent
            (it was previously accepted but never used).
        tool_names: Keys of ``tool_dict`` naming the tools the agent may call.

    Returns:
        A status message: a success notice, or a description of the error
        raised while building the agent. Errors are reported as text rather
        than raised so the calling builder agent can read them.

    Side effects:
        On success the new agent is stored in ``agent_cache["agent"]``,
        replacing any previously cached agent.
    """
    llm = OpenAI(model="gpt-4")
    try:
        # Resolve names to tool objects; an unknown name raises KeyError,
        # which is reported through the error message below.
        input_tools = [tool_dict[tn] for tn in tool_names]

        agent = ReActAgent.from_tools(
            input_tools,
            llm=llm,
            verbose=True,
            context=system_prompt,
        )
        print(agent)
        agent_cache["agent"] = agent
        return_msg = "Agent created successfully."
    except Exception as e:
        return_msg = f"An error occurred when building an agent. Here is the error: {repr(e)}"
    return return_msg

In [15]:
from llama_index.core.tools import FunctionTool

# Expose the three builder helpers as function tools, in the order the
# builder agent is expected to call them.
system_prompt_tool, get_tools_tool, create_agent_tool = (
    FunctionTool.from_defaults(fn=builder_fn)
    for builder_fn in (create_system_prompt, get_tools, create_agent)
)

In [17]:
# System instructions for the builder agent: which tools to call, in order.
GPT_BUILDER_SYS_STR = """\
You are helping to construct an agent given a user-specified task. You should generally use the tools in this order to build the agent.

1) Create system prompt tool: to create the system prompt for the agent.
2) Get tools tool: to fetch the candidate set of tools to use.
3) Create agent tool: to create the final agent.
"""

prefix_msgs = [
    ChatMessage(content=GPT_BUILDER_SYS_STR, role="system"),
]

# Tools listed in the same order the system message tells the agent to use them.
builder_tools = [system_prompt_tool, get_tools_tool, create_agent_tool]

builder_agent = OpenAIAgent.from_tools(
    tools=builder_tools,
    prefix_messages=prefix_msgs,
    verbose=True,
)

In [18]:

# Ask the builder to assemble a Toronto agent; on success the agent ends up
# in agent_cache["agent"].
build_request = "Build an agent that can tell me about Toronto."
builder_agent.query(build_request)

Added user message to memory: Build an agent that can tell me about Toronto.
=== Calling Function ===
Calling function: create_system_prompt with args: {
  "task": "tell me about Toronto"
}
Got output: "Generate a brief summary about Toronto, including its history, landmarks, culture, and notable features."

=== Calling Function ===
Calling function: get_tools with args: {
  "task": "tell me about Toronto"
}
Got output: ['Toronto']

=== Calling Function ===
Calling function: create_agent with args: {
  "system_prompt": "Generate a brief summary about Toronto, including its history, landmarks, culture, and notable features.",
  "tool_names": ["Toronto"]
}
<llama_index.core.agent.react.base.ReActAgent object at 0x282a342c0>
Got output: Agent created successfully.



Response(response='The agent has been successfully created. It will provide you with a brief summary about Toronto, including its history, landmarks, culture, and notable features.', source_nodes=[], metadata=None)

In [20]:
# Fetch the agent stored by create_agent and ask it a question.
city_agent = agent_cache["agent"]
toronto_answer = city_agent.query("Where is Toronto?")
print(toronto_answer)

> Running step 98329262-eeb6-4686-938f-2bc9493b5ba7. Step input: Where is Toronto?
[1;3;38;5;200mThought: The user is asking about the location of Toronto. I can use the Toronto tool to get this information.
Action: Toronto
Action Input: {'input': 'Where is Toronto?'}
[0m[1;3;34mObservation: Toronto is located in the Canadian province of Ontario, on the northwestern shore of Lake Ontario.
[0m> Running step 3e9c69cc-399a-440b-8063-8732293b37de. Step input: None
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer.
Answer: Toronto is located in the Canadian province of Ontario, on the northwestern shore of Lake Ontario.
[0mToronto is located in the Canadian province of Ontario, on the northwestern shore of Lake Ontario.
