In [4]:
%pip install llama-index -U

Note: you may need to restart the kernel to use updated packages.


In [16]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleKeywordTableIndex,
    SimpleDirectoryReader,
)
from llama_index.core import SummaryIndex
from llama_index.core.schema import IndexNode
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI
from llama_index.core.callbacks import CallbackManager


In [8]:
os.environ["OPENAI_API_KEY"] = ""
openai.api_key = os.environ["OPENAI_API_KEY"]

In [9]:
wiki_titles = [
    "Serie A",
    "Premier League",
    "La Liga",
    "Ligue 1",
    "Bundesliga",
]

In [10]:
from pathlib import Path
import requests
for title in wiki_titles:
    params = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
        },
    ).json()
    page = next(iter(params["query"]["pages"].values()))
    wiki_text = page["extract"]
    data_path = Path("data")
    if not data_path.exists():
        Path.mkdir(data_path)
    with open(data_path / f"{title}.txt", "w") as f:
        f.write(wiki_text)

In [11]:
leagues_docs = {}
for wiki_title in wiki_titles:
    leagues_docs[wiki_title] = SimpleDirectoryReader(
        input_files=[f"data/{wiki_title}.txt"]
    ).load_data()

In [17]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

Settings.llm = OpenAI(temperature=0, model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")


In [18]:
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import load_index_from_storage, StorageContext
from llama_index.core.node_parser import SentenceSplitter
import os

node_parser = SentenceSplitter()

# Build agents dictionary
agents = {}
query_engines = {}

# this is for the baseline
all_nodes = []

for idx, wiki_title in enumerate(wiki_titles):
    nodes = node_parser.get_nodes_from_documents(leagues_docs[wiki_title])
    all_nodes.extend(nodes)

    if not os.path.exists(f"./data/{wiki_title}"):
        # build vector index
        vector_index = VectorStoreIndex(nodes)
        vector_index.storage_context.persist(
            persist_dir=f"./data/{wiki_title}"
        )
    else:
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=f"./data/{wiki_title}"),
        )

    # build summary index
    summary_index = SummaryIndex(nodes)
    # define query engines
    vector_query_engine = vector_index.as_query_engine(llm=Settings.llm)
    summary_query_engine = summary_index.as_query_engine(llm=Settings.llm)

    # define tools
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=(
                    "Useful for questions related to specific aspects of"
                    f" {wiki_title} (e.g. the history, teams "
                    "and performance in EU, or more)."
                ),
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=(
                    "Useful for any requests that require a holistic summary"
                    f" of EVERYTHING about {wiki_title}. For questions about"
                    " more specific sections, please use the vector_tool."
                ),
            ),
        ),
    ]

    # build agent
    function_llm = OpenAI(model="gpt-4o-mini")
    agent = OpenAIAgent.from_tools(
        query_engine_tools,
        llm=function_llm,
        verbose=True,
        system_prompt=f"""\
You are a specialized agent designed to answer queries about {wiki_title}.
You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
""",
    )

    agents[wiki_title] = agent
    query_engines[wiki_title] = vector_index.as_query_engine(
        similarity_top_k=2
    )

In [27]:
# define tool for each document agent
all_tools = []
for wiki_title in wiki_titles:
    wiki_summary = (
        f"This content contains Wikipedia articles about {wiki_title}. Use"
        f" this tool if you want to answer any questions about {wiki_title}.\n"
    )
    doc_tool = QueryEngineTool(
        query_engine=agents[wiki_title],
        metadata=ToolMetadata(
            name=f"tool_{wiki_title.replace(' ', '_')}",
            description=wiki_summary,
        ),
    )
    all_tools.append(doc_tool)

In [28]:
# define an "object" index and retriever over these tools
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex, SimpleToolNodeMapping

tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)
obj_index = ObjectIndex.from_objects(
    all_tools,
    tool_mapping=tool_mapping,
    index_cls=VectorStoreIndex,
)

In [29]:
from llama_index.agent.openai import OpenAIAgent

top_agent = OpenAIAgent.from_tools(
    tool_retriever=obj_index.as_retriever(similarity_top_k=3),
    system_prompt=""" \
You are an agent designed to answer queries about the European top football leagues.
Please always use the tools provided to answer a question. Do not rely on prior knowledge.\

""",
    verbose=True,
)

In [30]:
base_index = VectorStoreIndex(all_nodes)
base_query_engine = base_index.as_query_engine(similarity_top_k=4)

In [31]:
# should use Boston agent -> vector tool
response = top_agent.query("Tell me about the history and UCL performance of La Liga")
print(response)

Added user message to memory: Tell me about the history and UCL performance of La Liga
=== Calling Function ===
Calling function: tool_La_Liga with args: {"input": "history"}
Added user message to memory: history
=== Calling Function ===
Calling function: summary_tool with args: {"input":"history of La Liga"}
Got output: La Liga, officially known as LaLiga EA Sports since 2023, was founded in April 1928 when José María Acha proposed the idea of a national league in Spain. The first season commenced in 1929 with ten teams, including notable clubs like Barcelona, Real Madrid, and Athletic Bilbao. 

In the 1930s, Athletic Bilbao emerged as a dominant force, winning multiple titles, while Barcelona won the inaugural league. The league was suspended during the Spanish Civil War, and in 1937, a separate Mediterranean League was formed, with Barcelona as the champion.

After the war, Atlético Madrid, Valencia, and Barcelona became the strongest clubs in the 1940s. The 1950s saw the rise of bo

In [32]:
# baseline
response_base = base_query_engine.query(
    "Please compare Premier League and La Liga in terms of history and UCL performance"
)
print(str(response))

### History of La Liga

La Liga, officially known as LaLiga EA Sports since 2023, was founded in April 1928 when José María Acha proposed the idea of a national league in Spain. The inaugural season commenced in 1929 with ten teams, including notable clubs like Barcelona, Real Madrid, and Athletic Bilbao.

- **1930s**: Athletic Bilbao emerged as a dominant force, winning multiple titles, while Barcelona won the first league title. The league was suspended during the Spanish Civil War, and in 1937, a separate Mediterranean League was formed, with Barcelona as the champion.
  
- **1940s**: After the war, Atlético Madrid, Valencia, and Barcelona became the strongest clubs.

- **1950s**: The rise of both FC Barcelona and Real Madrid occurred, with each club winning four titles during the decade. Real Madrid began a period of dominance in the 1960s and 1970s, winning 14 titles.

- **1980s**: Real Madrid continued its success, alongside the emergence of Basque clubs like Real Sociedad and At

In [33]:
response_top = top_agent.query("Please compare Premier League and La Liga in terms of history and UCL performance")
print(response)

Added user message to memory: Please compare Premier League and La Liga in terms of history and UCL performance
=== Calling Function ===
Calling function: tool_Premier_League with args: {"input": "history and UEFA Champions League performance"}
Added user message to memory: history and UEFA Champions League performance
=== Calling Function ===
Calling function: summary_tool with args: {"input": "history of the Premier League"}
Got output: The Premier League's history began in the late 1980s when English football faced significant challenges, including deteriorating stadium conditions and a ban on English clubs in European competitions due to the Heysel Stadium disaster. By the early 1990s, the situation began to improve, highlighted by England's semi-final run in the 1990 FIFA World Cup and the lifting of the European ban.

In 1991, a proposal for a new league emerged, leading to the signing of the Founder Members Agreement on July 17, 1991. This agreement established the FA Premier Le