In [None]:
!pip install llama-index pypdf pandas

In [3]:
import os
from getpass import getpass

import openai

# Take the key from the OPENAI_API_KEY environment variable, falling back to
# an interactive prompt, so that no secret is ever saved inside the notebook.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [4]:
import pandas as pd
from llama_index.core.query_engine import PandasQueryEngine

# Location of the population CSV (Colab-style absolute path).
population_path = "/content/World Population.csv"

# Load the whole file into a DataFrame, then preview the first five rows.
population_df = pd.read_csv(filepath_or_buffer=population_path)

population_df.head(5)

Unnamed: 0,Rank,Country,Population2023,YearlyChange,NetChange,Density(P/Km²),Land Area(Km²),Migrants(net),Fert.Rate,MedianAge,UrbanPop%,WorldShare
0,36,Afghanistan,42239854,2.70 %,1111083,65,652860,-65846,4.4,17.0,26 %,0.53 %
1,138,Albania,2832439,-0.35 %,-9882,103,27400,-8000,1.4,38.0,67 %,0.04 %
2,34,Algeria,45606480,1.57 %,703255,19,2381740,-9999,2.8,28.0,75 %,0.57 %
3,212,American Samoa,43914,-0.81 %,-359,220,200,-790,2.2,29.0,N.A.,0.00 %
4,202,Andorra,80088,0.33 %,264,170,470,200,1.1,43.0,85 %,0.00 %


In [5]:
from llama_index.core import PromptTemplate

# Numbered rules that fill the {instruction_str} slot of the template below.
# The same string is also handed to PandasQueryEngine(instruction_str=...)
# when the engine is created. The backslash after the opening quotes keeps
# the literal from starting with a newline.
# NOTE(review): rule 2 asks the model for an expression suitable for eval();
# treat the engine as running model-generated code - confirm that is acceptable.
instruction_str = """\
    1. Convert the query to executable Python code using Pandas.
    2. The final line of code should be a Python expression that can be called with the `eval()` function.
    3. The code should represent a solution to the query.
    4. PRINT ONLY THE EXPRESSION.
    5. Do not quote the expression."""

# Prompt template with three placeholders: {df_str}, {instruction_str} and
# {query_str}. It is registered on the query engine under the key
# "pandas_prompt" via update_prompts().
new_prompt = PromptTemplate(
    """\
    You are working with a pandas dataframe in Python.
    The name of the dataframe is `df`.
    This is the result of `print(df.head())`:
    {df_str}

    Follow these instructions:
    {instruction_str}
    Query: {query_str}

    Expression: """
)

# Role description passed as `context=` to ReActAgent.from_tools() when the
# agents are built. The line break and the indentation of the second line are
# inside the literal, so they are part of the string that is passed along.
context = """Purpose: The primary role of this agent is to assist users by providing accurate
            information about world population statistics and details about a country. """

In [6]:
# Query engine over the population DataFrame; verbose mode echoes the
# generated pandas instruction and its raw output.
population_query_engine = PandasQueryEngine(
    df=population_df,
    instruction_str=instruction_str,
    verbose=True,
)

# Replace the engine's "pandas_prompt" with the custom template.
custom_prompts = {"pandas_prompt": new_prompt}
population_query_engine.update_prompts(custom_prompts)
del custom_prompts  # keep the notebook namespace unchanged

# Smoke test: the last expression is displayed as the cell output.
population_query_engine.query("what is the population of Canada?")

> Pandas Instructions:
```
df[df['Country'] == 'Canada']['Population2023'].values[0]
```
> Pandas Output: 38781291


Response(response='38781291', source_nodes=[], metadata={'pandas_instruction_str': "df[df['Country'] == 'Canada']['Population2023'].values[0]", 'raw_pandas_output': '38781291'})

In [8]:
from llama_index.core.tools import FunctionTool

note_file = "/content/notes.txt"


def save_note(note):
    """Append ``note`` as one line to the notes file.

    The file named by the module-level ``note_file`` is opened in append
    mode, which creates it on first use, so no separate existence check is
    needed.

    Args:
        note: Text of the note. A trailing newline is added when writing.

    Returns:
        The confirmation string ``"note saved"``.
    """
    # Append mode creates a missing file by itself; this avoids both the
    # dependency on ``os`` (never imported in this notebook) and an extra
    # open() call whose handle was never closed.
    with open(note_file, "a") as f:
        f.write(note + "\n")

    return "note saved"


# Wrap save_note as an agent tool; the explicit name and description are what
# the agent sees when choosing which tool to call.
note_engine = FunctionTool.from_defaults(
    name="note_saver",
    description="this tool can save a text based note to a file for the user",
    fn=save_note,
)

In [12]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI

# Tools available to the agent: the note saver plus the population engine.
tools = [note_engine]
tools.append(
    QueryEngineTool(
        metadata=ToolMetadata(
            name="population_data",
            description="this gives information at the world population and demographics",
        ),
        query_engine=population_query_engine,
    )
)

# NOTE(review): the model name is a dated snapshot - confirm it is still served.
llm = OpenAI(model="gpt-3.5-turbo-0613")
agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context)

# Simple REPL: keep answering prompts until the user types "q".
while True:
    prompt = input("Enter a prompt (q to quit): ")
    if prompt == "q":
        break
    result = agent.query(prompt)
    print(result)



Enter a prompt (q to quit): say a note for me saying "this is testing"
[1;3;38;5;200mThought: The current language of the user is: English. I can use the note_saver tool to save a note for the user.
Action: note_saver
Action Input: {'note': 'This is testing'}
[0m[1;3;34mObservation: note saved
[0m[1;3;38;5;200mThought: I have successfully saved the note for the user.
Answer: I have saved a note for you. The note says "This is testing."
[0mI have saved a note for you. The note says "This is testing."
Enter a prompt (q to quit): what is the population of Japan?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: population_data
Action Input: {'input': 'Japan'}
[0m> Pandas Instructions:
```
df[df['Country'] == 'Japan']
```
> Pandas Output:      Rank Country  Population2023 YearlyChange  NetChange  Density(P/Km²)  \
104    12   Japan       123294513      -0.53 %    -657179             338   

     Land Ar

In [41]:
from pathlib import Path
from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
from llama_index import *


def get_index(data, index_name):
    """Return a vector index for ``data``, reusing a persisted copy if present.

    When the path ``index_name`` already exists, the index is loaded from it.
    Otherwise a new index is built from ``data`` and persisted to that path.

    Args:
        data: Documents passed to ``VectorStoreIndex.from_documents`` when the
            index has to be built.
        index_name: Path used as ``persist_dir`` for both saving and loading.

    Returns:
        The loaded index, or the freshly built one.
    """
    # ``Path`` is imported by this cell whereas ``os`` is not, so the
    # existence check goes through pathlib instead of os.path.
    if Path(index_name).exists():
        storage_context = StorageContext.from_defaults(persist_dir=index_name)
        return load_index_from_storage(storage_context)

    print("building index", index_name)
    index = VectorStoreIndex.from_documents(data, show_progress=True)
    # Persist right away so the next call takes the load branch.
    index.storage_context.persist(persist_dir=index_name)
    return index


# Import the PDF reader explicitly; relying on a wildcard import (or on names
# left over in the kernel) makes this cell fail on a fresh run.
from llama_index.readers.file import PDFReader

pdf_path = '/content/sjsu.pdf'
loader = PDFReader()
# Reuse pdf_path rather than repeating the literal, so the location is
# defined in exactly one place.
sjsu_pdf = loader.load_data(file=Path(pdf_path))
# Build the index on first run, or load it from the "sjsu" directory.
sjsu_index = get_index(sjsu_pdf, "sjsu")
sjsu_engine = sjsu_index.as_query_engine()

building index sjsu


Parsing nodes:   0%|          | 0/33 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/40 [00:00<?, ?it/s]

In [42]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI

# Tools available to the agent: note saver, population data and the SJSU PDF.
tools = [note_engine]
tools.extend(
    [
        QueryEngineTool(
            metadata=ToolMetadata(
                name="population_data",
                description="this gives information at the world population and demographics",
            ),
            query_engine=population_query_engine,
        ),
        QueryEngineTool(
            metadata=ToolMetadata(
                name="sjsu_data",
                description="this gives detailed information about San Jose State University",
            ),
            query_engine=sjsu_engine,
        ),
    ]
)

# NOTE(review): the model name is a dated snapshot - confirm it is still served.
llm = OpenAI(model="gpt-3.5-turbo-0613")
agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context)

# Simple REPL: keep answering prompts until the user types "q".
while True:
    prompt = input("Enter a prompt (q to quit): ")
    if prompt == "q":
        break
    result = agent.query(prompt)
    print(result)



Enter a prompt (q to quit): tell me about courses availble at San Jose State University
[1;3;38;5;200mThought: The current language of the user is English. I need to use the sjsu_data tool to get information about the courses available at San Jose State University.
Action: sjsu_data
Action Input: {'input': 'courses'}
[0m[1;3;34mObservation: The university offers a variety of courses through its nine colleges and seven focused schools, covering areas such as business, education, engineering, health sciences, humanities, arts, global education, science, social sciences, art and design, information, journalism, music, and dance.
[0m[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer.
Answer: San Jose State University offers a wide range of courses through its nine colleges and seven focused schools. These courses cover various areas such as business, education, engineering, health sciences, humanities, arts, global education, scien