In [None]:
!pip install aiohttp==3.9.3 aiosignal==1.3.1 annotated-types==0.6.0 anyio==4.2.0 attrs==23.2.0 certifi==2024.2.2 charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 dataclasses-json==0.6.4 Deprecated==1.2.14 dirtyjson==1.0.8 distro==1.9.0 frozenlist==1.4.1 fsspec==2024.2.0 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.2 httpx==0.26.0 idna==3.6 joblib==1.3.2 marshmallow==3.20.2 multidict==6.0.5 mypy-extensions==1.0.0 nest-asyncio==1.6.0 networkx==3.2.1 nltk==3.8.1 numpy openai==1.12.0 packaging==23.2 pandas==2.2.0 pydantic==2.6.1 pydantic_core==2.16.2 pypdf==4.0.1 python-dateutil==2.8.2 python-dotenv==1.0.1 pytz==2024.1 regex==2023.12.25 requests==2.31.0 six==1.16.0 sniffio==1.3.0 SQLAlchemy==2.0.27 tenacity==8.2.3 tiktoken==0.6.0 tqdm==4.66.2 typing-inspect==0.9.0 typing_extensions==4.9.0 tzdata==2024.1 urllib3==2.2.0 wrapt==1.16.0 yarl==1.9.4



In [None]:
!pip install llama_index



In [None]:
!pip install llama_index.core



In [None]:
from llama_index.core import PromptTemplate


# Numbered rules for the model: answer with a single bare pandas expression that can be
# passed to eval(). This string is handed to PandasQueryEngine where the engine is
# constructed and is also the value for the {instruction_str} placeholder below.
# The 4-space indentation on the continuation lines is part of the string value.
instruction_str = """\
    1. Convert the query to executable Python code using Pandas.
    2. The final line of code should be a Python expression that can be called with the `eval()` function.
    3. The code should represent a solution to the query.
    4. PRINT ONLY THE EXPRESSION.
    5. Do not quote the expression."""

# Prompt template later installed on the pandas query engine under the "pandas_prompt" key.
# Placeholders: {df_str}, {instruction_str}, {query_str} — presumably filled in by the
# query engine at query time (TODO confirm against the PandasQueryEngine docs).
new_prompt = PromptTemplate(
    """\
    You are working with a pandas dataframe in Python.
    The name of the dataframe is `df`.
    This is the result of `print(df.head())`:
    {df_str}

    Follow these instructions:
    {instruction_str}
    Query: {query_str}

    Expression: """
)

# Free-text role description passed as `context=` when the ReAct agent is created.
# The line break and the indentation of the second line are part of the string.
context = """Purpose: The primary role of this agent is to assist users by providing accurate
            information about world population statistics and details about a country. """

In [None]:
!pip install llama-index-readers-smart-pdf-loader



In [None]:
!pip install llmsherpa



In [None]:
import os
from getpass import getpass

# Never store the API key in the notebook: anything typed here is saved with the file
# and shared with everyone who receives it. Use the key already present in the
# environment, or ask for it interactively (getpass does not echo the input).
# NOTE(review): the key that used to be hardcoded in this cell has been exposed and
# should be revoked in the OpenAI dashboard.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
import os
from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
#from llama_index.readers import PDFReader
from llama_index.readers.smart_pdf_loader import SmartPDFLoader



def get_index(data, index_name):
    """Return the vector index stored under ``index_name``, building it on first use.

    If a path named ``index_name`` already exists, the index is loaded from that
    persist directory and ``data`` is not used. Otherwise a new
    ``VectorStoreIndex`` is built from ``data`` (with a progress display),
    persisted to ``index_name``, and returned.

    Args:
        data: Documents to index when no persisted copy exists.
        index_name: Directory used both as the existence check and as the
            persist location.

    Returns:
        The loaded or newly built index.
    """
    if os.path.exists(index_name):
        # A persisted copy is on disk: reload it instead of embedding again.
        storage = StorageContext.from_defaults(persist_dir=index_name)
        return load_index_from_storage(storage)

    print("building index", index_name)
    built = VectorStoreIndex.from_documents(data, show_progress=True)
    built.storage_context.persist(persist_dir=index_name)
    return built

# Parsing endpoint handed to SmartPDFLoader.
llmsherpa_api_url = "https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all"
pdf_loader = SmartPDFLoader(llmsherpa_api_url=llmsherpa_api_url)

# Source document. The original author's note says a URL such as
# https://arxiv.org/pdf/1910.13461.pdf may be given in place of a file path.
pdf_path = "/content/drive/MyDrive/CMPE-258/Assignment 4 dataset/Canada.pdf"

# NOTE(review): the PDF is loaded on every run, even when get_index() below ends up
# reusing the index already persisted in the "canada" directory.
canada_pdf = pdf_loader.load_data(pdf_path)

# Build the index (or reload the persisted one) and expose it as a query engine.
canada_index = get_index(canada_pdf, index_name="canada")
canada_engine = canada_index.as_query_engine()

building index canada


Parsing nodes:   0%|          | 0/729 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/729 [00:00<?, ?it/s]

In [None]:
import os

# Check that the API key is configured WITHOUT echoing it: cell output is saved in the
# notebook, so printing the value would publish the secret along with the file.
my_variable_value = os.getenv('OPENAI_API_KEY', 'default_value')

if my_variable_value == 'default_value':
    print("OPENAI_API_KEY is not set")
else:
    # Report only that a key is present and how long it is, never its content.
    print(f"OPENAI_API_KEY is set ({len(my_variable_value)} characters)")


[output redacted: this cell printed the OpenAI API key — the exposed key should be revoked]


In [None]:
from llama_index.core.tools import FunctionTool
import os

# Destination file for notes saved by the agent's note tool.
note_file = '/content/drive/MyDrive/CMPE-258/Assignment 4 dataset/notes.txt'


def save_note(note: str) -> str:
    """Append ``note`` as one line to the file named by ``note_file``.

    The file is opened in append mode, which creates it when it does not exist
    yet, so no separate "create if missing" step is needed. The ``with`` block
    guarantees the handle is closed; the previous version opened the file in
    write mode without closing it.

    Args:
        note: Text of the note. A trailing newline is added when writing.

    Returns:
        The literal string ``"note saved"``.

    Raises:
        OSError: If the file cannot be opened or written (for example when the
            containing directory does not exist).
    """
    with open(note_file, "a", encoding="utf-8") as f:
        f.write(note + "\n")

    return "note saved"


# Wrap save_note as a tool named "note_saver"; it is added to the agent's tool list
# where the agent is assembled.
note_engine = FunctionTool.from_defaults(
    description="this tool can save a text based note to a file for the user",
    name="note_saver",
    fn=save_note,
)




In [None]:
from dotenv import load_dotenv
import os
import pandas as pd
from llama_index.core.query_engine import PandasQueryEngine


from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
#from llama_index.llms import OpenAI
from llama_index.llms.openai import OpenAI

# Pull in any settings from a local .env file before the engines are created.
load_dotenv()

# World population table that backs the pandas query engine.
population_path = "/content/drive/MyDrive/CMPE-258/Assignment 4 dataset/population.csv"
population_df = pd.read_csv(population_path)

# NOTE(review): the instructions given to this engine ask the model for an expression
# meant for eval(); treat the CSV and user prompts as trusted input only.
population_query_engine = PandasQueryEngine(
    df=population_df,
    instruction_str=instruction_str,
    verbose=True,
)
# Replace the engine's "pandas_prompt" with the notebook's own template.
population_query_engine.update_prompts({"pandas_prompt": new_prompt})

# Wrap each query engine as a named tool carrying the description the agent sees.
population_tool = QueryEngineTool(
    query_engine=population_query_engine,
    metadata=ToolMetadata(
        name="population_data",
        description="this gives information at the world population and demographics",
    ),
)
canada_tool = QueryEngineTool(
    query_engine=canada_engine,
    metadata=ToolMetadata(
        name="canada_data",
        description="this gives detailed information about canada the country",
    ),
)

# Same order as before: the note saver first, then the two data tools.
tools = [note_engine, population_tool, canada_tool]

# "gpt-3.5-turbo-0613" was a dated snapshot that OpenAI has since retired, so requests
# naming it are rejected. The undated alias points at the currently served
# gpt-3.5-turbo model.
llm = OpenAI(model="gpt-3.5-turbo")

# ReAct agent over the note, population and Canada tools; `context` supplies the role
# description and verbose=True prints the agent's intermediate steps.
agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context)

# Interactive loop: forward each prompt to the agent until the user enters "q".
while True:
    # Surrounding whitespace is dropped so " q " still quits.
    prompt = input("Enter a prompt (q to quit): ").strip()
    if prompt.lower() == "q":
        break
    if not prompt:
        # An accidental Enter should not trigger a (billable) agent query.
        continue
    result = agent.query(prompt)
    print(result)



Enter a prompt (q to quit): canada size?
[1;3;38;5;200mThought: The user is asking about the size of Canada. I can use the "canada_data" tool to get detailed information about Canada, including its size.
Action: canada_data
Action Input: {'input': 'size'}
[0m[1;3;34mObservation: 9,093,507 km2
[0m[1;3;38;5;200mThought: I have obtained the information about the size of Canada using the "canada_data" tool. I can now provide the answer to the user's question.
Answer: Canada has a size of 9,093,507 square kilometers.
[0mCanada has a size of 9,093,507 square kilometers.
Enter a prompt (q to quit): can you take a note that canada size is 9093507km
[1;3;38;5;200mThought: The current language of the user is: English. I can use the note_saver tool to take a note of the information provided.
Action: note_saver
Action Input: {'note': 'Canada size is 9093507km'}
[0m[1;3;34mObservation: note saved
[0m[1;3;38;5;200mThought: I have successfully saved the note about Canada's size. Now I can 