## Necessary Imports

In [1]:
from dotenv import load_dotenv
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI
from prompts import instruction_str, context, new_prompt
from note_engine import note_engine
import pathlib
import requests
import pandas as pd

## Data Preprocessing

In [2]:
# Show the instruction text that is later passed to PandasQueryEngine as instruction_str
print(instruction_str)

    1. Convert the query to executable Python code using Pandas.
    2. The final line of code should be a Python expression that can be called with the `eval()` function.
    3. The code should represent a solution to the query.
    4. ONLY USE THE TOOLS PROVIDED TO YOU.
    5. PRINT ONLY THE EXPRESSION.
    6. Do not quote the expression.


In [3]:
# Load the medicine dataset and discard incomplete records in one chained expression
medicines = (
    pd.read_csv('data/medicine_dataset.csv')
    .dropna()  # removes every row that has at least one missing value
)
# Leave the preview as the last expression so the notebook renders the first 5 rows
medicines.head()

Unnamed: 0,Name,Category,Dosage Form,Strength,Manufacturer,Indication,Classification
0,Acetocillin,Antidiabetic,Cream,938 mg,Roche Holding AG,Virus,Over-the-Counter
1,Ibuprocillin,Antiviral,Injection,337 mg,CSL Limited,Infection,Over-the-Counter
2,Dextrophen,Antibiotic,Ointment,333 mg,Johnson & Johnson,Wound,Prescription
3,Clarinazole,Antifungal,Syrup,362 mg,AbbVie Inc.,Pain,Prescription
4,Amoxicillin,Antifungal,Tablet,802 mg,Teva Pharmaceutical Industries Ltd.,Wound,Over-the-Counter


## Get Prompts

In [4]:
# Fetch prompts.py from the tutorial repository when no local copy exists.
# NOTE(review): the imports cell already runs `from prompts import ...`, so on a
# fresh checkout that import fails before this cell can download the file --
# consider moving this cell above the imports.
prompts_fp = pathlib.Path('prompts.py')
if not prompts_fp.exists():
    prompts_url = 'https://raw.githubusercontent.com/techwithtim/PythonAgentAI/main/prompts.py'
    # Bound the wait so an unreachable host cannot hang the kernel indefinitely.
    prompts = requests.get(prompts_url, timeout=30)
    # Fail loudly on 4xx/5xx responses; otherwise the error body would be saved as
    # prompts.py and the exists() guard above would never replace it.
    prompts.raise_for_status()
    # Write through the same Path that was checked, with an explicit encoding
    # (Python source files are UTF-8 by default).
    prompts_fp.write_text(prompts.text, encoding='utf-8')

In [5]:
# Load the OpenAI API key from the .env file
load_dotenv()

query = 'What is the dosage form of amoxicillin?'

# Build a natural-language query engine over the medicines DataFrame.
# verbose=True echoes the generated pandas expression and its raw output.
medicine_query_engine = PandasQueryEngine(df=medicines, instruction_str=instruction_str, verbose=True)

# update_prompts only acts on keys the engine recognises ('pandas_prompt' and
# 'response_synthesis_prompt'). The previous key 'prompt' matched neither and was
# silently ignored, so new_prompt never replaced the default code-generation prompt.
medicine_query_engine.update_prompts({'pandas_prompt': new_prompt})

# Query the engine; the Response object is displayed as the cell's output
medicine_query_engine.query(query)

> Pandas Instructions:
```
df[df['Name'] == 'Amoxicillin']['Dosage Form'].values[0]
```
> Pandas Output: Tablet


Response(response='Tablet', source_nodes=[], metadata={'pandas_instruction_str': "df[df['Name'] == 'Amoxicillin']['Dosage Form'].values[0]", 'raw_pandas_output': 'Tablet'})

## Build and Query the Agent

In [6]:
# Tools the agent may call: the note tool plus the pandas engine wrapped as a tool
tools = [
    note_engine,
    QueryEngineTool(
        query_engine=medicine_query_engine,
        metadata=ToolMetadata(
            name='medicines',
            description='Query the medicine dataset',
        ),
    ),
]

# Set up the OpenAI chat model, then the ReAct agent that drives the tools with it
llm = OpenAI(model='gpt-3.5-turbo-16k')
agent = ReActAgent.from_tools(
    tools=tools,
    llm=llm,
    verbose=True,
    context=context,
)

In [7]:
# Interactive loop: forward each prompt to the agent until the user enters 'q'.
# Input is stripped so that stray whitespace around 'q' still quits.
while (prompt := input('Enter a prompt (q to quit): ').strip()) != 'q':
    if not prompt:
        # Blank input: ask again instead of spending an LLM call on an empty query
        continue
    result = agent.query(prompt)
    print(result)

[1;3;38;5;200mThought: The user is asking for the dosage of Tylenol. I can use the "medicines" tool to provide the dosage information.
Action: medicines
Action Input: {'input': 'Tylenol'}
[0m> Pandas Instructions:
```
df[df['Name'] == 'Tylenol']
```
> Pandas Output: Empty DataFrame
Columns: [Name, Category, Dosage Form, Strength, Manufacturer, Indication, Classification]
Index: []
[1;3;34mObservation: Empty DataFrame
Columns: [Name, Category, Dosage Form, Strength, Manufacturer, Indication, Classification]
Index: []
[0m[1;3;38;5;200mThought: The "medicines" tool did not return any results for Tylenol. I will try using a different approach to find the dosage information.
Action: note_saver
Action Input: {'note': 'Please provide the dosage information for Tylenol.'}
[0m[1;3;34mObservation: note saved
[0m[1;3;34mObservation: Error: Could not parse output. Please follow the thought-action-input format. Try again.
[0m[1;3;38;5;200mThought: I encountered an error while trying to s