## Necessary Imports

In [1]:
from dotenv import load_dotenv
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI
from prompts import instruction_str, context, new_prompt
from note_engine import note_engine
import pathlib
import requests
import pandas as pd

## Data Preprocessing

In [2]:
# Load the medicine catalogue and discard every row that has a missing value,
# in one chained expression so `medicines` is always rebuilt from the CSV.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a row
# index that was saved into the CSV -- consider index_col=0 after confirming.
medicines = pd.read_csv('data/medicine_dataset.csv').dropna()
# Preview the first 5 rows of the cleaned frame
medicines.head()

Unnamed: 0,Name,Category,Dosage Form,Strength,Manufacturer,Indication,Classification
0,Acetocillin,Antidiabetic,Cream,938 mg,Roche Holding AG,Virus,Over-the-Counter
1,Ibuprocillin,Antiviral,Injection,337 mg,CSL Limited,Infection,Over-the-Counter
2,Dextrophen,Antibiotic,Ointment,333 mg,Johnson & Johnson,Wound,Prescription
3,Clarinazole,Antifungal,Syrup,362 mg,AbbVie Inc.,Pain,Prescription
4,Amoxicillin,Antifungal,Tablet,802 mg,Teva Pharmaceutical Industries Ltd.,Wound,Over-the-Counter


## Get Prompts

In [3]:
# Fetch prompts.py from the upstream repository when it is not already on disk.
# NOTE: the imports cell runs `from prompts import ...`, so on a fresh checkout
# this download has to happen before that import can succeed.
prompts_fp = pathlib.Path('prompts.py')
if not prompts_fp.exists():
    prompts_url = 'https://raw.githubusercontent.com/techwithtim/PythonAgentAI/main/prompts.py'
    # Bound the wait so an unreachable host cannot hang the notebook
    prompts = requests.get(prompts_url, timeout=30)
    # Fail loudly on 4xx/5xx instead of saving an error page as prompts.py
    prompts.raise_for_status()
    # The file is created only after a successful response; UTF-8 matches the
    # encoding Python assumes when it later imports the module
    prompts_fp.write_text(prompts.text, encoding='utf-8')

In [4]:
# Load environment variables from .env (the OpenAI API key is expected there)
load_dotenv()

# Wrap the medicines DataFrame in a natural-language query engine.
# NOTE(review): PandasQueryEngine ships in llama_index.experimental and, per its
# documentation, executes pandas code written by the LLM -- confirm this is only
# used with trusted data and prompts.
medicine_query_engine = PandasQueryEngine(
    df=medicines,
    instruction_str=instruction_str,
    verbose=True,
)

# Replace the engine's default prompt with the project's template
medicine_query_engine.update_prompts({'prompt': new_prompt})

# Smoke-test the engine with a single question
query = 'What is the dosage form of amoxicillin?'
medicine_query_engine.query(query)

> Pandas Instructions:
```
df[df['Name'] == 'Amoxicillin']['Dosage Form'].values[0]
```
> Pandas Output: Tablet


Response(response='Tablet', source_nodes=[], metadata={'pandas_instruction_str': "df[df['Name'] == 'Amoxicillin']['Dosage Form'].values[0]", 'raw_pandas_output': 'Tablet'})

## Build and Query the Agent

In [5]:
# NOTE(review): imported here rather than in the imports cell -- presumably the
# pdf module needs the API key loaded by load_dotenv() first; confirm before moving.
from pdf import amoxicillin_engine

# Expose each query engine to the agent as a named, described tool
medicine_tool = QueryEngineTool(
    query_engine=medicine_query_engine,
    metadata=ToolMetadata(
        name='medicine_data',
        description='This gives information about medicines',
    ),
)
amoxicillin_tool = QueryEngineTool(
    query_engine=amoxicillin_engine,
    metadata=ToolMetadata(
        name='amoxicillin_data',
        description='This gives detailed information about amoxicillin',
    ),
)

# Full toolset handed to the agent: the note engine plus both data tools
tools = [note_engine, medicine_tool, amoxicillin_tool]

# Create the LLM, then the ReAct agent that chooses among the tools
llm = OpenAI(model='gpt-3.5-turbo-16k')
agent = ReActAgent.from_tools(tools=tools, llm=llm, verbose=True, context=context)

In [6]:
# Interactive loop: read a prompt, send it to the agent, print the answer.
# Entering 'q' ends the session.
while True:
    try:
        # Strip surrounding whitespace so 'q ' still quits
        prompt = input('Enter a prompt (q to quit): ').strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the session without a traceback
        break
    if prompt == 'q':
        break
    if not prompt:
        # Nothing was typed: ask again rather than spend an API call on an empty query
        continue
    result = agent.query(prompt)
    print(result)

[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: amoxicillin_data
Action Input: {'input': 'creation year'}
[0m[1;3;34mObservation: 2014
[0m[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer.
Answer: Amoxicillin was created in 2014.
[0mAmoxicillin was created in 2014.
[1;3;38;5;200mThought: The user is asking about the manufacturer of a medicine. I can use the "medicine_data" tool to provide this information.
Action: medicine_data
Action Input: {'input': 'manufacturer'}
[0m> Pandas Instructions:
```
df['Manufacturer']
```
> Pandas Output: 0                           Roche Holding AG
1                                CSL Limited
2                          Johnson & Johnson
3                                AbbVie Inc.
4        Teva Pharmaceutical Industries Ltd.
                        ...                 
49995                  Eli Lilly and Company