## Necessary Imports

In [1]:
from dotenv import load_dotenv
from llama_index.experimental.query_engine import PandasQueryEngine
from prompts import new_prompt, instruction_str
import pathlib
import requests
import pandas as pd

## Data Preprocessing

In [2]:
# Load the medicine dataset and discard every row that has a missing value
medicines = pd.read_csv('data/medicine_dataset.csv').dropna()
# Preview the first 5 rows of the cleaned data
medicines.head()

Unnamed: 0,Name,Category,Dosage Form,Strength,Manufacturer,Indication,Classification
0,Acetocillin,Antidiabetic,Cream,938 mg,Roche Holding AG,Virus,Over-the-Counter
1,Ibuprocillin,Antiviral,Injection,337 mg,CSL Limited,Infection,Over-the-Counter
2,Dextrophen,Antibiotic,Ointment,333 mg,Johnson & Johnson,Wound,Prescription
3,Clarinazole,Antifungal,Syrup,362 mg,AbbVie Inc.,Pain,Prescription
4,Amoxicillin,Antifungal,Tablet,802 mg,Teva Pharmaceutical Industries Ltd.,Wound,Over-the-Counter


## Get Prompts

In [3]:
# Make sure the helper module prompts.py sits next to the notebook, downloading
# it from GitHub only when it is missing.
# NOTE(review): the imports cell already runs `from prompts import ...`, so on a
# fresh checkout this download has to happen before that import can succeed —
# consider moving this cell above the imports.
prompts_fp = pathlib.Path('prompts.py')
if not prompts_fp.exists():
    prompts_url = 'https://raw.githubusercontent.com/techwithtim/PythonAgentAI/main/prompts.py'
    # Bound the wait so a stalled connection cannot hang the kernel
    prompts = requests.get(prompts_url, timeout=30)
    # Fail loudly on 4xx/5xx instead of saving an error page as prompts.py
    prompts.raise_for_status()
    # Write through the same Path that was checked, with an explicit encoding
    prompts_fp.write_text(prompts.text, encoding='utf-8')

In [8]:
# Pull environment variables (including the OpenAI API key) from the .env file
load_dotenv()

# Build the query engine over the cleaned medicines DataFrame; verbose=True
# echoes the generated pandas instructions and their raw output
medicine_query_engine = PandasQueryEngine(df=medicines, verbose=True)
# Currently disabled: swap in the custom pandas prompt imported from prompts.py
# medicine_query_engine.update_prompts({'pandas_prompt': new_prompt})

# Natural-language question to answer from the DataFrame
query = 'What is the dosage form of amoxicillin?'
# Run the question through the engine; the response is the cell's output
medicine_query_engine.query(query)

> Pandas Instructions:
```
df[df['Name'] == 'Amoxicillin']['Dosage Form'].values[0]
```
> Pandas Output: Tablet


Response(response='Tablet', source_nodes=[], metadata={'pandas_instruction_str': "df[df['Name'] == 'Amoxicillin']['Dosage Form'].values[0]", 'raw_pandas_output': 'Tablet'})