In [1]:
experiment_name = "medical_notes_exp"

In [2]:
initial_prompt = """In this assignment, you will label medical notes samples into one of five clinical domains. 
Each medical note comes from exactly one of the following five clinical domains:

- Gastroenterology
- Neurology
- Orthopedic
- Radiology
- Urology

Provide your response as a JSON dictionary with the following structure:
{
    "chain_of_thought": "Your step-by-step reasoning here",
    "domain": "The identified domain - the ONLY possible topics are "Gastroenterology", "Neurology", "Orthopedic", "Radiology", "Urology". Output just one single category."
}
"""

In [3]:
# Output format prompt
output_format_prompt = """
Provide your response as a JSON dictionary with the following structure:
{
    "chain_of_thought": "Your step-by-step reasoning here",
    "domain": "The identified domain - the ONLY possible domains are "Gastroenterology", "Neurology", "Orthopedic", "Radiology", "Urology". Output just one single category."
}
"""

In [4]:
# Define output schema
output_schema = {
    'key_to_extract': 'domain',
    'value_mapping': {
        'Gastroenterology': 'Gastroenterology',
        'Neurology': 'Neurology',
        'Orthopedic': 'Orthopedic',
        'Radiology': 'Radiology',
        'Urology': 'Urology',
    },
    'regex_pattern': r'"domain":\s*"([^"]+)"',  
    #
    'chain_of_thought_key': 'chain_of_thought',  
    'chain_of_thought_regex': r'"chain_of_thought":\s*"(.*?)"',
    #
    'use_json_mode': True,
}

In [5]:
# Set number of optimization iterations
iterations = 5

In [6]:
# Define model providers and models for evaluation and optimization
eval_provider = "openai"
eval_model = "gpt-4o-mini"
optim_provider = "openai"
optim_model = "gpt-4o-mini"

In [7]:
# Path to the CSV file containing review data for evaluation
eval_datapath = "data/medical_notes/medical_notes.csv"
text_column = "text"
target_column = "label"
sample_size = 30

------------------------------------------------------------------------------------------

In [8]:
# Import necessary libraries
import pandas as pd
import sys
import os
# Add the parent directory to sys.path
# Use getcwd() to get the current working directory for Jupyter notebooks
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
grandparent_dir = os.path.dirname(parent_dir)
sys.path.append(grandparent_dir)
from src import optimize_prompt

In [9]:
# Load and prepare data
eval_data = pd.read_csv(eval_datapath, encoding='ISO-8859-1', usecols=[text_column, target_column])
eval_data = eval_data.rename(columns={text_column: 'text', target_column: 'label'})
eval_data["text"] = eval_data["text"].astype(str)
eval_data

Unnamed: 0,label,text
0,Neurology,\n\n\n\n\n<B>CC:</B> Difficulty with word find...
1,Orthopedic,\n\n\n\n\n<B>PREOPERATIVE DIAGNOSIS: </B> Gang...
2,Orthopedic,\n\n\n\n\n<B>PREOPERATIVE DIAGNOSIS: </B> Cerv...
3,Radiology,\n\n\n\n\n<B>EXAM:</B>MRI LEFT SHOULDER\n\n<B>...
4,Orthopedic,\n\n\n\n\n<B>HISTORY OF PRESENT ILLNESS: </B> ...
...,...,...
821,Urology,\n\n\n\n\n\n\n\n\n\n\n\n\n
822,Radiology,\n\n\n\n\n<B>CC:</B> Episodic mental status ch...
823,Gastroenterology,\n\n\n\n\n<B>PROCEDURE IN DETAIL: </B> Followi...
824,Urology,\n\n\n\n\n<B>REASON FOR VISIT: </B> Overactive...


In [10]:
# Randomly select n samples from each class
eval_data = (
    eval_data.groupby('label')
    .apply(lambda x: x.sample(n=sample_size, random_state=42))
    .reset_index(drop=True)
)
# Shuffle the DataFrame randomly
eval_data = eval_data.sample(frac=1, random_state=42).reset_index(drop=True)
print(f"Evaluation data shape: {eval_data.shape}")
print(eval_data.head())

Evaluation data shape: (150, 2)
              label                                               text
0        Orthopedic  \n\n\n\n\n<B>EXAM:</B>MRI LEFT SHOULDER\n\n<B>...
1  Gastroenterology  \n\n\n\n\n<B>PREOPERATIVE DIAGNOSIS: </B> Abdo...
2         Radiology  \n\n\n\n\n<B>EXAM:</B>  Renal ultrasound.\n\n<...
3        Orthopedic  \n\n\n\n\n<B>PREOPERATIVE DIAGNOSES: </B> Righ...
4        Orthopedic  \n\n\n\n\n<B>PREOPERATIVE DIAGNOSIS: </B> Righ...


In [11]:
# Run the prompt optimization process
best_prompt, best_metrics = optimize_prompt(
    initial_prompt = initial_prompt,
    eval_data = eval_data,
    iterations =iterations,
    eval_provider=eval_provider,
    eval_model=eval_model,
    eval_temperature=0.5,
    optim_provider=optim_provider,
    optim_model=optim_model,
    optim_temperature=0.2,
    use_cache=True,
    output_format_prompt = output_format_prompt,
    output_schema=output_schema,
    fp_comments = "",
    fn_comments = "",
    tp_comments = "",
    invalid_comments="",
    validation_comments="",
    experiment_name = experiment_name,
    skip_prompt_validation = True,
)


Detected problem type: multiclass
Selected evaluation provider: openai
Selected evaluation model: gpt-4o-mini
Evaluation temperature: 0.5
Selected optimization provider: openai
Selected optimization model: gpt-4o-mini
Optimization temperature: 0.2
Estimated token usage: 84649750
Estimated cost: $101.58

Do you want to proceed with the optimization? (Y/N): 
Iteration 1/5


-----------------------------------
Processing text 1/150 .....
Prediction 1/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 2/150 .....
Prediction 2/150: Gastroenterology | Ground Truth: Gastroenterology ✅ (Correct)
-----------------------------------
Processing text 3/150 .....
Prediction 3/150: Urology | Ground Truth: Radiology ❌ (Incorrect)
-----------------------------------
Processing text 4/150 .....
Prediction 4/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 5/150 .....
Prediction 5/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 6/150 .....
Using cached output for text 6/150
Prediction 6/150: Neurology | Ground Truth: Neurology ✅ (Correct)
-----------------------------------
Processing text 7/150 .....
Using cached output for text 7/150
Prediction 7/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct



Analyzing predictions for multiclass classification...



Iteration 2/5


-----------------------------------
Processing text 1/150 .....
Prediction 1/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 2/150 .....
Prediction 2/150: Gastroenterology | Ground Truth: Gastroenterology ✅ (Correct)
-----------------------------------
Processing text 3/150 .....
Prediction 3/150: Urology | Ground Truth: Radiology ❌ (Incorrect)
-----------------------------------
Processing text 4/150 .....
Prediction 4/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 5/150 .....
Prediction 5/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 6/150 .....
Prediction 6/150: Neurology | Ground Truth: Neurology ✅ (Correct)
-----------------------------------
Processing text 7/150 .....
Prediction 7/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 8/150 .....
Pred



Analyzing predictions for multiclass classification...



Iteration 3/5


-----------------------------------
Processing text 1/150 .....
Prediction 1/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 2/150 .....
Prediction 2/150: Gastroenterology | Ground Truth: Gastroenterology ✅ (Correct)
-----------------------------------
Processing text 3/150 .....
Prediction 3/150: Urology | Ground Truth: Radiology ❌ (Incorrect)
-----------------------------------
Processing text 4/150 .....
Prediction 4/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 5/150 .....
Prediction 5/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 6/150 .....
Prediction 6/150: Neurology | Ground Truth: Neurology ✅ (Correct)
-----------------------------------
Processing text 7/150 .....
Prediction 7/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 8/150 .....
Pred



Analyzing predictions for multiclass classification...



Iteration 4/5


-----------------------------------
Processing text 1/150 .....
Prediction 1/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 2/150 .....
Prediction 2/150: Gastroenterology | Ground Truth: Gastroenterology ✅ (Correct)
-----------------------------------
Processing text 3/150 .....
Prediction 3/150: Urology | Ground Truth: Radiology ❌ (Incorrect)
-----------------------------------
Processing text 4/150 .....
Prediction 4/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 5/150 .....
Prediction 5/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 6/150 .....
Prediction 6/150: Neurology | Ground Truth: Neurology ✅ (Correct)
-----------------------------------
Processing text 7/150 .....
Prediction 7/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 8/150 .....
Pred



Analyzing predictions for multiclass classification...



Iteration 5/5


-----------------------------------
Processing text 1/150 .....
Prediction 1/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 2/150 .....
Prediction 2/150: Gastroenterology | Ground Truth: Gastroenterology ✅ (Correct)
-----------------------------------
Processing text 3/150 .....
Prediction 3/150: Urology | Ground Truth: Radiology ❌ (Incorrect)
-----------------------------------
Processing text 4/150 .....
Prediction 4/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 5/150 .....
Prediction 5/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 6/150 .....
Prediction 6/150: Neurology | Ground Truth: Neurology ✅ (Correct)
-----------------------------------
Processing text 7/150 .....
Prediction 7/150: Orthopedic | Ground Truth: Orthopedic ✅ (Correct)
-----------------------------------
Processing text 8/150 .....
Pred





All logs saved in directory: /Users/danielfiuzadosil/Documents/GitHub/AI-Prompt-Optimiser/examples/muticlass_classifier/runs/medical_notes_exp_Mon_16-Dec-2024_12-13-48
