In [1]:
# Identifier for this run; forwarded to optimize_prompt as `experiment_name`.
experiment_name = "sentiment_exp"

In [2]:
# Seed prompt handed to optimize_prompt as `initial_prompt`.
# The JSON template below is kept syntactically valid (comma between the two
# keys, no trailing comma) so the model is not shown a malformed example.
initial_prompt = '''
You are a sentiment analysis classifier. Determine whether the provided text expresses a positive sentiment. 
Think through your analysis step by step using chain of thought reasoning. 
After your analysis, respond with a STRICT JSON dictionary containing two keys: 
"chain_of_thought" (your step-by-step reasoning) and "classification" (1 for positive, 0 for negative).

Provide your response as a JSON dictionary with the following structure:
{
    "chain_of_thought": "Your step-by-step reasoning here",
    "classification": 0 or 1
}
Ensure that "chain_of_thought" contains your detailed analysis, and "classification" is strictly 0 or 1
'''

In [3]:
# Output format prompt, handed to optimize_prompt as `output_format_prompt`.
# The template is valid JSON punctuation-wise: a comma separates the two keys
# and there is no trailing comma before the closing brace.
output_format_prompt = '''
Provide your response as a JSON dictionary with the following structure:
{
    "chain_of_thought": "Your step-by-step reasoning here",
    "classification": 0 or 1
}
Ensure that "chain_of_thought" contains your detailed analysis, and "classification" is strictly 0 or 1
'''

In [4]:
# Free-text note slots, all left empty for this run. Each one is forwarded to
# optimize_prompt under the keyword of the same name.
# Strings are immutable, so a chained assignment is equivalent to assigning
# each name separately.
fp_comments = fn_comments = tp_comments = invalid_comments = ""
prompt_engineering_comments = validation_comments = ""

In [5]:
# Define output schema (handed to optimize_prompt as `output_schema`).
output_schema = {
    # JSON key that carries the predicted label, and the mapping from the
    # extracted string to the integer label.
    'key_to_extract': 'classification',
    'value_mapping': {'1': 1, '0': 0},
    # Captures the single digit that follows the "classification" key.
    'regex_pattern': r'"classification":\s*(\d)',
    # JSON key that carries the model's reasoning.
    'chain_of_thought_key': 'chain_of_thought',
    # Captures the full JSON string value in group 1. The body accepts either a
    # character that is not a quote/backslash or a backslash escape pair, so an
    # escaped quote (\") inside the reasoning no longer ends the match early,
    # and the value may span line breaks.
    'chain_of_thought_regex': r'"chain_of_thought":\s*"((?:[^"\\]|\\.)*)"',
    'use_json_mode': True,
}

In [6]:
# Number of optimization iterations; forwarded to optimize_prompt as `iterations`.
iterations = 5

In [7]:
# Providers and models for the two roles: evaluation and optimization.
# Both roles share the same OpenAI model here, so each value is written once.
# Local alternative kept for reference: provider "ollama", model "llama3.1".
eval_provider = optim_provider = "openai"
eval_model = optim_model = "gpt-4o-mini"

In [8]:
# Path to the CSV file containing review data for evaluation
# (relative path; read with pd.read_csv in the data-loading cell).
eval_datapath = "sentiments.csv"
# Total number of rows to evaluate; the data-loading cell draws
# round(sample_size/2) rows for each label value.
sample_size = 20

------------------------------------------------------------------------------------------

In [9]:
# Import necessary libraries
import os
import sys

import pandas as pd

# Make the directory two levels above the working directory importable so that
# the `src` package resolves.
# getcwd() is used to obtain the current working directory in a Jupyter notebook.
current_dir = os.getcwd()
grandparent_dir = os.path.dirname(os.path.dirname(current_dir))
# Guard keeps this cell idempotent: re-running it does not stack duplicate
# entries on sys.path.
if grandparent_dir not in sys.path:
    sys.path.append(grandparent_dir)

from src import optimize_prompt

In [10]:
# Load and prepare data
# Columns are renamed by name, not by position: `usecols` selects columns but
# does not dictate their order, so a positional rename would silently swap
# text and label if the CSV stored 'Sentiment' before 'Text'.
eval_data = (
    pd.read_csv(eval_datapath, encoding='ISO-8859-1', usecols=['Text', 'Sentiment'])
    .rename(columns={'Text': 'text', 'Sentiment': 'label'})
    .loc[:, ['text', 'label']]
)
# Draw a balanced sample: round(sample_size/2) rows per label value.
# Each label's rows are sampled with the same fixed seed and then concatenated
# in sorted label order. This avoids relying on groupby.apply passing the
# grouping column into the applied function, which pandas has deprecated.
samples_per_label = round(sample_size / 2)
eval_data = pd.concat(
    [
        label_rows.sample(n=samples_per_label, random_state=42)
        for _, label_rows in eval_data.groupby('label')
    ]
).reset_index(drop=True)
# Shuffle the DataFrame randomly (fixed seed for reproducibility)
eval_data = eval_data.sample(frac=1, random_state=42).reset_index(drop=True)
print(f"Evaluation data shape: {eval_data.shape}")
print(eval_data.head())

Evaluation data shape: (20, 2)
                                                text  label
0  I hate guns and have never murdered anyone, bu...      0
1  "A Cry in the Dark" is a masterful piece of ci...      1
2  I was watching the Perfect Storm, and thought ...      1
3  If you ask me the first one was really better ...      0
4  The movie 'Heart of Darkness', based on the 18...      0


In [11]:
# Run the prompt optimization process
# All keyword arguments are gathered in one mapping (same names, same order,
# same values) and unpacked into a single optimize_prompt call.
optimization_settings = {
    "initial_prompt": initial_prompt,
    "eval_data": eval_data,
    "iterations": iterations,
    # Evaluation side
    "eval_provider": eval_provider,
    "eval_model": eval_model,
    "eval_temperature": 0.7,
    # Optimization side
    "optim_provider": optim_provider,
    "optim_model": optim_model,
    "optim_temperature": 0,
    "use_cache": True,
    # Output handling
    "output_format_prompt": output_format_prompt,
    "output_schema": output_schema,
    # Free-text notes
    "fp_comments": fp_comments,
    "fn_comments": fn_comments,
    "tp_comments": tp_comments,
    "invalid_comments": invalid_comments,
    "prompt_engineering_comments": prompt_engineering_comments,
    "validation_comments": validation_comments,
    "experiment_name": experiment_name,
    "skip_prompt_validation": True,
}
best_prompt, best_metrics = optimize_prompt(**optimization_settings)


Detected problem type: binary
Selected evaluation provider: openai
Selected evaluation model: gpt-4o-mini
Evaluation temperature: 0.7
Selected optimization provider: openai
Selected optimization model: gpt-4o-mini
Optimization temperature: 0
Estimated token usage: 548900
Estimated cost: $0.66

Do you want to proceed with the optimization? (Y/N): 
Iteration 1/5


-----------------------------------
Processing text 1/20 .....
Using cached output for text 1/20
Prediction 1/20: 1 | Ground Truth: 0 ❌ (FP)
-----------------------------------
Processing text 2/20 .....
Prediction 2/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 3/20 .....
Prediction 3/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 4/20 .....
Using cached output for text 4/20
Prediction 4/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 5/20 .....
Prediction 5/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 6/20 .....
Prediction 6/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 7/20 .....
Using cached output for text 7/20
Prediction 7/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 8/20 .....
Using cached output for text 8/20
Prediction 8/20: 0 | Ground Truth: 0 ✅ (TN)
--------



Analyzing misclassifications, true positives, and invalid outputs...



Iteration 2/5


-----------------------------------
Processing text 1/20 .....
Prediction 1/20: 1 | Ground Truth: 0 ❌ (FP)
-----------------------------------
Processing text 2/20 .....
Prediction 2/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 3/20 .....
Prediction 3/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 4/20 .....
Prediction 4/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 5/20 .....
Prediction 5/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 6/20 .....
Prediction 6/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 7/20 .....
Prediction 7/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 8/20 .....
Prediction 8/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 9/20 .....
Prediction 9/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
P



Analyzing misclassifications, true positives, and invalid outputs...



Iteration 3/5


-----------------------------------
Processing text 1/20 .....
Prediction 1/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 2/20 .....
Prediction 2/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 3/20 .....
Prediction 3/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 4/20 .....
Prediction 4/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 5/20 .....
Prediction 5/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 6/20 .....
Prediction 6/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 7/20 .....
Prediction 7/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 8/20 .....
Prediction 8/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 9/20 .....
Prediction 9/20: 0 | Ground Truth: 1 ❌ (FN)
-----------------------------------
P



Analyzing misclassifications, true positives, and invalid outputs...



Iteration 4/5


-----------------------------------
Processing text 1/20 .....
Prediction 1/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 2/20 .....
Prediction 2/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 3/20 .....
Prediction 3/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 4/20 .....
Prediction 4/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 5/20 .....
Prediction 5/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 6/20 .....
Prediction 6/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 7/20 .....
Prediction 7/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 8/20 .....
Prediction 8/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 9/20 .....
Prediction 9/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
P



Analyzing misclassifications, true positives, and invalid outputs...



Iteration 5/5


-----------------------------------
Processing text 1/20 .....
Prediction 1/20: 1 | Ground Truth: 0 ❌ (FP)
-----------------------------------
Processing text 2/20 .....
Prediction 2/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 3/20 .....
Prediction 3/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 4/20 .....
Prediction 4/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 5/20 .....
Prediction 5/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 6/20 .....
Prediction 6/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 7/20 .....
Prediction 7/20: 1 | Ground Truth: 1 ✅ (TP)
-----------------------------------
Processing text 8/20 .....
Prediction 8/20: 0 | Ground Truth: 0 ✅ (TN)
-----------------------------------
Processing text 9/20 .....
Prediction 9/20: 0 | Ground Truth: 1 ❌ (FN)
-----------------------------------
P





All logs saved in directory: /Users/danielfiuzadosil/Documents/GitHub/AI-Prompt-Optimiser/examples/binary_classifier/runs/sentiment_exp_Wed_11-Dec-2024_10-23-09
