In [1]:
# Import necessary libraries
import pandas as pd
import sys
import os
# Make the parent of the working directory importable so the `src` package
# can be found. os.getcwd() is used because a notebook has no `__file__`.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
# Guard against duplicates: re-running this cell must not keep growing sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
from src.iterative_prompt_optimization import optimize_prompt

In [2]:
# Load and prepare data
SAMPLES_PER_CLASS = 10  # rows drawn per label value
SEED = 42               # fixed seed so the same rows are selected on every run

# `usecols` only filters columns; pandas keeps the file's own column order.
# Rename by name (not by position) and pin the order explicitly so 'text'
# and 'label' can never be swapped silently.
reviews_raw = (
    pd.read_csv('reviews.csv', encoding='ISO-8859-1', usecols=['Text', 'Sentiment'])
    .rename(columns={'Text': 'text', 'Sentiment': 'label'})
    .loc[:, ['text', 'label']]
)

# Randomly select SAMPLES_PER_CLASS rows from each label group (10 per label,
# 20 rows in total for the two sentiment labels in this dataset).
# Each group is sampled with the same seed and the groups are concatenated in
# sorted label order, which selects the same rows as groupby().apply() did,
# without relying on apply() receiving the grouping column (deprecated in
# newer pandas releases).
reviews_balanced = pd.concat(
    [
        label_group.sample(n=SAMPLES_PER_CLASS, random_state=SEED)
        for _, label_group in reviews_raw.groupby('label')
    ],
    ignore_index=True,
)

# Shuffle the DataFrame randomly so the labels are interleaved
eval_data = reviews_balanced.sample(frac=1, random_state=SEED).reset_index(drop=True)
print(f"Evaluation data shape: {eval_data.shape}")
print(eval_data.head())

Evaluation data shape: (20, 2)
                                                text  label
0  I hate guns and have never murdered anyone, bu...      0
1  "A Cry in the Dark" is a masterful piece of ci...      1
2  I was watching the Perfect Storm, and thought ...      1
3  If you ask me the first one was really better ...      0
4  The movie 'Heart of Darkness', based on the 18...      0


In [3]:
# Task prompt the optimization starts from.
initial_prompt = (
    "You are a sentiment analysis classifier. "
    "Determine whether the provided text expresses a positive sentiment."
)

# Response-format instruction: the model must answer with a bare '1' or '0'.
output_format_prompt = " ".join(
    [
        "You are to respond strictly in binary format.",
        "For each question, reply only with '1' for positive or '0' for negative.",
        "Do not include any other text, explanations, or comments.",
    ]
)

# How many optimization iterations to run
iterations = 5

In [4]:
# Run the prompt optimization with the llama3.1 model served through Ollama.
# The run prints an estimated token usage/cost and asks for Y/N confirmation
# before the first iteration starts.
# After the optimization process has finished, analyze the results by checking
# the generated log files in the `runs/prompt_optimization_logs_YYYYMMDD_HHMMSS` directory.
optimize_prompt(
    initial_prompt,
    output_format_prompt,
    eval_data,
    iterations,
    model_provider="ollama",
    model_name="llama3.1",
)

Selected provider: ollama
Selected model: llama3.1
Estimated token usage: 534100
Estimated cost: $0 API Costs - Running on Local Hardware

Do you want to proceed with the optimization? (Y/N): 
Iteration 1/5


Processing text 1/20
Prediction: 1 | Ground Truth: 0 ❌ (FP)
Processing text 2/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 3/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 4/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 5/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 6/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 7/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 8/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 9/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 10/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 11/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 12/20
Prediction: 1 | Ground Truth: 0 ❌ (FP)
Processing text 13/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 14/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 15/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 16/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 17/20
Prediction:


Analyzing misclassifications...



Generating new prompt...

Iteration 2/5


Processing text 1/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 2/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 3/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 4/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 5/20
Prediction: 1 | Ground Truth: 0 ❌ (FP)
Processing text 6/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 7/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 8/20
Prediction: 101111010001 | Ground Truth: 0 🛠️ (Invalid Output Format)
Processing text 9/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 10/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 11/20
Prediction: 10111011100111 | Ground Truth: 1 🛠️ (Invalid Output Format)
Processing text 12/20
Prediction: 01010 | Ground Truth: 0 🛠️ (Invalid Output Format)
Processing text 13/20
Prediction: 0 | Ground Truth: 0 ✅ (TN)
Processing text 14/20
Prediction: 1 | Ground Truth: 1 ✅ (TP)
Processing text 15/20
Prediction: 1 | Ground Truth: 0 ❌ (FP)
Proces


Analyzing misclassifications...



Generating new prompt...


In [None]:
# Alternative (disabled): run the prompt optimization process without passing
# model_provider / model_name explicitly.
# optimize_prompt(initial_prompt, output_format_prompt, eval_data, iterations)