In [1]:
# Label for this run, passed to optimize_prompt as `experiment_name`; the log
# directory reported at the end of the run starts with this name.
experiment_name = "news_exp"

In [2]:
# Starting prompt handed to optimize_prompt as `initial_prompt`. It lists the
# four allowed categories and asks for a JSON answer with a reasoning field
# ("chain_of_thought") and a label field ("topic").
initial_prompt = """
Your goal is to classify the following news headlines into one of the following categories:
1- World
2- Sports
3- Business
4- Sci/Tech

Provide your response as a JSON dictionary with the following structure:
{
    "chain_of_thought": "Your step-by-step reasoning here",
    "topic": "The identified topic - the ONLY possible topics are "World", "Sports", "Business", "Sci/Tech". Output just one single category."
}
"""

In [3]:
# Output format prompt
output_format_prompt = """
Provide your response as a JSON dictionary with the following structure:
{
    "chain_of_thought": "Your step-by-step reasoning here",
    "topic": "The identified topic - the ONLY possible topics are "World", "Sports", "Business", "Sci/Tech". Output just one single category."
}
"""

In [4]:
# Output schema passed to optimize_prompt as `output_schema`. It names the JSON
# keys to read from a model response and supplies regexes for the same fields.
# NOTE(review): exactly when the regexes are applied (e.g. only when JSON
# parsing fails) is decided inside `src` - confirm there.
output_schema = {
    # JSON key that carries the predicted category.
    'key_to_extract': 'topic',
    # Extracted value -> label. An identity mapping here: the prompt already
    # asks for the exact category names.
    'value_mapping': {
        'World': 'World',
        'Sports': 'Sports',
        'Business': 'Business',
        'Sci/Tech': 'Sci/Tech',
    },
    # [^"]+ instead of \w+ so that labels containing "/" (Sci/Tech) are
    # captured in full, up to the closing quote.
    'regex_pattern': r'"topic":\s*"([^"]+)"',
    # JSON key that carries the model's reasoning.
    'chain_of_thought_key': 'chain_of_thought',
    # Capture the whole string value: any run of non-quote characters
    # (line breaks included) or backslash-escaped characters such as \".
    # A plain (.*?) would stop at the first escaped quote and truncate the text.
    'chain_of_thought_regex': r'"chain_of_thought":\s*"((?:[^"\\]|\\.)*)"',
    'use_json_mode': True,
}

In [5]:
# Number of optimization iterations, passed to optimize_prompt as `iterations`.
# The run log prints one "Iteration k/3" section per iteration.
iterations = 3

In [6]:
# Provider/model pairs passed to optimize_prompt: `eval_*` is reported in the
# run log as the evaluation provider/model, `optim_*` as the optimization
# provider/model. Both are set to the same OpenAI model here.
eval_provider = "openai"
eval_model = "gpt-4o-mini"
optim_provider = "openai"
optim_model = "gpt-4o-mini"

In [7]:
# Path to the CSV file containing the news headlines used for evaluation
# (relative, so it is resolved against the notebook's working directory)
eval_datapath = "news.csv"
# CSV column holding the headline text; renamed to 'text' when the data is loaded
text_column = "Title"
# CSV column holding the ground-truth category; renamed to 'label' when the data is loaded
target_column = "topic"
# Number of rows drawn from each category when building the evaluation sample
sample_size = 30

------------------------------------------------------------------------------------------

In [8]:
# Import necessary libraries
import pandas as pd
import sys
import os

# Make the directory two levels above the working directory importable so that
# `from src import ...` resolves (assumes the notebook is run from its own
# folder inside the repository - TODO confirm).
# os.getcwd() is used to get the current working directory for Jupyter notebooks.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
grandparent_dir = os.path.dirname(parent_dir)
# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if grandparent_dir not in sys.path:
    sys.path.append(grandparent_dir)
from src import optimize_prompt

In [9]:
# Load and prepare data: read only the two configured columns, give them the
# fixed names 'text' and 'label', and force the text column to str.
eval_data = (
    pd.read_csv(
        eval_datapath,
        encoding='ISO-8859-1',
        usecols=[text_column, target_column],
    )
    .rename(columns={text_column: 'text', target_column: 'label'})
    .astype({'text': str})
)
eval_data

Unnamed: 0,text,label
0,Fears for T N pension after talks,Business
1,The Race is On: Second Private Team Sets Launc...,Sci/Tech
2,Ky. Company Wins Grant to Study Peptides (AP),Sci/Tech
3,Prediction Unit Helps Forecast Wildfires (AP),Sci/Tech
4,Calif. Aims to Limit Farm-Related Smog (AP),Sci/Tech
...,...,...
7595,Around the world,World
7596,Void is filled with Clement,Sports
7597,Martinez leaves bitter,Sports
7598,5 of arthritis patients in Singapore take Bext...,Business


In [10]:
# Randomly select n samples from each class.
# Each class is sampled on its own with the same fixed seed and the pieces are
# concatenated in group order. This selects the same rows as
# groupby('label').apply(lambda x: x.sample(...)), but does not rely on
# DataFrameGroupBy.apply operating on the grouping column (deprecated since
# pandas 2.2), so the 'label' column is always kept.
# Note: sample() raises ValueError if a class has fewer than `sample_size` rows.
per_class_samples = [
    class_rows.sample(n=sample_size, random_state=42)
    for _, class_rows in eval_data.groupby('label')
]
eval_data = pd.concat(per_class_samples, ignore_index=True)
# Shuffle the DataFrame randomly so the classes are interleaved
eval_data = eval_data.sample(frac=1, random_state=42).reset_index(drop=True)
print(f"Evaluation data shape: {eval_data.shape}")
print(eval_data.head())

Evaluation data shape: (120, 2)
                                                text     label
0         RealNetworks Gets in Content Business (AP)  Sci/Tech
1          SCO Postpones Legal Web Site (NewsFactor)  Sci/Tech
2  Lloyds TSB to Move More Than 1,000 UK Jobs to ...  Business
3         Czech Republic's Cell Operators Fined (AP)  Sci/Tech
4         Nigeria gives Shell \$1.5 billion eco-bill  Business


In [11]:
# Run the prompt optimization process.
# Keyword arguments are grouped by purpose; all values match the configuration
# cells above.
best_prompt, best_metrics = optimize_prompt(
    # Prompts and response parsing
    initial_prompt=initial_prompt,
    output_format_prompt=output_format_prompt,
    output_schema=output_schema,
    # Evaluation data and number of iterations
    eval_data=eval_data,
    iterations=iterations,
    # Evaluation model
    eval_provider=eval_provider,
    eval_model=eval_model,
    eval_temperature=0.7,
    # Optimization model
    optim_provider=optim_provider,
    optim_model=optim_model,
    optim_temperature=0,
    # Free-text comment fields, all left empty for this run
    fp_comments="",
    fn_comments="",
    tp_comments="",
    invalid_comments="",
    validation_comments="",
    # Run control
    use_cache=True,
    experiment_name=experiment_name,
    skip_prompt_validation=True,
)


Detected problem type: multiclass
Selected evaluation provider: openai
Selected evaluation model: gpt-4o-mini
Evaluation temperature: 0.7
Selected optimization provider: openai
Selected optimization model: gpt-4o-mini
Optimization temperature: 0
Estimated token usage: 489840
Estimated cost: $0.59

Do you want to proceed with the optimization? (Y/N): 
Iteration 1/3


-----------------------------------
Processing text 1/120 .....
Prediction 1/120: Business | Ground Truth: Sci/Tech ❌ (Incorrect)
-----------------------------------
Processing text 2/120 .....
Prediction 2/120: Business | Ground Truth: Sci/Tech ❌ (Incorrect)
-----------------------------------
Processing text 3/120 .....
Prediction 3/120: Business | Ground Truth: Business ✅ (Correct)
-----------------------------------
Processing text 4/120 .....
Prediction 4/120: Business | Ground Truth: Sci/Tech ❌ (Incorrect)
-----------------------------------
Processing text 5/120 .....
Prediction 5/120: Business | Ground Truth: Business ✅ (Correct)
-----------------------------------
Processing text 6/120 .....
Prediction 6/120: Sports | Ground Truth: Sports ✅ (Correct)
-----------------------------------
Processing text 7/120 .....
Prediction 7/120: Sports | Ground Truth: Sports ✅ (Correct)
-----------------------------------
Processing text 8/120 .....
Prediction 8/120: Business | Ground Truth:



Analyzing predictions for multiclass classification...



Iteration 2/3


-----------------------------------
Processing text 1/120 .....
Prediction 1/120: Business | Ground Truth: Sci/Tech ❌ (Incorrect)
-----------------------------------
Processing text 2/120 .....
Prediction 2/120: Business | Ground Truth: Sci/Tech ❌ (Incorrect)
-----------------------------------
Processing text 3/120 .....
Prediction 3/120: Business | Ground Truth: Business ✅ (Correct)
-----------------------------------
Processing text 4/120 .....
Prediction 4/120: Business | Ground Truth: Sci/Tech ❌ (Incorrect)
-----------------------------------
Processing text 5/120 .....
Prediction 5/120: Business | Ground Truth: Business ✅ (Correct)
-----------------------------------
Processing text 6/120 .....
Prediction 6/120: Sports | Ground Truth: Sports ✅ (Correct)
-----------------------------------
Processing text 7/120 .....
Prediction 7/120: Sports | Ground Truth: Sports ✅ (Correct)
-----------------------------------
Processing text 8/120 .....
Prediction 8/120: Business | Ground Truth:



Analyzing predictions for multiclass classification...



Iteration 3/3


-----------------------------------
Processing text 1/120 .....
Prediction 1/120: Business | Ground Truth: Sci/Tech ❌ (Incorrect)
-----------------------------------
Processing text 2/120 .....
Prediction 2/120: Sci/Tech | Ground Truth: Sci/Tech ✅ (Correct)
-----------------------------------
Processing text 3/120 .....
Prediction 3/120: Business | Ground Truth: Business ✅ (Correct)
-----------------------------------
Processing text 4/120 .....
Prediction 4/120: Business | Ground Truth: Sci/Tech ❌ (Incorrect)
-----------------------------------
Processing text 5/120 .....
Prediction 5/120: Business | Ground Truth: Business ✅ (Correct)
-----------------------------------
Processing text 6/120 .....
Prediction 6/120: Sports | Ground Truth: Sports ✅ (Correct)
-----------------------------------
Processing text 7/120 .....
Prediction 7/120: Sports | Ground Truth: Sports ✅ (Correct)
-----------------------------------
Processing text 8/120 .....
Prediction 8/120: Business | Ground Truth: B





All logs saved in directory: /Users/danielfiuzadosil/Documents/GitHub/AI-Prompt-Optimiser/examples/muticlass_classifier/runs/news_exp_Tue_10-Dec-2024_14-36-14
