In [None]:
import pandas as pd

dataset = pd.read_csv("./sms-spam/collection.tsv", sep="\t", names=["classification", "text"])

In [None]:
from phoenix.otel import register
import os

os.environ["PHOENIX_COLLECTOR_ENDPOINT"] = "http://localhost:6006"
# configure the Phoenix tracer
tracer_provider = register(
    auto_instrument=True,
    protocol="http/protobuf",
    project_name="dspy"
)

Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: dspy
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: http://localhost:6006/v1/traces
|  Transport: HTTP + protobuf
|  Transport Headers: {}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  



In [18]:
import dspy
lm = dspy.LM('openai/gpt-4o-mini', api_key=input("API KEY"))
dspy.configure(lm=lm)

In [33]:

from typing import Literal

class TextClassifier(dspy.Signature):
    text: str = dspy.InputField()
    classification: Literal['spam', 'ham'] = dspy.OutputField(desc="whether a text message is legitimate (ham) or spam")


classify = dspy.Predict(TextClassifier)

In [26]:

eval_set = [dspy.Example(text=row["text"], classification=row["classification"]).with_inputs("text") for i, row in dataset.sample(20).iterrows()]
train_set = [dspy.Example(text=row["text"], classification=row["classification"]).with_inputs("text") for i, row in dataset.sample(200).iterrows()]

In [60]:
my_metric = lambda expected, actual, _ = None: actual.classification == expected.classification
evaluate = dspy.Evaluate(
    devset=eval_set,
    metric=my_metric,
    max_errors=1
)


In [61]:
evaluate(classify, display_table=True, display_progress=True)

Average Metric: 17.00 / 20 (85.0%): 100%|██████████| 20/20 [00:10<00:00,  1.88it/s] 

2025/04/06 11:43:15 INFO dspy.evaluate.evaluate: Average Metric: 17 / 20 (85.0%)





Unnamed: 0,text,example_classification,pred_classification,<lambda>
0,Sorry i din lock my keypad.,ham,ham,✔️ [True]
1,Aiyah then i wait lor. Then u entertain me. Hee...,ham,ham,✔️ [True]
2,Ok lor... But buy wat?,ham,ham,✔️ [True]
3,Ooooooh I forgot to tell u I can get on yoville on my phone,ham,ham,✔️ [True]
4,Dad says hurry the hell up,ham,ham,✔️ [True]
5,First has she gained more than &lt;#&gt; kg since she took in. Sec...,ham,ham,✔️ [True]
6,December only! Had your mobile 11mths+? You are entitled to update...,spam,spam,✔️ [True]
7,You intrepid duo you! Have a great time and see you both soon.,ham,ham,✔️ [True]
8,Still i have not checked it da. . .,ham,ham,✔️ [True]
9,When u love someone Dont make them to love u as much as u do. But ...,ham,ham,✔️ [True]


85.0

In [37]:
few_shot = dspy.LabeledFewShot()

few_shot_compiled = few_shot.compile(student=classify, trainset=train_set)

In [62]:
evaluate(few_shot_compiled, display_progress=True, display_table=True)

Average Metric: 18.00 / 20 (90.0%): 100%|██████████| 20/20 [00:16<00:00,  1.23it/s] 

2025/04/06 11:43:43 INFO dspy.evaluate.evaluate: Average Metric: 18 / 20 (90.0%)





Unnamed: 0,text,example_classification,pred_classification,<lambda>
0,Sorry i din lock my keypad.,ham,ham,✔️ [True]
1,Aiyah then i wait lor. Then u entertain me. Hee...,ham,ham,✔️ [True]
2,Ok lor... But buy wat?,ham,ham,✔️ [True]
3,Ooooooh I forgot to tell u I can get on yoville on my phone,ham,ham,✔️ [True]
4,Dad says hurry the hell up,ham,ham,✔️ [True]
5,First has she gained more than &lt;#&gt; kg since she took in. Sec...,ham,ham,✔️ [True]
6,December only! Had your mobile 11mths+? You are entitled to update...,spam,spam,✔️ [True]
7,You intrepid duo you! Have a great time and see you both soon.,ham,ham,✔️ [True]
8,Still i have not checked it da. . .,ham,ham,✔️ [True]
9,When u love someone Dont make them to love u as much as u do. But ...,ham,ham,✔️ [True]


90.0

In [67]:
few_shot_compiled.save("./labeled_few_shot.json")

In [45]:
bootstrap_few_shot = dspy.BootstrapFewShot(max_labeled_demos=8, max_rounds=2)

bootstrap_few_shot_compiled = bootstrap_few_shot.compile(classify, trainset=train_set)

  2%|▏         | 4/200 [00:00<00:03, 55.13it/s]

Bootstrapped 4 full traces after 4 examples for up to 2 rounds, amounting to 4 attempts.





In [63]:
evaluate(bootstrap_few_shot_compiled, display_progress=True, display_table=True)

  0%|          | 0/20 [00:00<?, ?it/s]

Average Metric: 19.00 / 20 (95.0%): 100%|██████████| 20/20 [00:15<00:00,  1.31it/s] 

2025/04/06 11:44:22 INFO dspy.evaluate.evaluate: Average Metric: 19 / 20 (95.0%)





Unnamed: 0,text,example_classification,pred_classification,<lambda>
0,Sorry i din lock my keypad.,ham,ham,✔️ [True]
1,Aiyah then i wait lor. Then u entertain me. Hee...,ham,ham,✔️ [True]
2,Ok lor... But buy wat?,ham,ham,✔️ [True]
3,Ooooooh I forgot to tell u I can get on yoville on my phone,ham,ham,✔️ [True]
4,Dad says hurry the hell up,ham,ham,✔️ [True]
5,First has she gained more than &lt;#&gt; kg since she took in. Sec...,ham,ham,✔️ [True]
6,December only! Had your mobile 11mths+? You are entitled to update...,spam,spam,✔️ [True]
7,You intrepid duo you! Have a great time and see you both soon.,ham,ham,✔️ [True]
8,Still i have not checked it da. . .,ham,ham,✔️ [True]
9,When u love someone Dont make them to love u as much as u do. But ...,ham,ham,✔️ [True]


95.0

In [41]:
bootstrap_few_shot_compiled.save("./bootstrap-few-shot.json")

In [None]:
mipro = dspy.MIPROv2(my_metric)

mipro_compiled = mipro.compile(classify, trainset=train_set)

2025/04/06 11:27:21 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/04/06 11:27:21 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/04/06 11:27:21 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=10 sets of demonstrations...


Bootstrapping set 1/10
Bootstrapping set 2/10
Bootstrapping set 3/10


 10%|█         | 4/40 [00:00<00:00, 40.49it/s]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 4/10


  5%|▌         | 2/40 [00:01<00:30,  1.26it/s]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 5/10


  2%|▎         | 1/40 [00:00<00:27,  1.40it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 6/10


  5%|▌         | 2/40 [00:02<00:42,  1.13s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 7/10


  5%|▌         | 2/40 [00:01<00:24,  1.57it/s]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 8/10


  5%|▌         | 2/40 [00:01<00:27,  1.37it/s]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 9/10


  2%|▎         | 1/40 [00:00<00:23,  1.64it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 10/10


  2%|▎         | 1/40 [00:00<00:31,  1.23it/s]
2025/04/06 11:27:29 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/04/06 11:27:29 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.
2025/04/06 11:27:29 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...



Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.


2025/04/06 11:28:24 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/04/06 11:28:24 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Given the fields `text`, produce the fields `classification`.

2025/04/06 11:28:24 INFO dspy.teleprompt.mipro_optimizer_v2: 1: In a world where personal communication is increasingly flooded with unwanted messages, your task is crucial. Given the field `text`, classify the message as either "ham" (legitimate communication) or "spam" (unwanted content). Your classification will help users navigate their messages effectively, ensuring they receive only the correspondence that matters to them. Analyze the emotional tone, structure, and context of the message to make an accurate determination of its classification.

2025/04/06 11:28:24 INFO dspy.teleprompt.mipro_optimizer_v2: 2: Analyze the provided text input and classify it as either "ham" or "spam." Consider the emotional tone, personal anecdotes, and contextual cues in the 

Average Metric: 151.00 / 160 (94.4%): 100%|██████████| 160/160 [00:16<00:00,  9.97it/s]

2025/04/06 11:28:40 INFO dspy.evaluate.evaluate: Average Metric: 151 / 160 (94.4%)
2025/04/06 11:28:40 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 94.38

2025/04/06 11:28:40 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:03<00:00,  6.56it/s] 

2025/04/06 11:28:44 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:28:44 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2'].
2025/04/06 11:28:44 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0]
2025/04/06 11:28:44 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:28:44 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:28:44 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 34 - Minibatch ==



Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:06<00:00,  3.92it/s]

2025/04/06 11:28:51 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)
2025/04/06 11:28:51 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 2'].
2025/04/06 11:28:51 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0]
2025/04/06 11:28:51 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:28:51 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:28:51 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:03<00:00,  6.85it/s]

2025/04/06 11:28:54 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:28:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 6'].
2025/04/06 11:28:54 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0]
2025/04/06 11:28:54 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:28:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:28:54 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 34 - Minibatch ==



Average Metric: 25.00 / 25 (100.0%): 100%|██████████| 25/25 [00:03<00:00,  6.83it/s]

2025/04/06 11:28:58 INFO dspy.evaluate.evaluate: Average Metric: 25 / 25 (100.0%)
2025/04/06 11:28:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:28:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0]
2025/04/06 11:28:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:28:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:28:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:03<00:00,  6.86it/s]


2025/04/06 11:29:02 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:29:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 8'].
2025/04/06 11:29:02 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0]
2025/04/06 11:29:02 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:29:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:29:02 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 7 / 34 - Minibatch ==


Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:03<00:00,  6.90it/s]

2025/04/06 11:29:06 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:29:06 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 3'].
2025/04/06 11:29:06 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0]
2025/04/06 11:29:06 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:29:06 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:29:06 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 8 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:09<00:00,  2.77it/s]

2025/04/06 11:29:15 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:29:15 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 9', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:29:15 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0]
2025/04/06 11:29:15 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:29:15 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:29:15 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 9 / 34 - Minibatch ==



Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:12<00:00,  1.98it/s]

2025/04/06 11:29:27 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)
2025/04/06 11:29:27 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 4'].
2025/04/06 11:29:27 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0]
2025/04/06 11:29:27 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:29:27 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:29:27 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 10 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:12<00:00,  2.06it/s] 

2025/04/06 11:29:40 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:29:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 7'].
2025/04/06 11:29:40 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0]
2025/04/06 11:29:40 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38]
2025/04/06 11:29:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 94.38


2025/04/06 11:29:40 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 34 - Full Evaluation =====
2025/04/06 11:29:40 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 100.0) from minibatch trials...



Average Metric: 155.00 / 160 (96.9%): 100%|██████████| 160/160 [01:14<00:00,  2.15it/s]

2025/04/06 11:30:54 INFO dspy.evaluate.evaluate: Average Metric: 155 / 160 (96.9%)
2025/04/06 11:30:54 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 96.88
2025/04/06 11:30:54 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:30:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88
2025/04/06 11:30:54 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/04/06 11:30:54 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 12 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:03<00:00,  6.29it/s] 

2025/04/06 11:30:58 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:30:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:30:58 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0]
2025/04/06 11:30:58 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:30:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:30:58 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 13 / 34 - Minibatch ==



Average Metric: 18.00 / 20 (90.0%):  80%|████████  | 20/25 [00:14<00:02,  1.77it/s]

2025/04/06 11:31:13 ERROR dspy.utils.parallelizer: Error for Example({'text': 'SMS AUCTION - A BRAND NEW Nokia 7250 is up 4 auction today! Auction is FREE 2 join & take part! Txt NOKIA to 86021 now!', 'classification': 'spam'}) (input_keys={'text'}): litellm.RateLimitError: RateLimitError: OpenAIException - Rate limit reached for gpt-4o-mini in organization org-Cj9hZ8dens3JYF8ae7IcNsL0 on requests per min (RPM): Limit 500, Used 500, Requested 1. Please try again in 120ms. Visit https://platform.openai.com/account/rate-limits to learn more.. Set `provide_traceback=True` for traceback.


Average Metric: 22.00 / 24 (91.7%): 100%|██████████| 25/25 [00:16<00:00,  1.51it/s]

2025/04/06 11:31:15 INFO dspy.evaluate.evaluate: Average Metric: 22.0 / 25 (88.0%)
2025/04/06 11:31:15 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 7'].
2025/04/06 11:31:15 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0]
2025/04/06 11:31:15 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:31:15 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:31:15 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 14 / 34 - Minibatch ==



Average Metric: 22.00 / 22 (100.0%):  84%|████████▍ | 21/25 [00:14<00:03,  1.18it/s]

2025/04/06 11:31:29 ERROR dspy.utils.parallelizer: Error for Example({'text': "Yeah I think my usual guy's still passed out from last night, if you get ahold of anybody let me know and I'll throw down", 'classification': 'ham'}) (input_keys={'text'}): litellm.RateLimitError: RateLimitError: OpenAIException - Rate limit reached for gpt-4o-mini in organization org-Cj9hZ8dens3JYF8ae7IcNsL0 on tokens per min (TPM): Limit 200000, Used 198749, Requested 1356. Please try again in 31ms. Visit https://platform.openai.com/account/rate-limits to learn more.. Set `provide_traceback=True` for traceback.


Average Metric: 24.00 / 24 (100.0%): 100%|██████████| 25/25 [00:15<00:00,  1.60it/s]

2025/04/06 11:31:30 INFO dspy.evaluate.evaluate: Average Metric: 24.0 / 25 (96.0%)
2025/04/06 11:31:30 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 4'].
2025/04/06 11:31:30 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0]
2025/04/06 11:31:30 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:31:30 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:31:30 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 15 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:15<00:00,  1.58it/s] 

2025/04/06 11:31:46 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:31:46 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 1'].
2025/04/06 11:31:46 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0]
2025/04/06 11:31:46 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:31:46 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:31:46 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 16 / 34 - Minibatch ==



Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:17<00:00,  1.44it/s] 

2025/04/06 11:32:04 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)
2025/04/06 11:32:04 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 9'].
2025/04/06 11:32:04 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0]
2025/04/06 11:32:04 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:32:04 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:32:04 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 17 / 34 - Minibatch ==



Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:14<00:00,  1.74it/s] 

2025/04/06 11:32:18 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)
2025/04/06 11:32:18 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:32:18 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0]
2025/04/06 11:32:18 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:32:18 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:32:18 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 18 / 34 - Minibatch ==



Average Metric: 22.00 / 23 (95.7%):  92%|█████████▏| 23/25 [00:14<00:01,  1.60it/s] 

2025/04/06 11:32:33 ERROR dspy.utils.parallelizer: Error for Example({'text': 'I prefer my free days... Tues, wed, fri oso can... Ü ask those workin lor...', 'classification': 'ham'}) (input_keys={'text'}): litellm.RateLimitError: RateLimitError: OpenAIException - Rate limit reached for gpt-4o-mini in organization org-Cj9hZ8dens3JYF8ae7IcNsL0 on tokens per min (TPM): Limit 200000, Used 198856, Requested 1357. Please try again in 63ms. Visit https://platform.openai.com/account/rate-limits to learn more.. Set `provide_traceback=True` for traceback.


Average Metric: 23.00 / 24 (95.8%): 100%|██████████| 25/25 [00:14<00:00,  1.68it/s]

2025/04/06 11:32:33 INFO dspy.evaluate.evaluate: Average Metric: 23.0 / 25 (92.0%)
2025/04/06 11:32:33 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 4'].
2025/04/06 11:32:33 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0]
2025/04/06 11:32:33 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:32:33 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:32:33 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 19 / 34 - Minibatch ==



Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:17<00:00,  1.44it/s] 

2025/04/06 11:32:50 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)
2025/04/06 11:32:50 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 3'].
2025/04/06 11:32:50 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0]
2025/04/06 11:32:50 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:32:50 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:32:50 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 20 / 34 - Minibatch ==



Average Metric: 25.00 / 25 (100.0%): 100%|██████████| 25/25 [00:14<00:00,  1.73it/s]

2025/04/06 11:33:05 INFO dspy.evaluate.evaluate: Average Metric: 25 / 25 (100.0%)
2025/04/06 11:33:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 9'].
2025/04/06 11:33:05 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0]
2025/04/06 11:33:05 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88]
2025/04/06 11:33:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:33:05 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 21 / 34 - Full Evaluation =====
2025/04/06 11:33:05 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 100.0) from minibatch trials...



Average Metric: 152.00 / 160 (95.0%): 100%|██████████| 160/160 [01:30<00:00,  1.78it/s]

2025/04/06 11:34:35 INFO dspy.evaluate.evaluate: Average Metric: 152 / 160 (95.0%)
2025/04/06 11:34:35 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:34:35 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88
2025/04/06 11:34:35 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/04/06 11:34:35 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 22 / 34 - Minibatch ==



Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:02<00:00,  9.37it/s]

2025/04/06 11:34:38 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)
2025/04/06 11:34:38 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 9'].
2025/04/06 11:34:38 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0]
2025/04/06 11:34:38 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:34:38 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:34:38 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 23 / 34 - Minibatch ==



Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:15<00:00,  1.64it/s] 

2025/04/06 11:34:53 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)
2025/04/06 11:34:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 9'].
2025/04/06 11:34:53 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0]
2025/04/06 11:34:53 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:34:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:34:53 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 24 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:14<00:00,  1.73it/s]

2025/04/06 11:35:08 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:35:08 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 6'].
2025/04/06 11:35:08 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0]
2025/04/06 11:35:08 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:35:08 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:35:08 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 25 / 34 - Minibatch ==



Average Metric: 25.00 / 25 (100.0%): 100%|██████████| 25/25 [00:17<00:00,  1.43it/s]

2025/04/06 11:35:25 INFO dspy.evaluate.evaluate: Average Metric: 25 / 25 (100.0%)
2025/04/06 11:35:25 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:35:25 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0, 100.0]
2025/04/06 11:35:25 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:35:25 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:35:25 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 26 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:14<00:00,  1.72it/s]

2025/04/06 11:35:40 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:35:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:35:40 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0, 100.0, 96.0]
2025/04/06 11:35:40 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:35:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:35:40 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 27 / 34 - Minibatch ==



Average Metric: 25.00 / 25 (100.0%): 100%|██████████| 25/25 [00:15<00:00,  1.60it/s]

2025/04/06 11:35:56 INFO dspy.evaluate.evaluate: Average Metric: 25 / 25 (100.0%)
2025/04/06 11:35:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:35:56 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0, 100.0, 96.0, 100.0]
2025/04/06 11:35:56 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:35:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:35:56 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 28 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:13<00:00,  1.86it/s]

2025/04/06 11:36:09 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:36:09 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 8'].
2025/04/06 11:36:09 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0, 100.0, 96.0, 100.0, 96.0]
2025/04/06 11:36:09 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:36:09 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:36:09 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 29 / 34 - Minibatch ==



Average Metric: 25.00 / 25 (100.0%): 100%|██████████| 25/25 [00:16<00:00,  1.54it/s]

2025/04/06 11:36:25 INFO dspy.evaluate.evaluate: Average Metric: 25 / 25 (100.0%)
2025/04/06 11:36:25 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:36:25 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0, 100.0, 96.0, 100.0, 96.0, 100.0]
2025/04/06 11:36:25 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:36:25 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:36:25 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 30 / 34 - Minibatch ==



Average Metric: 23.00 / 25 (92.0%): 100%|██████████| 25/25 [00:03<00:00,  7.02it/s] 

2025/04/06 11:36:29 INFO dspy.evaluate.evaluate: Average Metric: 23 / 25 (92.0%)
2025/04/06 11:36:29 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 9', 'Predictor 0: Few-Shot Set 0'].
2025/04/06 11:36:29 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0, 100.0, 96.0, 100.0, 96.0, 100.0, 92.0]
2025/04/06 11:36:29 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0]
2025/04/06 11:36:29 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:36:29 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 31 / 34 - Full Evaluation =====
2025/04/06 11:36:29 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 100.0) from minibatch trials...



Average Metric: 69.00 / 71 (97.2%):  44%|████▍     | 71/160 [00:38<00:56,  1.57it/s] 

2025/04/06 11:37:08 ERROR dspy.utils.parallelizer: Error for Example({'text': 'Dear good morning now only i am up', 'classification': 'ham'}) (input_keys={'text'}): litellm.RateLimitError: RateLimitError: OpenAIException - Rate limit reached for gpt-4o-mini in organization org-Cj9hZ8dens3JYF8ae7IcNsL0 on tokens per min (TPM): Limit 200000, Used 198732, Requested 1409. Please try again in 42ms. Visit https://platform.openai.com/account/rate-limits to learn more.. Set `provide_traceback=True` for traceback.


Average Metric: 82.00 / 85 (96.5%):  54%|█████▍    | 86/160 [00:47<00:59,  1.24it/s]

2025/04/06 11:37:16 ERROR dspy.utils.parallelizer: Error for Example({'text': "I'm really sorry i won't b able 2 do this friday.hope u can find an alternative.hope yr term's going ok:-)", 'classification': 'ham'}) (input_keys={'text'}): litellm.RateLimitError: RateLimitError: OpenAIException - Rate limit reached for gpt-4o-mini in organization org-Cj9hZ8dens3JYF8ae7IcNsL0 on tokens per min (TPM): Limit 200000, Used 199718, Requested 1427. Please try again in 343ms. Visit https://platform.openai.com/account/rate-limits to learn more.. Set `provide_traceback=True` for traceback.


Average Metric: 92.00 / 95 (96.8%):  61%|██████    | 97/160 [00:54<00:44,  1.41it/s]

2025/04/06 11:37:25 ERROR dspy.utils.parallelizer: Error for Example({'text': 'Everybody had fun this evening. Miss you.', 'classification': 'ham'}) (input_keys={'text'}): litellm.RateLimitError: RateLimitError: OpenAIException - Rate limit reached for gpt-4o-mini in organization org-Cj9hZ8dens3JYF8ae7IcNsL0 on tokens per min (TPM): Limit 200000, Used 199318, Requested 1411. Please try again in 218ms. Visit https://platform.openai.com/account/rate-limits to learn more.. Set `provide_traceback=True` for traceback.


Average Metric: 112.00 / 115 (97.4%):  74%|███████▍  | 118/160 [01:06<00:23,  1.78it/s]

2025/04/06 11:37:36 ERROR dspy.utils.parallelizer: Error for Example({'text': 'Dude u knw also telugu..thts gud..k, gud nyt..', 'classification': 'ham'}) (input_keys={'text'}): litellm.RateLimitError: RateLimitError: OpenAIException - Rate limit reached for gpt-4o-mini in organization org-Cj9hZ8dens3JYF8ae7IcNsL0 on requests per min (RPM): Limit 500, Used 500, Requested 1. Please try again in 120ms. Visit https://platform.openai.com/account/rate-limits to learn more.. Set `provide_traceback=True` for traceback.


Average Metric: 151.00 / 156 (96.8%): 100%|██████████| 160/160 [01:31<00:00,  1.75it/s]

2025/04/06 11:38:01 INFO dspy.evaluate.evaluate: Average Metric: 151.0 / 160 (94.4%)
2025/04/06 11:38:01 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0, 94.38]
2025/04/06 11:38:01 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88
2025/04/06 11:38:01 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/04/06 11:38:01 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 32 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:02<00:00,  8.71it/s]

2025/04/06 11:38:04 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:38:04 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 0'].
2025/04/06 11:38:04 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0, 100.0, 96.0, 100.0, 96.0, 100.0, 92.0, 96.0]
2025/04/06 11:38:04 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0, 94.38]
2025/04/06 11:38:04 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:38:04 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 33 / 34 - Minibatch ==



Average Metric: 24.00 / 25 (96.0%): 100%|██████████| 25/25 [00:06<00:00,  3.81it/s] 

2025/04/06 11:38:10 INFO dspy.evaluate.evaluate: Average Metric: 24 / 25 (96.0%)
2025/04/06 11:38:10 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 96.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 5'].
2025/04/06 11:38:10 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [96.0, 92.0, 96.0, 100.0, 96.0, 96.0, 96.0, 92.0, 96.0, 96.0, 88.0, 96.0, 96.0, 92.0, 92.0, 92.0, 92.0, 100.0, 92.0, 92.0, 96.0, 100.0, 96.0, 100.0, 96.0, 100.0, 92.0, 96.0, 96.0]
2025/04/06 11:38:10 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0, 94.38]
2025/04/06 11:38:10 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88


2025/04/06 11:38:10 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 34 / 34 - Full Evaluation =====
2025/04/06 11:38:10 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 100.0) from minibatch trials...



Average Metric: 58.00 / 60 (96.7%):  38%|███▊      | 60/160 [00:34<01:21,  1.22it/s] 

2025/04/06 11:38:46 ERROR dspy.utils.parallelizer: Error for Example({'text': 'YOUR CHANCE TO BE ON A REALITY FANTASY SHOW call now = 08707509020 Just 20p per min NTT Ltd, PO Box 1327 Croydon CR9 5WB 0870 is a national = rate call.', 'classification': 'spam'}) (input_keys={'text'}): litellm.RateLimitError: RateLimitError: OpenAIException - Rate limit reached for gpt-4o-mini in organization org-Cj9hZ8dens3JYF8ae7IcNsL0 on tokens per min (TPM): Limit 200000, Used 199303, Requested 1409. Please try again in 213ms. Visit https://platform.openai.com/account/rate-limits to learn more.. Set `provide_traceback=True` for traceback.


Average Metric: 155.00 / 159 (97.5%): 100%|██████████| 160/160 [01:26<00:00,  1.84it/s]

2025/04/06 11:39:37 INFO dspy.evaluate.evaluate: Average Metric: 155.0 / 160 (96.9%)
2025/04/06 11:39:37 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [94.38, 96.88, 95.0, 94.38, 96.88]
2025/04/06 11:39:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 96.88
2025/04/06 11:39:37 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/04/06 11:39:37 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 96.88!





In [64]:
evaluate(mipro_compiled, display_progress=True, display_table=True)

Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:12<00:00,  1.54it/s]

2025/04/06 11:45:43 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)





Unnamed: 0,text,example_classification,pred_classification,<lambda>
0,Sorry i din lock my keypad.,ham,ham,✔️ [True]
1,Aiyah then i wait lor. Then u entertain me. Hee...,ham,ham,✔️ [True]
2,Ok lor... But buy wat?,ham,ham,✔️ [True]
3,Ooooooh I forgot to tell u I can get on yoville on my phone,ham,ham,✔️ [True]
4,Dad says hurry the hell up,ham,ham,✔️ [True]
5,First has she gained more than &lt;#&gt; kg since she took in. Sec...,ham,ham,✔️ [True]
6,December only! Had your mobile 11mths+? You are entitled to update...,spam,spam,✔️ [True]
7,You intrepid duo you! Have a great time and see you both soon.,ham,ham,✔️ [True]
8,Still i have not checked it da. . .,ham,ham,✔️ [True]
9,When u love someone Dont make them to love u as much as u do. But ...,ham,ham,✔️ [True]


100.0

In [57]:
mipro_compiled.save("./miprov2.json")