In [None]:
!pip install dspy



In [None]:
!pip install dspy-ai numpy tqdm



In [None]:
!pip install --upgrade dspy-ai numpy tqdm



In [None]:
import dspy
import random
import numpy as np
from typing import List
from dspy.teleprompt import MIPROv2
from tqdm import tqdm

print("所有包已成功导入!")

所有包已成功导入!


In [None]:
import dspy
import random
import numpy as np
from typing import List
from tqdm import tqdm
import re
from dspy.teleprompt import MIPROv2

# Declarative DSPy signature: one input field (`task`) and one output field
# (`solution`).
# NOTE(review): DSPy is understood to use the class docstring below as the
# prompt instruction, so treat it as runtime text rather than documentation —
# confirm against the DSPy signatures documentation before rewording it.
class MultiplicationSignature(dspy.Signature):
    """Solve multiplication problems step by step."""
    # Input: the problem statement, e.g. a string such as "1234 * 5678".
    task = dspy.InputField(desc="multiplication task as a string")
    # Output: the final numeric answer produced by the model.
    solution = dspy.OutputField(desc="final solution as a number")

class MultiplicationSolver(dspy.Module):
    """DSPy module that answers a task with chain-of-thought over ``MultiplicationSignature``."""

    def __init__(self):
        super().__init__()
        # Single predictor: chain-of-thought wrapped around the typed signature.
        self.generate_answer = dspy.ChainOfThought(MultiplicationSignature)

    def forward(self, task):
        """Run the predictor on ``task`` and return whatever it produces."""
        prediction = self.generate_answer(task=task)
        return prediction

def generate_multiplication_dataset(num_samples=1000, max_num=int(1e5), seed=None) -> List[dspy.Example]:
    """Generate random multiplication problems together with their exact products.

    Args:
        num_samples: Number of examples to create. Zero or a negative value
            yields an empty list.
        max_num: Inclusive upper bound for each operand; operands are drawn
            from ``[1, max_num]``. Defaults to 100000, the value that used to be
            hard-coded inside the loop.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the dataset is reproducible and the global ``random`` state
            is left untouched. When ``None``, the global ``random`` module is
            used exactly as before.

    Returns:
        A list of ``dspy.Example`` objects with fields ``task`` (the string
        ``"a * b"``) and ``solution`` (the integer product), with ``task``
        marked as the only input field.

    Raises:
        ValueError: propagated from ``randint`` when ``max_num`` is below 1.
    """
    # Either a dedicated generator (reproducible) or the module-level functions.
    rng = random.Random(seed) if seed is not None else random

    dataset = []
    for _ in range(num_samples):
        # Draw order (a first, then b) is kept so seeded global state gives the
        # same sequence as the previous implementation.
        a = rng.randint(1, max_num)
        b = rng.randint(1, max_num)
        example = dspy.Example(task=f"{a} * {b}", solution=a * b)
        dataset.append(example.with_inputs('task'))
    return dataset

def extract_number_from_solution(solution_text):
    """Extract the final numeric answer from a solution string.

    The text is probed with a list of patterns in priority order: a number at
    the very end, a number after ``=``, a number after "answer is", a number
    after "result is", and finally any number. For the first pattern that
    matches, the LAST match is used (usually the final answer).

    A number may carry comma grouping (``7,006,652``), a decimal fraction
    (``56088.0``) and a leading minus sign. Previously the fraction and the sign
    were ignored, so ``"56088.0"`` was parsed as ``0.0`` and ``"-42"`` as
    ``42.0``.

    Args:
        solution_text: Any object; it is converted with ``str()`` and stripped.

    Returns:
        The extracted value as a ``float``, or ``None`` when nothing numeric
        could be parsed.
    """
    # One number: optional minus (only when it is not glued to a preceding
    # word character, dot or comma, so "2023-06-13" does not yield -13),
    # digits with optional comma groups, optional decimal fraction.
    number = r'((?:(?<![\w.,])-)?\d+(?:,\d+)*(?:\.\d+)?)'

    # Try the most specific locations first.
    patterns = [
        number + r'\s*$',                # number at the end of the text
        r'=\s*' + number,                # number after an equals sign
        r'answer\s*is\s*' + number,      # number after "answer is"
        r'result\s*is\s*' + number,      # number after "result is"
        number,                          # any number at all
    ]

    solution_str = str(solution_text).strip()

    for pattern in patterns:
        matches = re.findall(pattern, solution_str, re.IGNORECASE)
        if matches:
            # Take the last match (usually the final answer) and drop grouping commas.
            number_str = matches[-1].replace(',', '')
            try:
                return float(number_str)
            except ValueError:
                continue

    # No pattern matched (the text has no digits): fall back to a direct
    # conversion, which only succeeds for spellings float() accepts itself.
    try:
        return float(solution_str.replace(',', ''))
    except ValueError:
        # Nothing numeric could be recovered.
        return None

def evaluate_multiplication(example, prediction, trace=None):
    """Score a prediction: 1 if its extracted number matches the expected product, else 0.

    Args:
        example: Object whose ``solution`` attribute holds the expected value
            (anything ``float()`` accepts).
        prediction: Object whose ``solution`` attribute holds the model output.
            A missing or ``None`` ``solution`` scores 0.
        trace: Accepted so the function can be used as a metric callback; it is
            not used here.

    Returns:
        ``1`` when the number extracted from ``prediction.solution`` is within
        0.01 of ``example.solution``; ``0`` otherwise, including on any error.
    """
    # Read the attribute defensively. The error handler below used to re-read
    # prediction.solution, so a prediction without that attribute raised a
    # second time inside the handler instead of scoring 0.
    raw_solution = getattr(prediction, "solution", None)
    if raw_solution is None:
        return 0

    try:
        # Pull the numeric answer out of the free-form prediction text.
        pred_num = extract_number_from_solution(raw_solution)
        if pred_num is None:
            return 0

        exp = float(example.solution)
        return int(abs(pred_num - exp) < 0.01)
    except Exception as e:
        print(f"评估错误: {e}, 预测: {raw_solution}")
        return 0

class LLMProgram(dspy.Module):
    """DSPy module wrapping a chain-of-thought predictor for the ``'task -> solution'`` signature."""

    def __init__(self):
        super().__init__()
        # Inline string signature: one input (`task`), one output (`solution`).
        self.solver = dspy.ChainOfThought('task -> solution')

    def forward(self, task):
        """Run the predictor on ``task`` and return whatever it produces."""
        prediction = self.solver(task=task)
        return prediction

def quick_optimize(api_key=None):
    """Optimise ``LLMProgram`` with MIPROv2 and print before/after validation accuracy.

    Steps: configure the DeepSeek chat model, build 100 random multiplication
    examples with operands in ``[1000, 9999]`` (80 train / 20 validation),
    compile ``LLMProgram`` with ``MIPROv2(auto='light')``, then print the
    validation accuracy of the compiled program and of an uncompiled
    ``MultiplicationSolver``.

    Args:
        api_key: DeepSeek API key. When omitted, the ``DEEPSEEK_API_KEY``
            environment variable is used. The key is no longer embedded in the
            source; any credential that was previously committed here should be
            revoked.

    Returns:
        The compiled program, or ``None`` when compilation or the evaluation of
        the compiled program raised.

    Raises:
        RuntimeError: if no API key is passed and ``DEEPSEEK_API_KEY`` is unset.
    """
    import os  # local import keeps this block self-contained

    # Resolve the credential from the caller or the environment, never from source.
    resolved_key = api_key or os.environ.get("DEEPSEEK_API_KEY")
    if not resolved_key:
        raise RuntimeError(
            "DeepSeek API key missing: pass api_key=... or set the DEEPSEEK_API_KEY environment variable."
        )

    # Configure the language model.
    dspy.settings.configure(
        lm=dspy.LM(
            model="deepseek/deepseek-chat",
            api_key=resolved_key
        )
    )

    # Build the dataset: a small sample with 4-digit operands to keep the run short.
    dataset = [
        dspy.Example(task=f"{a}*{b}", solution=a*b).with_inputs('task')
        for a, b in zip(
            np.random.randint(1000, 10000, 100),
            np.random.randint(1000, 10000, 100)
        )
    ]

    train, val = dataset[:80], dataset[80:]  # 80/20 split

    # Metric handed to the optimiser; delegates to the shared evaluation function.
    def safe_metric(e, p, trace=None):
        return evaluate_multiplication(e, p, trace)

    def _accuracy(program, desc, error_label):
        """Return the fraction of ``val`` examples ``program`` answers correctly.

        A failing prediction is reported and counted as incorrect.
        """
        correct = 0
        for example in tqdm(val, desc=desc):
            try:
                prediction = program(example.task)
                correct += safe_metric(example, prediction)
            except Exception as e:
                print(f"{error_label}: {e}")
                continue
        return correct / len(val)

    print("开始优化...")

    # Create and compile the program.
    llm_program = LLMProgram()

    try:
        compiled_llm_program = MIPROv2(
            metric=safe_metric,
            num_threads=10,
            auto='light'     # light preset: the optimiser picks the remaining settings
        ).compile(
            llm_program,
            trainset=train,
            valset=val,
            requires_permission_to_run=False
        )

        # Evaluate the optimised program.
        # NOTE(review): `val` is also the valset the optimiser selects on, so
        # this figure is optimistic; a separate held-out split would be fairer.
        accuracy = _accuracy(compiled_llm_program, "评估优化后的模型", "预测错误")
        print(f"优化后准确率: {accuracy:.1%}")

    except Exception as e:
        print(f"优化过程出错: {e}")
        return None

    # Evaluate the unoptimised baseline.
    # NOTE(review): the baseline is MultiplicationSolver, a different program
    # from the compiled LLMProgram, so the two numbers are not a strict
    # before/after comparison.
    print("评估未优化的模型...")
    student = MultiplicationSolver()
    accuracy = _accuracy(student, "评估未优化的模型", "未优化模型预测错误")
    print(f"未优化准确率: {accuracy:.1%}")

    return compiled_llm_program

In [None]:
# Keep a handle on the result of this long-running call so later cells can
# reuse it; the bare name on the last line still displays it as the cell output.
compiled_program = quick_optimize()
compiled_program

2025/06/13 04:51:35 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 10
minibatch: False
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 20

2025/06/13 04:51:35 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/06/13 04:51:35 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/06/13 04:51:35 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


开始优化...
Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


  6%|▋         | 5/80 [01:19<19:46, 15.81s/it]


Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Bootstrapping set 4/6


  1%|▏         | 1/80 [00:15<19:55, 15.13s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 5/6


  5%|▌         | 4/80 [00:51<16:16, 12.85s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 6/6


  6%|▋         | 5/80 [01:19<19:45, 15.81s/it]
2025/06/13 04:55:20 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/06/13 04:55:20 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.


2025/06/13 04:57:00 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...

2025/06/13 04:59:16 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/06/13 04:59:16 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Given the fields `task`, produce the fields `solution`.

2025/06/13 04:59:16 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Given a four-digit multiplication task in the format "A*B", provide a detailed step-by-step breakdown of the multiplication process (reasoning) and the final computed solution. 

For the reasoning, use the standard multiplication method: 
1. Break down the multiplication by each digit of the second number (B), starting from the rightmost digit (units place) to the leftmost digit (thousands place). 
2. For each digit, multiply it by the first number (A), adjusting for place value by adding the appropriate number of zeros (e.g., tens place = add one zero, hundreds place = add two zeros, etc.). 
3. Clearly label each

Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:42<00:00,  2.11s/it]

2025/06/13 04:59:59 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 04:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 100.0

2025/06/13 04:59:59 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 10 =====



Average Metric: 19.00 / 20 (95.0%): 100%|██████████| 20/20 [00:29<00:00,  1.48s/it]

2025/06/13 05:00:28 INFO dspy.evaluate.evaluate: Average Metric: 19 / 20 (95.0%)
2025/06/13 05:00:28 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 95.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/06/13 05:00:28 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0]
2025/06/13 05:00:28 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:00:28 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 10 =====



Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [01:25<00:00,  4.25s/it]

2025/06/13 05:01:54 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 05:01:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/06/13 05:01:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0]
2025/06/13 05:01:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:01:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 10 =====



Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:33<00:00,  1.66s/it]

2025/06/13 05:02:27 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 05:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/06/13 05:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0, 100.0]
2025/06/13 05:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 10 =====



Average Metric: 19.00 / 20 (95.0%): 100%|██████████| 20/20 [00:33<00:00,  1.69s/it]

2025/06/13 05:03:01 INFO dspy.evaluate.evaluate: Average Metric: 19 / 20 (95.0%)
2025/06/13 05:03:01 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 95.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2'].
2025/06/13 05:03:01 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0, 100.0, 95.0]
2025/06/13 05:03:01 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:03:01 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 10 =====



Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:39<00:00,  1.99s/it]

2025/06/13 05:03:40 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 05:03:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].
2025/06/13 05:03:41 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0, 100.0, 95.0, 100.0]
2025/06/13 05:03:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:03:41 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 10 =====



Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:00<00:00, 1704.59it/s]

2025/06/13 05:03:41 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 05:03:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/06/13 05:03:41 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0, 100.0, 95.0, 100.0, 100.0]
2025/06/13 05:03:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:03:41 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 10 =====



Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:48<00:00,  2.41s/it]

2025/06/13 05:04:29 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 05:04:29 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/06/13 05:04:29 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0, 100.0, 95.0, 100.0, 100.0, 100.0]
2025/06/13 05:04:29 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:04:29 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 10 =====



Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:32<00:00,  1.65s/it]

2025/06/13 05:05:02 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4'].
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0, 100.0, 95.0, 100.0, 100.0, 100.0, 100.0]
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 10 =====



Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:00<00:00, 1050.65it/s]

2025/06/13 05:05:02 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0, 100.0, 95.0, 100.0, 100.0, 100.0, 100.0, 100.0]
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 10 =====



Average Metric: 20.00 / 20 (100.0%): 100%|██████████| 20/20 [00:00<00:00, 2081.49it/s]

2025/06/13 05:05:02 INFO dspy.evaluate.evaluate: Average Metric: 20 / 20 (100.0%)
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 100.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0'].
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [100.0, 95.0, 100.0, 100.0, 95.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 100.0


2025/06/13 05:05:02 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 100.0!





评估优化后的模型: 100%|██████████| 20/20 [00:00<00:00, 1102.40it/s]


优化后准确率: 100.0%
评估未优化的模型...


评估未优化的模型: 100%|██████████| 20/20 [06:13<00:00, 18.69s/it]

未优化准确率: 100.0%





solver.predict = Predict(StringSignature(task -> reasoning, solution
    instructions='Given the fields `task`, produce the fields `solution`.'
    task = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Task:', 'desc': '${task}'})
    reasoning = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${reasoning}', '__dspy_field_type': 'output'})
    solution = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Solution:', 'desc': '${solution}'})
))

# 新段落