In [1]:
import dspy
import json
from dspy.teleprompt import MIPROv2, BootstrapFinetune
from src.dsp_model import FaultExtractor
from src.bert_utils import bert_similarity_metric
from dspy import Example
import os

In [2]:
# Step 1: load the raw annotated samples.
# "utf-8-sig" lets the decoder drop a leading BOM if the file has one.
with open("../raw_data/故障类.json", encoding="utf-8-sig") as json_file:
    samples = json.loads(json_file.read())

# Show the first two records as a quick sanity check.
samples[:2]

[{'input': '西北实习生李龙。你好天山站，杨立斌向您回令。喂啥那个啥？是那个操操作天哈，一线线路保护投入的。保护投入了稍等我给你转一下。嗯，好。',
  'faulty_call': '1',
  'extract': {'fault_equipment': '无',
   'fault_time': '无',
   'region': '无',
   'voltage_level': '无',
   'weather_condition': '无',
   'fault_reason_and_check_result': '无',
   'fault_recovery_time': '无',
   'illustrate': '天哈，一线线路保护投入',
   'line_name': '天哈，一线线路',
   'power_supply_time': '无',
   'fault_phase': '无',
   'protect_info': '无',
   'plant_station_name': '天山站',
   'bus_name': '无',
   'bus_type': '无',
   'handling_status': '无',
   'detailed_description': '无',
   'expecteddefect_elimination_time': '无',
   'protection_action': '操操作天哈，一线线路保护投入',
   'trip_details': '无',
   'unit_num': '无',
   'manufacturer': '无',
   'production_date': '无'}},
 {'input': '你好哈，密变王勇。你要干什么？回令退与我们这有个投保护的。嗯，回去是吧，一会我给你回过去吧。行好的。再见。',
  'faulty_call': '1',
  'extract': {'fault_equipment': '无',
   'fault_time': '无',
   'region': '无',
   'voltage_level': '无',
   'weather_condition': '无',
   'fault_reason_and_che

In [3]:
# Step 2: build the DSPy Example dataset.
def _to_example(record):
    """Turn one raw record into an Example whose only input field is 'input'."""
    # Start with the 'input' value, then layer the 'extract' fields on top, so
    # a clashing key in 'extract' wins exactly as it would in a dict literal.
    fields = {'input': record['input']}
    fields.update(record['extract'])
    return Example(fields).with_inputs('input')


trainset = [_to_example(sample) for sample in samples]

# Preview the first two examples.
trainset[:2]

[Example({'input': '西北实习生李龙。你好天山站，杨立斌向您回令。喂啥那个啥？是那个操操作天哈，一线线路保护投入的。保护投入了稍等我给你转一下。嗯，好。', 'fault_equipment': '无', 'fault_time': '无', 'region': '无', 'voltage_level': '无', 'weather_condition': '无', 'fault_reason_and_check_result': '无', 'fault_recovery_time': '无', 'illustrate': '天哈，一线线路保护投入', 'line_name': '天哈，一线线路', 'power_supply_time': '无', 'fault_phase': '无', 'protect_info': '无', 'plant_station_name': '天山站', 'bus_name': '无', 'bus_type': '无', 'handling_status': '无', 'detailed_description': '无', 'expecteddefect_elimination_time': '无', 'protection_action': '操操作天哈，一线线路保护投入', 'trip_details': '无', 'unit_num': '无', 'manufacturer': '无', 'production_date': '无'}) (input_keys={'input'}),
 Example({'input': '你好哈，密变王勇。你要干什么？回令退与我们这有个投保护的。嗯，回去是吧，一会我给你回过去吧。行好的。再见。', 'fault_equipment': '无', 'fault_time': '无', 'region': '无', 'voltage_level': '无', 'weather_condition': '无', 'fault_reason_and_check_result': '无', 'fault_recovery_time': '无', 'illustrate': '投保护', 'line_name': '无', 'power_supply_time': '无', 'fau

In [4]:
# Step 3: split the examples into a training set and a validation set.
from sklearn.model_selection import train_test_split

# NOTE: `trainset` is rebound to the training portion here, so running this
# cell a second time would split the already-reduced list again.
split_parts = train_test_split(trainset, test_size=0.2, random_state=42)
trainset, valset = split_parts

In [5]:
# Show the first two examples of each split.
head_of_splits = (trainset[:2], valset[:2])
head_of_splits

([Example({'input': '七五。如果墙面于海龙，向您回令。你说。操作单位若羌变已完成六六千伏2号3号10号11号低压电抗器运行转热备用的操作时间是19点54分。好，现在站内电压运行正常了嘛。正常了，现在。注意电压监视再见。好再见。', 'fault_equipment': '六六千伏2号3号10号11号低压电抗器', 'fault_time': '19点54分', 'region': '无', 'voltage_level': '六六千伏', 'weather_condition': '无', 'fault_reason_and_check_result': '无', 'fault_recovery_time': '无', 'illustrate': '已完成六六千伏2号3号10号11号低压电抗器运行转热备用', 'line_name': '无', 'power_supply_time': '无', 'fault_phase': '无', 'protect_info': '无', 'plant_station_name': '若羌变', 'bus_name': '无', 'bus_type': '无', 'handling_status': '站内电压运行正常', 'detailed_description': '无', 'expecteddefect_elimination_time': '无', 'protection_action': '完成六六千伏2号3号10号11号低压电抗器运行转热备用', 'trip_details': '无', 'unit_num': '无', 'manufacturer': '无', 'production_date': '无'}) (input_keys={'input'}),
  Example({'input': '喂，你好宁夏孙原。对。西北郭亚慧刚要报完工是吧，你说先说灵州的吧。您稍等。嗯嗯嗯，嗯嗯嗯，你好宁夏田波我这边给您对。灵州的这张。好的，我来念吗？嗯，你说吧。好的，那个分中心检修票编号NW-2025-0672。停电项目名称灵州灵州协控总站PCS-992稳控装置一灵州协控主站PCS-992稳控装置2。稳控装置几？稳控装置1，两个都是稳控装置1，一个是协控总站稳控装置1，一个协控主站稳控装置1，工作内容是

In [6]:
# Step 4: set up the language model (a locally served model such as Qwen can be used).
from dotenv import load_dotenv

# Load the variables from the .env file; the result is left as the cell's
# last expression so the notebook displays it.
dotenv_loaded = load_dotenv()
dotenv_loaded

True

In [7]:
import warnings

# Endpoint of the Ollama server, read from the environment.
ollama_api_base = os.getenv("OLLAMA_A800_API_BASE")
if not ollama_api_base:
    # os.getenv returns None for an unset variable. Without this check the
    # value is passed straight to dspy.LM and nothing signals that the
    # intended endpoint is missing before the long optimization run starts.
    warnings.warn(
        "OLLAMA_A800_API_BASE is not set (or is empty); dspy.LM receives no "
        "usable api_base. Define it in the environment or in the .env file."
    )

# Language model used for every DSPy call in this notebook; caching is disabled.
lm = dspy.LM(
    'ollama_chat/qwen2.5:32b',
    api_base=ollama_api_base,
    api_key='',
    cache=False,
)
dspy.configure(lm=lm)

# Step 5: instantiate the model.
model = FaultExtractor()

In [8]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

# Optimizer settings.
config = {
    "max_labeled_demos": 4,        # at most 4 labelled samples used as demos per module
    "max_bootstrapped_demos": 4,   # at most 4 demos generated by the model itself
    "num_candidate_programs": 10,  # candidate programs to generate; the best one is selected
    "num_threads": 20,             # parallel threads (tune to the CPU/GPU setup)
}

# Create the optimizer, with bert_similarity_metric as the evaluation function.
teleprompter = BootstrapFewShotWithRandomSearch(metric=bert_similarity_metric, **config)

Going to sample between 1 and 4 traces per predictor.
Will attempt to bootstrap 10 candidate sets.


In [9]:
# Start the optimization: compile `model` with the optimizer, passing
# `trainset` as the training data. The result is kept as `optimized_module`.
optimized_module = teleprompter.compile(model, trainset=trainset)

Average Metric: 42.80 / 93 (46.0%): 100%|██████████| 93/93 [10:59<00:00,  7.09s/it]              

2025/07/23 15:45:02 INFO dspy.evaluate.evaluate: Average Metric: 42.79793930053711 / 93 (46.0%)



New best score: 46.02 for seed -3
Scores so far: [np.float32(46.02)]
Best score so far: 46.02000045776367
Average Metric: 77.63 / 93 (83.5%): 100%|██████████| 93/93 [10:52<00:00,  7.01s/it]             

2025/07/23 15:55:54 INFO dspy.evaluate.evaluate: Average Metric: 77.63197326660156 / 93 (83.5%)



New best score: 83.48 for seed -2
Scores so far: [np.float32(46.02), np.float32(83.48)]
Best score so far: 83.4800033569336


  4%|▍         | 4/93 [01:34<34:54, 23.54s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Average Metric: 72.30 / 93 (77.69999694824219%): 100%|██████████| 93/93 [12:57<00:00,  8.36s/it]

2025/07/23 16:10:26 INFO dspy.evaluate.evaluate: Average Metric: 72.29702758789062 / 93 (77.69999694824219%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74)]
Best score so far: 83.4800033569336


  4%|▍         | 4/93 [01:32<34:21, 23.16s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Average Metric: 72.59 / 93 (78.0999984741211%): 100%|██████████| 93/93 [12:49<00:00,  8.27s/it] 

2025/07/23 16:24:48 INFO dspy.evaluate.evaluate: Average Metric: 72.59283447265625 / 93 (78.0999984741211%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06)]
Best score so far: 83.4800033569336


  2%|▏         | 2/93 [00:54<41:35, 27.43s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Average Metric: 76.80 / 93 (82.5999984741211%): 100%|██████████| 93/93 [12:09<00:00,  7.84s/it] 

2025/07/23 16:37:53 INFO dspy.evaluate.evaluate: Average Metric: 76.79571533203125 / 93 (82.5999984741211%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58)]
Best score so far: 83.4800033569336


  1%|          | 1/93 [00:29<44:41, 29.15s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Average Metric: 73.67 / 93 (79.19999694824219%): 100%|██████████| 93/93 [10:50<00:00,  7.00s/it]

2025/07/23 16:49:12 INFO dspy.evaluate.evaluate: Average Metric: 73.67460632324219 / 93 (79.19999694824219%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58), np.float32(79.22)]
Best score so far: 83.4800033569336


  2%|▏         | 2/93 [00:50<38:25, 25.33s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Average Metric: 72.68 / 93 (78.0999984741211%): 100%|██████████| 93/93 [12:30<00:00,  8.07s/it] 

2025/07/23 17:02:34 INFO dspy.evaluate.evaluate: Average Metric: 72.67671203613281 / 93 (78.0999984741211%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58), np.float32(79.22), np.float32(78.15)]
Best score so far: 83.4800033569336


  2%|▏         | 2/93 [00:53<40:48, 26.91s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Average Metric: 71.83 / 93 (77.19999694824219%): 100%|██████████| 93/93 [12:30<00:00,  8.07s/it]

2025/07/23 17:15:58 INFO dspy.evaluate.evaluate: Average Metric: 71.8294906616211 / 93 (77.19999694824219%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58), np.float32(79.22), np.float32(78.15), np.float32(77.24)]
Best score so far: 83.4800033569336


  3%|▎         | 3/93 [01:11<35:57, 23.97s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Average Metric: 72.37 / 93 (77.80000305175781%): 100%|██████████| 93/93 [11:50<00:00,  7.64s/it]

2025/07/23 17:29:01 INFO dspy.evaluate.evaluate: Average Metric: 72.36868286132812 / 93 (77.80000305175781%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58), np.float32(79.22), np.float32(78.15), np.float32(77.24), np.float32(77.82)]
Best score so far: 83.4800033569336


  1%|          | 1/93 [00:29<45:33, 29.71s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Average Metric: 74.10 / 93 (79.69999694824219%): 100%|██████████| 93/93 [11:52<00:00,  7.66s/it]

2025/07/23 17:41:23 INFO dspy.evaluate.evaluate: Average Metric: 74.1009292602539 / 93 (79.69999694824219%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58), np.float32(79.22), np.float32(78.15), np.float32(77.24), np.float32(77.82), np.float32(79.68)]
Best score so far: 83.4800033569336


  3%|▎         | 3/93 [01:15<37:49, 25.22s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Average Metric: 75.46 / 93 (81.0999984741211%): 100%|██████████| 93/93 [13:10<00:00,  8.50s/it] 

2025/07/23 17:55:50 INFO dspy.evaluate.evaluate: Average Metric: 75.46284484863281 / 93 (81.0999984741211%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58), np.float32(79.22), np.float32(78.15), np.float32(77.24), np.float32(77.82), np.float32(79.68), np.float32(81.14)]
Best score so far: 83.4800033569336


  2%|▏         | 2/93 [00:51<39:02, 25.74s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Average Metric: 77.15 / 93 (83.0%): 100%|██████████| 93/93 [11:19<00:00,  7.31s/it]             

2025/07/23 18:08:01 INFO dspy.evaluate.evaluate: Average Metric: 77.15186309814453 / 93 (83.0%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58), np.float32(79.22), np.float32(78.15), np.float32(77.24), np.float32(77.82), np.float32(79.68), np.float32(81.14), np.float32(82.96)]
Best score so far: 83.4800033569336


  4%|▍         | 4/93 [01:34<34:58, 23.58s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Average Metric: 67.69 / 93 (72.80000305175781%): 100%|██████████| 93/93 [12:32<00:00,  8.09s/it]

2025/07/23 18:22:07 INFO dspy.evaluate.evaluate: Average Metric: 67.68534851074219 / 93 (72.80000305175781%)



Scores so far: [np.float32(46.02), np.float32(83.48), np.float32(77.74), np.float32(78.06), np.float32(82.58), np.float32(79.22), np.float32(78.15), np.float32(77.24), np.float32(77.82), np.float32(79.68), np.float32(81.14), np.float32(82.96), np.float32(72.78)]
Best score so far: 83.4800033569336
13 candidate programs found.


In [10]:
# Save the optimized module to a JSON file in the current working directory
# and print the path it was written to.
model_path = "optimized_fault_extractor.json"
optimized_module.save(model_path)
print("模型已保存至:", model_path)

模型已保存至: optimized_fault_extractor.json


In [13]:
from dspy.evaluate import Evaluate

# Create the evaluator over the validation split.
evaluator_settings = dict(
    devset=valset,
    metric=bert_similarity_metric,
    num_threads=5,
    display_progress=True,
)
evaluate = Evaluate(**evaluator_settings)

# Score the module before optimization.
print("优化前模块评分：")
evaluate(model)

# Score the module after optimization. This call is the cell's final
# expression, so its return value is what the notebook displays.
print("优化后模块评分：")
evaluate(optimized_module)

优化前模块评分：
Average Metric: 12.63 / 24 (52.599998474121094%): 100%|██████████| 24/24 [03:09<00:00,  7.88s/it]

2025/07/23 18:29:47 INFO dspy.evaluate.evaluate: Average Metric: 12.631243705749512 / 24 (52.599998474121094%)



优化后模块评分：
Average Metric: 0.08 / 1 (8.0%):   4%|▍         | 1/24 [08:28<3:14:52, 508.38s/it]  9.73s/it]
Average Metric: 19.95 / 24 (83.0999984741211%): 100%|██████████| 24/24 [03:12<00:00,  8.01s/it] 

2025/07/23 18:32:59 INFO dspy.evaluate.evaluate: Average Metric: 19.954309463500977 / 24 (83.0999984741211%)





np.float32(83.14)