In [2]:
# ✅ 参数设置
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [5]:
import os
import json
import numpy as np
from sklearn.metrics import classification_report

# ✅ 路径设置
BASE_PATH = "/content/drive/MyDrive/Cluster-proj"
range_tag = "700-731"
DYNAMIC_PATH = f"{BASE_PATH}/output/step_cross2/sequential_dynamics_{range_tag}.json"
INDEX_PATH = f"{BASE_PATH}/output/error_fix_index/deepseek-7b-{range_tag}_error_fix_index.json"
SAVE_PATH = f"{BASE_PATH}/output/step_cross2/detector_results_{range_tag}.json"

# ✅ 阈值设置（来自统计分析）
ENTROPY_THRESH = 0.55
COHESION_THRESH = 0.3

# ✅ 加载数据
with open(DYNAMIC_PATH, "r") as f:
    sequential_data = json.load(f)

with open(INDEX_PATH, "r") as f:
    index_data = json.load(f)

# ✅ Step-based 模式
step_based_preds = {}

for step in sequential_data["error_regions"] + sequential_data["fix_regions"]:
    qid, sid = step["qid"], step["sid"]
    is_error_step = step.get("is_error_step", False)
    metrics = step["metrics"]
    entropy_change_mean = metrics.get("entropy_change_mean", 0.0)
    cohesion = metrics.get("cluster_cohesion", 0.0)

    # pred_is_error = (entropy_mean >= ENTROPY_THRESH) or (cohesion >= COHESION_THRESH)
    pred_is_error = (entropy_change_mean >= ENTROPY_THRESH)

    key = f"{qid}_{sid}"
    if key not in step_based_preds:
        step_based_preds[key] = {"pred_is_error": [], "true_is_error": False}

    step_based_preds[key]["pred_is_error"].append(pred_is_error)

    if is_error_step:
        step_based_preds[key]["true_is_error"] = True

# ✅ 汇总 per sampling 的预测结果
sampling_predictions = []
for key, info in step_based_preds.items():
    # 任意一个 step 判定为 error，就认为这个 sampling 判定为 error
    pred_is_error = any(info["pred_is_error"])
    true_is_error = info["true_is_error"]
    sampling_predictions.append({
        "key": key,
        "pred_is_error": pred_is_error,
        "true_is_error": true_is_error
    })

# ✅ 分类报告
y_true = [x["true_is_error"] for x in sampling_predictions]
y_pred = [x["pred_is_error"] for x in sampling_predictions]
print("📊 Step-based Detector Performance:")
print(classification_report(y_true, y_pred, digits=3))

# ✅ 保存结果
with open(SAVE_PATH, "w") as f:
    json.dump(sampling_predictions, f, indent=2)
print(f"✅ Step-based detector results saved to: {SAVE_PATH}")


📊 Step-based Detector Performance:
              precision    recall  f1-score   support

       False      0.514     0.947     0.667        19
        True      0.889     0.320     0.471        25

    accuracy                          0.591        44
   macro avg      0.702     0.634     0.569        44
weighted avg      0.727     0.591     0.555        44

✅ Step-based detector results saved to: /content/drive/MyDrive/Cluster-proj/output/step_cross2/detector_results_700-731.json


In [6]:

# ✅ Step-based 模式
step_based_preds = {}

for step in sequential_data["error_regions"] + sequential_data["fix_regions"]:
    qid, sid = step["qid"], step["sid"]
    is_error_step = step.get("is_error_step", False)
    metrics = step["metrics"]
    entropy_mean = metrics.get("entropy_change_mean", 0.0)
    cohesion = metrics.get("cluster_cohesion", 0.0)

    # pred_is_error = (entropy_mean >= ENTROPY_THRESH) or (cohesion >= COHESION_THRESH)
    pred_is_error = (cohesion >= COHESION_THRESH)

    key = f"{qid}_{sid}"
    if key not in step_based_preds:
        step_based_preds[key] = {"pred_is_error": [], "true_is_error": False}

    step_based_preds[key]["pred_is_error"].append(pred_is_error)

    if is_error_step:
        step_based_preds[key]["true_is_error"] = True

# ✅ 汇总 per sampling 的预测结果
sampling_predictions = []
for key, info in step_based_preds.items():
    # 任意一个 step 判定为 error，就认为这个 sampling 判定为 error
    pred_is_error = any(info["pred_is_error"])
    true_is_error = info["true_is_error"]
    sampling_predictions.append({
        "key": key,
        "pred_is_error": pred_is_error,
        "true_is_error": true_is_error
    })

# ✅ 分类报告
y_true = [x["true_is_error"] for x in sampling_predictions]
y_pred = [x["pred_is_error"] for x in sampling_predictions]
print("📊 Step-based Detector Performance:")
print(classification_report(y_true, y_pred, digits=3))

# ✅ 保存结果
with open(SAVE_PATH, "w") as f:
    json.dump(sampling_predictions, f, indent=2)
print(f"✅ Step-based detector results saved to: {SAVE_PATH}")


📊 Step-based Detector Performance:
              precision    recall  f1-score   support

       False      0.432     1.000     0.603        19
        True      0.000     0.000     0.000        25

    accuracy                          0.432        44
   macro avg      0.216     0.500     0.302        44
weighted avg      0.186     0.432     0.260        44

✅ Step-based detector results saved to: /content/drive/MyDrive/Cluster-proj/output/step_cross2/detector_results_700-731.json


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
