In [2]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from scipy.signal import find_peaks
from google.colab import drive

# ✅ 参数设置
drive.mount('/content/drive')
start_index = 700
end_index = 731
range_tag = f"{start_index}-{end_index}"
BASE_PATH = "/content/drive/MyDrive/Cluster-proj"
LOGITS_PATH = f"{BASE_PATH}/output/llm_steps/whole_logits/deepseek7b-gsm-{range_tag}-hidden.json"
ERROR_INDEX_PATH = f"{BASE_PATH}/output/error_index/{range_tag}_hidden_index.json"
SAVE_PATH = f"{BASE_PATH}/output/step_wise_pca"
os.makedirs(SAVE_PATH, exist_ok=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:

# ✅ 加载数据
with open(LOGITS_PATH, "r") as f:
    logits_data = json.load(f)
with open(ERROR_INDEX_PATH, "r") as f:
    error_index_data = json.load(f)


In [38]:
error_index_data['q_721']

{'sampling2': {'first_error_sentence': 'What is your answer?',
  'error_reason': 'The sample answer does not address the question or perform any calculations; it simply asks a question, which is not an answer to the problem.',
  'first_error_token_index': 0,
  'last_error_token_index': 4}}

In [40]:
logits_data['q_721'].keys()


dict_keys(['question', 'true_whole_answer', 'true_final_result', 'sampling0', 'sampling1', 'sampling2'])

In [42]:
logits_data['q_721']['true_final_result']


'2200'

In [46]:
logits_data['q_721']['sampling0'].keys()

dict_keys(['whole_answer', 'token_probs', 'final_result'])

In [51]:
logits_data['q_721']['sampling0']['token_probs'][7].keys()

dict_keys(['token', 'chosen_prob', 'information_content', 'topk_info', 'hidden_vector'])

In [52]:
logits_data['q_721']['sampling0']['token_probs'][7]['topk_info']

{'indices': [634, 245, 0],
 'logits': [55.98214340209961, 55.84821701049805, -inf],
 'softmax': [0.5334315896034241, 0.46656835079193115, 0.0],
 'entropy': 0.9967726958495955,
 'information_content': [0.9066248316506884, 1.0998396480542127, 0.0]}

In [41]:
logits_data['q_721']['sampling0']['final_result']

'2200'

In [4]:
def compute_token_similarities(token_level):
    vectors = [entry["hidden_vector"] for entry in token_level if "hidden_vector" in entry]
    return [
        cosine_similarity([vectors[i]], [vectors[i + 1]])[0][0]
        for i in range(len(vectors) - 1)
    ]


In [5]:
def find_steps(similarities, distance=2, prominence=0.05):
    peaks, _ = find_peaks(-np.array(similarities), distance=distance, prominence=prominence)
    # print([0] + peaks.tolist() + [len(similarities)])
    return [0] + peaks.tolist() + [len(similarities)]  # 保证首尾闭合

In [6]:
def map_error_index_to_steps(similarities, error_start, error_end, steps):
    # step_bounds = find_steps(similarities)
    error_steps = set()
    for i in range(len(steps) - 1):
        s, e = steps[i], steps[i+1]
        if any(error_start <= j <= error_end for j in range(s, e)):
            error_steps.add(i)
    return error_steps


In [7]:
# 快速检查前5个 token 是否包含 entropy 字段
for qid, sample in logits_data.items():
    for sid in ["sampling0", "sampling1", "sampling2"]:
        if sid in sample and "token_probs" in sample[sid]:
            tokens = sample[sid]["token_probs"]
            for i, tok in enumerate(tokens[:5]):
                print(f"[{qid}][{sid}][{i}] keys = {list(tok.keys())}")
            break
    break


[q_700][sampling0][0] keys = ['token', 'chosen_prob', 'information_content', 'topk_info', 'hidden_vector']
[q_700][sampling0][1] keys = ['token', 'chosen_prob', 'information_content', 'topk_info', 'hidden_vector']
[q_700][sampling0][2] keys = ['token', 'chosen_prob', 'information_content', 'topk_info', 'hidden_vector']
[q_700][sampling0][3] keys = ['token', 'chosen_prob', 'information_content', 'topk_info', 'hidden_vector']
[q_700][sampling0][4] keys = ['token', 'chosen_prob', 'information_content', 'topk_info', 'hidden_vector']


In [53]:
# ✅ 输出 JSON 路径
json_save_path = os.path.join(SAVE_PATH, f"stepwise_features_by_token_range_{range_tag}.json")

# ✅ 创建总字典
qid_to_step_data = {}

for qid, sample in logits_data.items():
    for sid in ["sampling0", "sampling1", "sampling2"]:
        if sid not in sample or "token_probs" not in sample[sid]:
            continue

        token_level = sample[sid]["token_probs"]
        similarities = compute_token_similarities(token_level)
        steps = find_steps(similarities)

        step_info_list = []
        for i in range(len(steps) - 1):
            s_idx, e_idx = steps[i], steps[i+1]

            vecs = [
                token_level[j]["hidden_vector"]
                for j in range(s_idx, e_idx)
                if "hidden_vector" in token_level[j]
            ]
            ents = [
                token_level[j]["topk_info"]["entropy"]
                for j in range(s_idx, e_idx)
                if "topk_info" in token_level[j] and "entropy" in token_level[j]["topk_info"]
            ]


            if not vecs:
                continue  # skip if no vectors

            step_key = f"step_{i}"
            step_info = {
                "token_range": f"{s_idx}-{e_idx}",
                "vec_mean": np.mean(vecs, axis=0).tolist(),
                "entropy_mean": float(np.mean(ents)) if ents else 0.0,
            }

            # 如果是负样本并且有 error index，可标注是否为 error step
            if qid in error_index_data and sid in error_index_data[qid]:
                err_info = error_index_data[qid][sid]
                err_start = err_info.get("first_error_token_index")
                err_end = err_info.get("last_error_token_index")
                if err_start is not None and err_end is not None:
                  try:
                      error_steps = map_error_index_to_steps(similarities, err_start, err_end, steps)
                      if not isinstance(error_steps, set):
                          raise ValueError("error_steps 不是 set 类型")
                      step_info["is_error_step"] = i in error_steps
                  except Exception as e:
                      print(f"❌ map_error_index_to_steps 出错: qid={qid}, sid={sid}, err=({err_start}, {err_end}) -> {e}")
                      break  # 跳出当前 sampling 的所有 step（避免记录错误数据）

            # 保存到主结构
            if qid not in qid_to_step_data:
                qid_to_step_data[qid] = {}
            if sid not in qid_to_step_data[qid]:
                qid_to_step_data[qid][sid] = {}
            qid_to_step_data[qid][sid][step_key] = step_info

# ✅ 保存为 JSON 文件
with open(json_save_path, "w") as f:
    json.dump(qid_to_step_data, f, indent=2)

print(f"✅ step-wise 信息已保存到: {json_save_path}")


✅ step-wise 信息已保存到: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/stepwise_features_by_token_range_700-731.json


In [10]:

# ✅ 所有可能配对的 question-sampling 对列表
paired_qid_sids = []

for qid, sid_dict in error_index_data.items():
    all_sids = {"sampling0", "sampling1", "sampling2"}
    error_sids = set(sid_dict.keys())
    correct_sids = all_sids - error_sids

    # 至少有一对正负组合
    if error_sids and correct_sids:
        for err_sid in error_sids:
            for cor_sid in correct_sids:
                paired_qid_sids.append((qid, err_sid, cor_sid))  # (qid, neg_sid, pos_sid)

# ✅ 打印所有有效的 (qid, neg_sid, pos_sid) 配对
print(f"✅ 共找到 {len(paired_qid_sids)} 个正负样本配对:")
for item in paired_qid_sids:
    print(item)


✅ 共找到 38 个正负样本配对:
('q_700', 'sampling2', 'sampling1')
('q_700', 'sampling0', 'sampling1')
('q_701', 'sampling1', 'sampling2')
('q_701', 'sampling1', 'sampling0')
('q_703', 'sampling1', 'sampling2')
('q_703', 'sampling1', 'sampling0')
('q_705', 'sampling2', 'sampling0')
('q_705', 'sampling1', 'sampling0')
('q_707', 'sampling2', 'sampling1')
('q_707', 'sampling0', 'sampling1')
('q_709', 'sampling2', 'sampling1')
('q_709', 'sampling0', 'sampling1')
('q_710', 'sampling1', 'sampling2')
('q_710', 'sampling1', 'sampling0')
('q_711', 'sampling0', 'sampling2')
('q_711', 'sampling0', 'sampling1')
('q_712', 'sampling1', 'sampling2')
('q_712', 'sampling0', 'sampling2')
('q_714', 'sampling1', 'sampling2')
('q_714', 'sampling0', 'sampling2')
('q_715', 'sampling1', 'sampling2')
('q_715', 'sampling1', 'sampling0')
('q_717', 'sampling1', 'sampling2')
('q_717', 'sampling1', 'sampling0')
('q_718', 'sampling2', 'sampling1')
('q_718', 'sampling2', 'sampling0')
('q_720', 'sampling1', 'sampling2')
('q_720', 

In [8]:
import json
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# ✅ 加载 stepwise 特征文件
json_path = os.path.join(SAVE_PATH, f"stepwise_features_by_token_range_{range_tag}.json")
with open(json_path, "r") as f:
    step_data = json.load(f)



In [34]:
step_data['q_703']['sampling0'].keys()

dict_keys(['step_0', 'step_1', 'step_2', 'step_3', 'step_4', 'step_5', 'step_6', 'step_7', 'step_8', 'step_9', 'step_10', 'step_11', 'step_12', 'step_13', 'step_14', 'step_15', 'step_16', 'step_17', 'step_18', 'step_19', 'step_20', 'step_21', 'step_22', 'step_23', 'step_24', 'step_25', 'step_26', 'step_27', 'step_28', 'step_29', 'step_30', 'step_31', 'step_32', 'step_33', 'step_34', 'step_35', 'step_36', 'step_37', 'step_38', 'step_39', 'step_40', 'step_41', 'step_42', 'step_43', 'step_44', 'step_45', 'step_46', 'step_47', 'step_48', 'step_49', 'step_50', 'step_51', 'step_52', 'step_53', 'step_54', 'step_55', 'step_56', 'step_57', 'step_58', 'step_59', 'step_60', 'step_61', 'step_62', 'step_63', 'step_64', 'step_65'])

In [35]:
step_data['q_703'].keys()

dict_keys(['sampling0', 'sampling1', 'sampling2'])

In [36]:
step_data['q_703']['sampling1'].keys()

dict_keys(['step_0', 'step_1', 'step_2', 'step_3', 'step_4', 'step_5', 'step_6', 'step_7', 'step_8', 'step_9', 'step_10', 'step_11', 'step_12', 'step_13', 'step_14', 'step_15', 'step_16', 'step_17', 'step_18', 'step_19', 'step_20', 'step_21', 'step_22', 'step_23', 'step_24', 'step_25', 'step_26', 'step_27', 'step_28', 'step_29', 'step_30', 'step_31', 'step_32', 'step_33', 'step_34', 'step_35', 'step_36', 'step_37', 'step_38', 'step_39', 'step_40', 'step_41', 'step_42', 'step_43', 'step_44', 'step_45', 'step_46', 'step_47', 'step_48', 'step_49', 'step_50', 'step_51', 'step_52', 'step_53', 'step_54', 'step_55', 'step_56', 'step_57', 'step_58', 'step_59', 'step_60', 'step_61', 'step_62', 'step_63', 'step_64', 'step_65', 'step_66', 'step_67', 'step_68', 'step_69', 'step_70', 'step_71', 'step_72', 'step_73', 'step_74', 'step_75', 'step_76', 'step_77', 'step_78'])

In [None]:
step_data['q_703']['sampling1']

In [37]:
for qid, neg_sid, pos_sid in paired_qid_sids:
    if qid not in step_data:
        print(f"⚠️ qid {qid} 不在 step_data 中，跳过")
        continue
    if neg_sid not in step_data[qid] or pos_sid not in step_data[qid]:
        print(f"⚠️ qid {qid} 缺失 sid 数据，跳过")
        continue

    # ✅ 检查 error_index 是否有效
    err_info = error_index_data.get(qid, {}).get(neg_sid, {})
    if err_info.get("first_error_token_index", 0) == -1 or err_info.get("last_error_token_index", 0) == -1:
        print(f"⚠️ qid {qid} 的 {neg_sid} error index 为 -1，跳过")
        continue

    neg_steps = step_data[qid][neg_sid]
    pos_steps = step_data[qid][pos_sid]

    # ✅ 提取 step_i 编号进行对齐（适配 step_0 格式）
    neg_index_to_key = {k.replace("step_", ""): k for k in neg_steps}
    pos_index_to_key = {k.replace("step_", ""): k for k in pos_steps}
    common_step_indices = sorted(set(neg_index_to_key.keys()) & set(pos_index_to_key.keys()))

    if len(common_step_indices) < 1:
        print(f"⚠️ qid {qid} 没有对齐的 step_i，跳过")
        continue

    # === 收集向量和错误标记 ===
    vecs_neg, vecs_pos, error_info = [], [], []
    for idx in common_step_indices:
        step_key_neg = neg_index_to_key[idx]
        step_key_pos = pos_index_to_key[idx]

        try:
            vec_neg = np.array(neg_steps[step_key_neg]["vec_mean"])
            vec_pos = np.array(pos_steps[step_key_pos]["vec_mean"])
            is_error = neg_steps[step_key_neg].get("is_error_step", False)
        except KeyError:
            continue

        vecs_neg.append(vec_neg)
        vecs_pos.append(vec_pos)
        error_info.append((idx, is_error))

    # === PCA 降维 ===
    X = np.concatenate([vecs_neg, vecs_pos], axis=0)
    pca = PCA(n_components=2)
    reduced = pca.fit_transform(X)
    explained = pca.explained_variance_ratio_

    # === 可视化 ===
    plt.figure(figsize=(10, 8))
    plt.scatter([], [], color='red', label="Negative Error")
    plt.scatter([], [], color='green', label="Negative Correct")
    plt.scatter([], [], color='blue', label="Positive")

    for i, (step_idx, is_error) in enumerate(error_info):
        label = f"step_{step_idx}"
        idx_neg = i
        idx_pos = i + len(error_info)

        # 正样本
        plt.scatter(reduced[idx_pos, 0], reduced[idx_pos, 1], color='blue', alpha=0.7)
        plt.text(reduced[idx_pos, 0] + 0.2, reduced[idx_pos, 1], label, fontsize=8, color='blue')

        # 负样本
        color = 'red' if is_error else 'green'
        plt.scatter(reduced[idx_neg, 0], reduced[idx_neg, 1], color=color, alpha=0.7)
        plt.text(reduced[idx_neg, 0] + 0.2, reduced[idx_neg, 1], label, fontsize=8, color=color)

        # 虚线连接
        plt.plot(
            [reduced[idx_neg, 0], reduced[idx_pos, 0]],
            [reduced[idx_neg, 1], reduced[idx_pos, 1]],
            linestyle="--", color="gray", linewidth=0.5
        )

    # ✅ 图标题标注哪个是错误的 sampling
    title = (
        f"Step-wise PCA Comparison\n"
        f"QID: {qid} | ❌ Error: {neg_sid} | ✅ Correct: {pos_sid}\n"
        f"Explained: {explained[0]:.2f}, {explained[1]:.2f}"
    )
    plt.title(title)
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # === 保存图像 ===
    filename = f"pca_steps_{qid}_{neg_sid}_vs_{pos_sid}.png"
    img_dir = os.path.join(SAVE_PATH, "img2")
    os.makedirs(img_dir, exist_ok=True)
    save_fp = os.path.join(img_dir, filename)
    plt.savefig(save_fp)
    plt.close()
    print(f"✅ 图保存完成: {save_fp}")


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_700_sampling0_vs_sampling1.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_700_sampling2_vs_sampling1.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_701_sampling1_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_701_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_703_sampling1_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_703_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_705_sampling2_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_705_sampling1_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_707_sampling0_vs_sampling1.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_707_sampling2_vs_sampling1.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_709_sampling0_vs_sampling1.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_709_sampling2_vs_sampling1.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_710_sampling1_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_710_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_711_sampling0_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_711_sampling0_vs_sampling1.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_712_sampling0_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_712_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_714_sampling0_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_714_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_715_sampling1_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_715_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_717_sampling1_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_717_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_718_sampling2_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_718_sampling2_vs_sampling1.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_720_sampling1_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_720_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_721_sampling2_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_721_sampling2_vs_sampling1.png
⚠️ qid q_723 的 sampling2 error index 为 -1，跳过
⚠️ qid q_723 的 sampling2 error index 为 -1，跳过


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_724_sampling0_vs_sampling1.png
⚠️ qid q_724 的 sampling2 error index 为 -1，跳过


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_727_sampling0_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_727_sampling1_vs_sampling2.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_730_sampling2_vs_sampling0.png


  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_fp)
  plt.savefig(save_fp)


✅ 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img2/pca_steps_q_730_sampling2_vs_sampling1.png


In [12]:
import matplotlib.pyplot as plt
import numpy as np
import os

# ✅ 保存目录
ent_img_dir = os.path.join(SAVE_PATH, "img_entropy3")
os.makedirs(ent_img_dir, exist_ok=True)

# ✅ 遍历每个正负配对
for qid, neg_sid, pos_sid in paired_qid_sids:
    if qid not in step_data or neg_sid not in step_data[qid] or pos_sid not in step_data[qid]:
        continue

    neg_steps = step_data[qid][neg_sid]
    pos_steps = step_data[qid][pos_sid]

    # ✅ 提取 step_i 编号并按数字排序
    neg_index_to_key = {k.replace("step_", ""): k for k in neg_steps}
    pos_index_to_key = {k.replace("step_", ""): k for k in pos_steps}
    common_step_indices = sorted(
        set(neg_index_to_key.keys()) & set(pos_index_to_key.keys()),
        key=lambda x: int(x)
    )

    if len(common_step_indices) < 1:
        continue

    # ✅ 提取 entropy 和错误标记
    ent_pos, ent_neg, is_error_flags = [], [], []
    for idx in common_step_indices:
        k_neg = neg_index_to_key[idx]
        k_pos = pos_index_to_key[idx]
        try:
            ent_pos.append(pos_steps[k_pos]["entropy_mean"])
            ent_neg.append(neg_steps[k_neg]["entropy_mean"])
            is_error = neg_steps[k_neg].get("is_error_step", False)
            is_error_flags.append(is_error)
        except:
            continue

    # ✅ 横轴标签也按正确顺序构造
    labels = [f"step_{idx}" for idx in common_step_indices]

    if not ent_pos or not ent_neg:
        continue

    # ✅ 找出 error 起点
    error_start_idx = None
    for i, flag in enumerate(is_error_flags):
        if flag:
            error_start_idx = i
            break

    # === 画图 ===
    x = np.arange(len(labels))
    plt.figure(figsize=(10, 5))

    # ✅ 正样本：蓝线
    plt.plot(x, ent_pos, marker='o', color='blue', label="Positive")

    # ✅ 负样本分段线（绿 ➝ 红）
    if error_start_idx is None:
        plt.plot(x, ent_neg, marker='o', color='green', label="Negative (Correct)")
    else:
        if error_start_idx > 0:
            plt.plot(x[:error_start_idx], ent_neg[:error_start_idx], marker='o', color='green', label="Negative (Correct)")
        plt.plot(x[error_start_idx:], ent_neg[error_start_idx:], marker='o', color='red', label="Negative (Error)")

    plt.xticks(x, labels, rotation=45)
    plt.xlabel("Step Index")
    plt.ylabel("Mean Entropy")
    plt.title(f"Step Entropy Comparison\nQID: {qid} | Positive: {pos_sid} vs Negative: {neg_sid}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # ✅ 保存图像
    save_path = os.path.join(ent_img_dir, f"entropy_curve_{qid}_{neg_sid}_vs_{pos_sid}.png")
    plt.savefig(save_path)
    plt.close()
    print(f"📊 Entropy 图保存完成: {save_path}")


📊 Entropy 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img_entropy3/entropy_curve_q_700_sampling2_vs_sampling1.png
📊 Entropy 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img_entropy3/entropy_curve_q_700_sampling0_vs_sampling1.png
📊 Entropy 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img_entropy3/entropy_curve_q_701_sampling1_vs_sampling2.png
📊 Entropy 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img_entropy3/entropy_curve_q_701_sampling1_vs_sampling0.png
📊 Entropy 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img_entropy3/entropy_curve_q_703_sampling1_vs_sampling2.png
📊 Entropy 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img_entropy3/entropy_curve_q_703_sampling1_vs_sampling0.png
📊 Entropy 图保存完成: /content/drive/MyDrive/Cluster-proj/output/step_wise_pca/img_entropy3/entropy_curve_q_705_sampling2_vs_sampling0.png
📊 Entropy 图保存完成: /content/drive/MyDrive/Cluster-proj/output/st