In [16]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
from scipy.signal import argrelextrema
from matplotlib.backends.backend_pdf import PdfPages
from google.colab import drive
# Mount Google Drive so the project directory under /content/drive is reachable.
drive.mount('/content/drive')

# Run configuration: the two indices are only used to build `range_tag`,
# which is embedded in every input and output file name below.
start_index = 700
end_index   = 731
range_tag   = f"{start_index}-{end_index}"

BASE_PATH         = "/content/drive/MyDrive/Cluster-proj"
LOGITS_PATH       = f"{BASE_PATH}/output/llm_steps/whole_logits/deepseek7b-gsm-{range_tag}-hidden.json"
ERROR_INDEX_PATH  = f"{BASE_PATH}/output/error_index/{range_tag}_hidden_index.json"

# Load the input data.
# The encoding is given explicitly: JSON files are UTF-8, and relying on the
# locale default makes decoding depend on the machine the notebook runs on.
with open(LOGITS_PATH, "r", encoding="utf-8") as f:
    # Accessed later as logits_data[qid][sid]["token_probs"].
    logits_data = json.load(f)
with open(ERROR_INDEX_PATH, "r", encoding="utf-8") as f:
    # Accessed later as error_index_data[qid][sid]["first_error_token_index"]
    # and ["last_error_token_index"].
    error_index_data = json.load(f)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:

# Output locations, both keyed by `range_tag`: OUTPUT_DIR receives one PNG per
# plotted pair, OUTPUT_PDF collects every figure into a single multi-page PDF.
OUTPUT_DIR        = f"{BASE_PATH}/output/entropy_loglaplace_true/pngs_{range_tag}"
OUTPUT_PDF        = f"{BASE_PATH}/output/entropy_loglaplace_true/entropy_loglaplace_{range_tag}.pdf"
# Creating OUTPUT_DIR also creates its parent, which is the directory that
# holds OUTPUT_PDF.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [19]:

# Pairing list: for every question, pair each erroneous sampling with each
# correct sampling as (qid, error_sid, correct_sid).
# A sampling counts as erroneous when its id is a key of that question's entry
# in `error_index_data`; the remaining ids from `all_sids` count as correct.
all_sids = {"sampling0", "sampling1", "sampling2"}  # loop-invariant, built once

paired_qid_sids = []
for qid, sid_dict in error_index_data.items():
    error_sids = set(sid_dict.keys())
    correct_sids = all_sids - error_sids
    # Sets of strings iterate in hash order, which can differ between
    # interpreter runs; sorting keeps the pair order (and therefore the page
    # order of the output PDF) reproducible.
    # Questions with no erroneous or no correct sampling contribute nothing,
    # because one of the two loops is then empty.
    for err_sid in sorted(error_sids):
        for cor_sid in sorted(correct_sids):
            paired_qid_sids.append((qid, err_sid, cor_sid))

# Entropy extraction
def extract_entropy_sequence(token_probs):
    """Collect the per-token entropy values into a NumPy array.

    Args:
        token_probs: Iterable of per-token records; each record must provide
            ``record["topk_info"]["entropy"]``.

    Returns:
        A 1-D ``numpy.ndarray`` with one entropy value per record, in input
        order.

    Raises:
        KeyError: If a record lacks ``"topk_info"`` or ``"entropy"``.
    """
    entropies = []
    for record in token_probs:
        topk = record["topk_info"]
        entropies.append(topk["entropy"])
    return np.array(entropies)

# Plotting helper (with error-range annotation)
def plot_entropy_loglaplace(ax, entropy, label, color, first_error=None, last_error=None):
    """Plot an entropy curve, its smoothed version and second-derivative extrema.

    The sequence is smoothed with a Gaussian filter (sigma=1.5) and the second
    discrete derivative of the smoothed curve is computed. Local maxima
    ("peaks") and minima ("valleys") of that second derivative are marked on
    the smoothed curve.

    Args:
        ax: Axes-like object providing ``plot`` and ``axvline``.
        entropy: 1-D sequence of entropy values (array or list). It is
            converted to a float array so that integer input is not smoothed
            in integer arithmetic.
        label: Prefix used for the legend entries of this curve.
        color: Colour used for the curve and its peak/valley markers.
        first_error: Optional token index of the first error; drawn as a
            dash-dot vertical line.
        last_error: Optional token index of the last error; drawn as a dashed
            vertical line.

    Returns:
        Tuple ``(smoothed, second_deriv)`` of float arrays with the same length
        as ``entropy``.

    Raises:
        ValueError: Propagated from ``np.gradient`` when the sequence has fewer
            than two samples.
    """
    # gaussian_filter1d writes its result in the input dtype, so force float.
    entropy = np.asarray(entropy, dtype=float)
    smoothed = gaussian_filter1d(entropy, sigma=1.5)
    second_deriv = np.gradient(np.gradient(smoothed))
    peaks = argrelextrema(second_deriv, np.greater)[0]
    valleys = argrelextrema(second_deriv, np.less)[0]

    ax.plot(entropy, color=color, alpha=0.3, linestyle='--', label=f"{label} (raw)")
    ax.plot(smoothed, color=color, label=f"{label} (smoothed)")
    ax.plot(peaks, smoothed[peaks], "o", color=color, markersize=5, label=f"{label} peaks")
    ax.plot(valleys, smoothed[valleys], "x", color=color, markersize=5, label=f"{label} valleys")

    # Mark the error range. Each boundary is handled on its own so that a
    # single known index is still shown.
    error_marks = (
        (first_error, "-.", "First Error"),
        (last_error, "--", "Last Error"),
    )
    for idx, linestyle, text in error_marks:
        if idx is None:
            continue
        ax.axvline(x=idx, color="black", linestyle=linestyle, linewidth=1.5, label=text)
        # Only place the star when the index addresses a real sample: an index
        # past the end would raise IndexError after the curves were already
        # drawn, and a negative index would silently wrap to the tail.
        if 0 <= idx < len(smoothed):
            ax.plot(idx, smoothed[idx], '*', color="black", markersize=10)

    return smoothed, second_deriv

# Main flow: draw one figure per (question, erroneous sampling, correct
# sampling) pair, save it as a PNG and append it to the summary PDF.
saved_count = 0
with PdfPages(OUTPUT_PDF) as pdf:
    for qid, neg_sid, pos_sid in paired_qid_sids:
        fig = None
        try:
            # Resolve all inputs before creating a figure, so that a pair with
            # missing data is skipped without allocating one.
            pos_probs = logits_data[qid][pos_sid]["token_probs"]
            neg_probs = logits_data[qid][neg_sid]["token_probs"]
            entropy_pos = extract_entropy_sequence(pos_probs)
            entropy_neg = extract_entropy_sequence(neg_probs)
            first = error_index_data[qid][neg_sid]["first_error_token_index"]
            last = error_index_data[qid][neg_sid]["last_error_token_index"]

            fig, ax = plt.subplots(figsize=(12, 4))
            ax.set_title(f"QID: {qid}, Positive vs Negative")
            ax.set_xlabel("Token Index")
            ax.set_ylabel("Entropy / LoG")

            # Positive (correct) sampling
            plot_entropy_loglaplace(ax, entropy_pos, "Positive", "blue")

            # Negative (erroneous) sampling, with its error range marked
            plot_entropy_loglaplace(ax, entropy_neg, "Negative", "red", first_error=first, last_error=last)

            ax.legend()
            ax.grid(True)

            # Save the figure
            fig_path = os.path.join(OUTPUT_DIR, f"{qid}_{neg_sid}_vs_{pos_sid}.png")
            fig.savefig(fig_path)
            pdf.savefig(fig)
            saved_count += 1
        except Exception as e:
            # Best-effort batch: report the failing pair and continue. Several
            # pairs share a QID, so the sampling ids are part of the message,
            # and the exception type is shown because str(KeyError) is only
            # the missing key.
            print(f"[ERROR] Skip QID {qid} ({neg_sid} vs {pos_sid}): {type(e).__name__}: {e}")
        finally:
            # Close exactly the figure created in this iteration, not whatever
            # figure happens to be current.
            if fig is not None:
                plt.close(fig)

# Make skipped pairs visible before the final location messages.
print(f"[INFO] Saved {saved_count} of {len(paired_qid_sids)} pairs")
print(f"✅ 图像保存在: {OUTPUT_DIR}")
print(f"✅ 汇总 PDF 保存在: {OUTPUT_PDF}")


✅ 图像保存在: /content/drive/MyDrive/Cluster-proj/output/entropy_loglaplace_true/pngs_700-731
✅ 汇总 PDF 保存在: /content/drive/MyDrive/Cluster-proj/output/entropy_loglaplace_true/entropy_loglaplace_700-731.pdf
