In [None]:
# -*- coding: utf-8 -*-
import os, json
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq, ifft
from google.colab import drive

# ✅ Mount Google Drive so the project folder is reachable from the Colab VM
drive.mount('/content/drive')

# ✅ Path configuration: the index range selects which result shard to load
start_index = 901
end_index = 950
range_tag = f"{start_index}-{end_index}"
BASE_PATH = "/content/drive/MyDrive/Cluster-proj"

# Alternative path pattern (deepseek7b naming), currently disabled:
# LOGITS_PATH = f"{BASE_PATH}/output/llm_steps/whole_logits/deepseek7b-gsm-{range_tag}.json"
# ERROR_INDEX_PATH = f"{BASE_PATH}/output/error_index/{range_tag}_sentence_with_index.json"

LOGITS_PATH       = f"{BASE_PATH}/output/llm_steps/whole_logits/deepseek-math-7b-gsm-{range_tag}.json"
ERROR_INDEX_PATH  = f"{BASE_PATH}/output/error_index/deepseek-math-7b-{range_tag}_index.json"

# ✅ Load data
# The encoding is given explicitly so decoding does not depend on the
# runtime's locale default (JSON text is UTF-8).
with open(LOGITS_PATH, "r", encoding="utf-8") as f:
    logits_data = json.load(f)
with open(ERROR_INDEX_PATH, "r", encoding="utf-8") as f:
    error_index_data = json.load(f)

# ✅ Output directory for the paired entropy figures
OUTPUT_DIR = f"{BASE_PATH}/output/paired_entropy_fourier_true"
os.makedirs(OUTPUT_DIR, exist_ok=True)



Mounted at /content/drive


In [None]:

# ✅ Fourier low-pass smoothing
def fourier_smooth(y, keep_ratio=0.1):
    """Smooth a 1-D signal by zeroing its high-frequency FFT bins.

    The DC bin plus the ``k = int(len(y) * keep_ratio)`` lowest harmonics are
    kept, together with their negative-frequency mirror bins; every other bin
    is set to zero before inverting the transform.

    Args:
        y: Sequence of samples (anything ``np.asarray`` accepts).
        keep_ratio: Fraction of the signal length that determines how many
            low-frequency harmonics survive. Values of 0.5 or more keep the
            whole spectrum.

    Returns:
        ``np.ndarray`` of the same length as ``y`` holding the real part of
        the filtered signal. An empty input yields an empty array.
    """
    y = np.asarray(y)
    N = len(y)
    if N == 0:
        # Nothing to smooth; also avoids handing a zero-length array to fft().
        return np.zeros(0)

    # Clamp so the slice bounds below stay well-formed for any keep_ratio.
    k = min(max(int(N * keep_ratio), 0), N // 2)

    Y = fft(y)
    # Keep bins 0..k and N-k..N-1 (i.e. -k..-1). This mask is symmetric around
    # DC, so every retained positive harmonic keeps its conjugate partner.
    # It also behaves for k == 0 (only DC survives), where a `Y[k:-k]` slice
    # would be empty and leave the signal completely unfiltered.
    Y[k + 1:N - k] = 0
    return np.real(ifft(Y))

# ✅ Build the list of (qid, error sampling id, correct sampling id) pairs.
# A sampling id that appears under a question in the error index is treated as
# an erroneous run; the remaining ids from `all_sids` are treated as correct.
all_sids = {"sampling0", "sampling1", "sampling2"}

paired_qid_sids = []
for qid, sid_dict in error_index_data.items():
    error_sids = set(sid_dict.keys())
    correct_sids = all_sids - error_sids

    # Sets have no stable iteration order for strings across interpreter
    # runs, so sort to make the pair order (and the batch order) reproducible.
    # If either side is empty the nested loops simply produce no pairs.
    for err_sid in sorted(error_sids):
        for cor_sid in sorted(correct_sids):
            paired_qid_sids.append((qid, err_sid, cor_sid))

print(f"✅ 找到 {len(paired_qid_sids)} 对成对样本")

# ✅ Plot: paired entropy curves (raw + Fourier-smoothed)
def plot_entropy_fourier_pair(qid, neg_sid, pos_sid, save_dir):
    """Plot per-token entropy of an erroneous run against a correct run.

    Reads ``logits_data[qid][sid]["token_probs"]`` for both sampling ids,
    takes ``tok["topk_info"]["entropy"]`` from every token, and draws the raw
    and Fourier-smoothed curves on one axes. Vertical dashed lines mark the
    first/last error token indices recorded for ``neg_sid`` in
    ``error_index_data`` when they are available.

    Args:
        qid: Question id, used as key in ``logits_data`` / ``error_index_data``.
        neg_sid: Sampling id of the erroneous run (drawn in red).
        pos_sid: Sampling id of the correct run (drawn in green).
        save_dir: Directory the PNG is written to.

    Returns:
        None. The pair is skipped with a printed message when logits are
        missing or either entropy sequence is empty.
    """
    try:
        neg_probs = logits_data[qid][neg_sid]["token_probs"]
        pos_probs = logits_data[qid][pos_sid]["token_probs"]
    except KeyError:
        print(f"⚠️ 缺失 logits：{qid} | {neg_sid} / {pos_sid}")
        return

    entropy_neg = [tok["topk_info"]["entropy"] for tok in neg_probs]
    entropy_pos = [tok["topk_info"]["entropy"] for tok in pos_probs]

    # An empty sequence cannot be transformed; skip instead of aborting the batch.
    if not entropy_neg or not entropy_pos:
        print(f"⚠️ 跳过 {qid} | {neg_sid} / {pos_sid}：熵序列为空")
        return

    idx_neg = list(range(len(entropy_neg)))
    idx_pos = list(range(len(entropy_pos)))

    # Smoothing
    smooth_neg = fourier_smooth(entropy_neg, keep_ratio=0.1)
    smooth_pos = fourier_smooth(entropy_pos, keep_ratio=0.1)

    # Error-token span (annotation only); tolerate missing or null entries.
    err_info = error_index_data.get(qid, {}).get(neg_sid, {})
    first = err_info.get("first_error_token_index")
    last = err_info.get("last_error_token_index")

    # ✅ Draw. The figure is closed in `finally` so a failure while laying out
    # or saving does not leave open figures accumulating across the batch.
    fig, ax = plt.subplots(figsize=(12, 5))
    try:
        ax.plot(idx_neg, entropy_neg, 'o-', alpha=0.3, label=f"{neg_sid} 原始", color='red')
        ax.plot(idx_neg, smooth_neg, '-', label=f"{neg_sid} 平滑", color='red', linewidth=2)
        ax.plot(idx_pos, entropy_pos, 'o-', alpha=0.3, label=f"{pos_sid} 原始", color='green')
        ax.plot(idx_pos, smooth_pos, '-', label=f"{pos_sid} 平滑", color='green', linewidth=2)

        if first is not None:
            ax.axvline(first, color='red', linestyle='--', label="First Error Token")
        if last is not None:
            ax.axvline(last, color='red', linestyle='--', alpha=0.3, label="Last Error Token")

        ax.set_title(f"傅里叶平滑 Entropy 对比 — {qid} | {neg_sid} vs {pos_sid}")
        ax.set_xlabel("Token Index")
        ax.set_ylabel("Entropy")
        ax.legend()
        ax.grid(True)
        fig.tight_layout()

        fname = f"{qid}_{neg_sid}_vs_{pos_sid}_fourier.png"
        fig.savefig(os.path.join(save_dir, fname))
    finally:
        plt.close(fig)

# ✅ Batch run: render one comparison figure for every (qid, error, correct) pair
for pair in paired_qid_sids:
    plot_entropy_fourier_pair(*pair, save_dir=OUTPUT_DIR)

print(f"🎯 所有傅里叶平滑对比图已保存至：{OUTPUT_DIR}")

✅ 找到 12 对成对样本


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))


🎯 所有傅里叶平滑对比图已保存至：/content/drive/MyDrive/Cluster-proj/output/paired_entropy_fourier_true


In [None]:
from scipy.fft import fft, fftfreq
import numpy as np
import matplotlib.pyplot as plt
import os

def plot_entropy_fourier_pair_with_spectrum(qid, neg_sid, pos_sid, save_dir, keep_ratio=0.1):
    """Plot paired entropy curves next to their Fourier amplitude spectra.

    Left panel: raw and Fourier-smoothed per-token entropy for the erroneous
    run (``neg_sid``, red) and the correct run (``pos_sid``, green), with the
    error-token span of ``neg_sid`` marked when it is available.
    Right panel: ``|fft(entropy)|`` over the first ``N // 2`` bins returned by
    ``fftfreq(N, d=1)`` for each run.

    Args:
        qid: Question id, used as key in ``logits_data`` / ``error_index_data``.
        neg_sid: Sampling id of the erroneous run.
        pos_sid: Sampling id of the correct run.
        save_dir: Directory the PNG is written to.
        keep_ratio: Passed through to ``fourier_smooth``.

    Returns:
        None. The pair is skipped with a printed message when logits are
        missing or either sequence has fewer than 4 tokens.
    """
    try:
        neg_probs = logits_data[qid][neg_sid]["token_probs"]
        pos_probs = logits_data[qid][pos_sid]["token_probs"]
    except KeyError:
        print(f"⚠️ 缺失 logits：{qid} | {neg_sid} / {pos_sid}")
        return

    entropy_neg = np.array([tok["topk_info"]["entropy"] for tok in neg_probs])
    entropy_pos = np.array([tok["topk_info"]["entropy"] for tok in pos_probs])
    N_neg = len(entropy_neg)
    N_pos = len(entropy_pos)

    if N_neg < 4 or N_pos < 4:
        print(f"跳过 {qid}，太短")
        return

    idx_neg = np.arange(N_neg)
    idx_pos = np.arange(N_pos)

    # Smoothing
    smooth_neg = fourier_smooth(entropy_neg, keep_ratio)
    smooth_pos = fourier_smooth(entropy_pos, keep_ratio)

    # Amplitude spectrum, non-negative frequency half only
    freqs_neg = fftfreq(N_neg, d=1)[:N_neg // 2]
    amp_neg = np.abs(fft(entropy_neg))[:N_neg // 2]

    freqs_pos = fftfreq(N_pos, d=1)[:N_pos // 2]
    amp_pos = np.abs(fft(entropy_pos))[:N_pos // 2]

    # Error-token span (annotation only); tolerate missing or null entries.
    err_info = error_index_data.get(qid, {}).get(neg_sid, {})
    first = err_info.get("first_error_token_index")
    last = err_info.get("last_error_token_index")

    # ✅ Figure: entropy curves on the left, spectrum on the right. Closing in
    # `finally` keeps a failed layout/save from leaking the figure.
    fig, axs = plt.subplots(1, 2, figsize=(14, 5))
    try:
        # 📈 Time-domain curves
        ax_time = axs[0]
        ax_time.plot(idx_neg, entropy_neg, 'o-', alpha=0.3, label=f"{neg_sid} 原始", color='red')
        ax_time.plot(idx_neg, smooth_neg, '-', label=f"{neg_sid} 平滑", color='red', linewidth=2)
        ax_time.plot(idx_pos, entropy_pos, 'o-', alpha=0.3, label=f"{pos_sid} 原始", color='green')
        ax_time.plot(idx_pos, smooth_pos, '-', label=f"{pos_sid} 平滑", color='green', linewidth=2)
        if first is not None:
            ax_time.axvline(first, color='red', linestyle='--', label="First Error Token")
        if last is not None:
            ax_time.axvline(last, color='red', linestyle='--', alpha=0.3, label="Last Error Token")
        ax_time.set_title(f"{qid} | 熵曲线")
        ax_time.set_xlabel("Token Index")
        ax_time.set_ylabel("Entropy")
        ax_time.legend()
        ax_time.grid(True)

        # 🔊 Amplitude spectrum
        ax_freq = axs[1]
        ax_freq.plot(freqs_neg, amp_neg, label=f"{neg_sid} 频谱", color='red')
        ax_freq.plot(freqs_pos, amp_pos, label=f"{pos_sid} 频谱", color='green')
        ax_freq.set_title("傅里叶幅度谱")
        ax_freq.set_xlabel("Frequency")
        ax_freq.set_ylabel("Amplitude")
        ax_freq.legend()
        ax_freq.grid(True)

        fig.suptitle(f"Entropy + Spectrum: {qid} | {neg_sid} vs {pos_sid}")
        fig.tight_layout()

        fname = f"{qid}_{neg_sid}_vs_{pos_sid}_fourier_spectrum.png"
        fig.savefig(os.path.join(save_dir, fname))
    finally:
        plt.close(fig)


In [None]:
# Output directory for the entropy + spectrum figures
SPECTRUM_OUT_DIR = os.path.join(BASE_PATH, "output/paired_entropy_fourier_spectrum")
os.makedirs(SPECTRUM_OUT_DIR, exist_ok=True)

# Render every pair; a failure on one pair is reported and the batch continues.
for pair in paired_qid_sids:
    qid, neg_sid, pos_sid = pair
    try:
        plot_entropy_fourier_pair_with_spectrum(*pair, save_dir=SPECTRUM_OUT_DIR)
    except Exception as err:
        print(f"❌ 跳过 {qid} | {neg_sid} vs {pos_sid} — {err}")

print(f"🎯 所有傅里叶频谱图已保存至：{SPECTRUM_OUT_DIR}")


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))
  plt.savefig(os.path.join(save_dir, fname))


🎯 所有傅里叶频谱图已保存至：/content/drive/MyDrive/Cluster-proj/output/paired_entropy_fourier_spectrum
