In [None]:
# Mount Google Drive at /content/drive; every path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import json
import os
import re

# ✅ Configuration: the result/output file names are derived from the index range below.
start_index = 901
end_index = 950
range_tag = f"{start_index}-{end_index}"  # e.g. "901-950"; embedded in the two file names below

BASE_PATH = "/content/drive/MyDrive/Cluster-proj"  # project root on the mounted Drive
INPUT_PATH = f"{BASE_PATH}/dataset/openai-gsm8k/train.jsonl"  # NOTE(review): not referenced by the cells visible here
RESULT_PATH = f"{BASE_PATH}/output/llm_steps/whole_logits/deepseek-math-7b-gsm-{range_tag}.json"  # passed to find_inconsistent_pairs as the JSON to read
PAIR_OUTPUT_PATH = f"{BASE_PATH}/output/inconsistent_pairs/inconsistent-pairs-{range_tag}.json"  # passed to find_inconsistent_pairs as the JSON to write

# ✅ Extract the final result; supports $\boxed{...}$ and <final_result>...</final_result>
def extract_final_result(text):
    """Pull the final answer out of a model response.

    Two formats are tried in order, and the first one that matches wins:

    1. A dollar-delimited ``\\boxed{...}`` expression (one or two leading
       backslashes are accepted); searched without any regex flags.
    2. A ``<final_result>...</final_result>`` tag pair; searched with
       ``re.DOTALL`` so the content may span several lines.

    Args:
        text: The raw model output to search.

    Returns:
        The captured answer with surrounding whitespace removed, or ``None``
        when neither format occurs in ``text``.
    """
    # (pattern, flags) pairs in priority order.
    candidates = (
        (r"\$\s*\\\\?boxed\s*{(.*?)}\s*\$", 0),
        (r"<final_result>(.*?)</final_result>", re.DOTALL),
    )
    for pattern, flags in candidates:
        found = re.search(pattern, text, flags)
        if found is not None:
            return found.group(1).strip()
    return None

# ✅ Main routine: extract inconsistent (correct vs. wrong) answer pairs
def find_inconsistent_pairs(json_path, save_path, num_samples=3):
    """Build (correct answer, wrong answer) pairs per question and save them as JSON.

    The input JSON maps a question id to an entry holding ``"question"``,
    ``"true_final_result"`` and up to ``num_samples`` sub-dicts named
    ``"sampling0"``, ``"sampling1"``, ... Each sampling contributes its
    ``"whole_answer"`` text; its predicted result is ``"final_result"`` when
    that is non-empty, otherwise whatever ``extract_final_result`` finds in
    ``"whole_answer"``. Samplings with no usable prediction are skipped.

    A question is kept only if it has at least one prediction equal to the
    reference (after stripping whitespace) and at least one that differs; the
    output then lists every positive/negative combination for that question.

    Args:
        json_path: Path of the results JSON to read.
        save_path: Path of the JSON file to write. Its parent directory is
            created when missing.
        num_samples: How many ``sampling{i}`` entries to inspect per question
            (defaults to 3).

    Returns:
        None. The pairs are written to ``save_path`` and a confirmation line
        is printed.

    Raises:
        KeyError: If an entry has no ``"question"`` field.
    """
    def _clean(value):
        # Fields may be null or numeric in the JSON; normalise to stripped text
        # so the comparison below never calls .strip() on a non-string.
        return "" if value is None else str(value).strip()

    # JSON is UTF-8; do not depend on the platform's locale encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        results = json.load(f)

    inconsistent_pairs = {}

    for qid, entry in results.items():
        question = entry["question"]
        true_final = _clean(entry.get("true_final_result"))

        positives = []
        negatives = []

        for i in range(num_samples):
            sampling = entry.get(f"sampling{i}") or {}
            model_output = sampling.get("whole_answer") or ""

            # Prefer the stored result; fall back to parsing the full answer text.
            pred_final = _clean(sampling.get("final_result"))
            if not pred_final:
                pred_final = _clean(extract_final_result(model_output))

            if not pred_final:
                continue

            if pred_final == true_final:
                positives.append(model_output)
            else:
                negatives.append(model_output)

        if positives and negatives:
            # Cartesian product: every correct answer paired with every wrong one.
            inconsistent_pairs[qid] = [
                {
                    "question": question,
                    "true_final_result": true_final,
                    "positive_answer": pos,
                    "negative_answer": neg
                }
                for pos in positives
                for neg in negatives
            ]

    # ✅ Make sure the output folder exists (a bare file name has no folder to create)
    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must be explicit.
    with open(save_path, "w", encoding="utf-8") as f:
        json.dump(inconsistent_pairs, f, indent=2, ensure_ascii=False)

    print(f"✅ Saved inconsistent pairs: {save_path}")

# ✅ Run: read the sampled results at RESULT_PATH and write the pairs to PAIR_OUTPUT_PATH
find_inconsistent_pairs(RESULT_PATH, PAIR_OUTPUT_PATH)


In [None]:
import json
# Results JSON loaded by the next cell.
# NOTE(review): the file name suggests questions 401-600 with step annotations — confirm.
file_path = "/content/drive/MyDrive/Cluster-proj/output/deepseek7b-math-401-600_with_steps.json"

In [None]:
# Load the results JSON into `data`. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's default locale encoding.
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)


In [None]:
import re

def extract_final_answer(ans_str):
    """Return the token that follows the ``####`` marker in a reference answer.

    Args:
        ans_str: Reference answer text to search.

    Returns:
        The first run of non-whitespace characters after ``####`` (optional
        whitespace in between is skipped), or ``None`` when the marker is
        absent or nothing follows it.
    """
    found = re.search(r'####\s*(\S+)', ans_str)
    if found is None:
        return None
    return found.group(1).strip()

def get_inconsis_math(data, num_samples=3):
    """Grade each sampled answer and report questions with mixed outcomes.

    For every question id in ``data`` the reference value is taken from
    ``data[qid]['true_ans']`` via ``extract_final_answer`` (the token after
    ``####``). Each ``data[qid]['sampling{i}']['answer']`` is graded correct
    when it contains that value as a stand-alone number, i.e. not embedded in
    a longer number. A plain substring test would accept "15", "2.5" or
    "5,000" for a reference of "5"; a trailing ``.0``/``.00`` is still
    accepted so "5.00" counts as 5.

    Side effect: ``data`` is modified in place; every graded sampling dict
    gets an ``'accuracy'`` key set to the string ``'true'`` or ``'false'``.

    Args:
        data: Mapping of question id to a dict with ``'true_ans'`` and
            ``'sampling0'`` .. ``'sampling{num_samples-1}'`` entries.
        num_samples: Number of samplings to grade per question (defaults to 3).

    Returns:
        Dict mapping each question id that has at least one correct and at
        least one wrong sampling to
        ``{'correct_sampling': [...], 'wrong_sampling': [...]}``, where the
        lists hold sampling key names such as ``'sampling0'``.

    Raises:
        KeyError: If a question lacks ``'true_ans'``, an expected sampling, or
            that sampling's ``'answer'`` field.
    """
    inconsistent_cases = {}

    for qid in data:
        true_ans_value = extract_final_answer(data[qid]['true_ans'])
        if true_ans_value is None:
            print(f"[WARNING] No final answer found for ID {qid}")
            continue

        # Number-boundary match:
        #   - not preceded by a digit, nor by "<digit>." / "<digit>," (part of a longer number)
        #   - optionally followed by ".0", ".00", ... (same value written as a decimal)
        #   - not followed by a digit, nor by "." / "," plus a digit
        answer_re = re.compile(
            r"(?<!\d)(?<!\d[.,])"
            + re.escape(true_ans_value)
            + r"(?:\.0+)?(?![.,]?\d)"
        )

        correct = []
        wrong = []
        for i in range(num_samples):
            sample_key = f'sampling{i}'
            pred_ans = data[qid][sample_key]['answer']
            # A null answer is simply wrong; numeric answers are compared as text.
            pred_text = "" if pred_ans is None else str(pred_ans)

            if answer_re.search(pred_text):
                data[qid][sample_key]['accuracy'] = 'true'
                correct.append(sample_key)
            else:
                data[qid][sample_key]['accuracy'] = 'false'
                wrong.append(sample_key)

        if correct and wrong:
            inconsistent_cases[qid] = {
                'correct_sampling': correct,
                'wrong_sampling': wrong
            }

    return inconsistent_cases


In [None]:
# Grade every sampling; note that this also writes an 'accuracy' field into `data` in place.
inconsistent_cases = get_inconsis_math(data)

In [None]:
# Write the inconsistent-case summary to Drive as indented JSON.
# NOTE(review): the target folder is not created here — confirm it already exists.
out_path = "/content/drive/MyDrive/Cluster-proj/output/1_inconsis_cases/inconsis_deepseek7b-math-401-600.json"
with open(out_path, 'w') as f:
    json.dump(inconsistent_cases, f, indent=2)