In [None]:
from datasets import load_dataset
import datasets
ds = load_dataset("openai/gsm8k", "main")
train: datasets.Dataset = ds["train"]
prompt_templ = """A conversation between User and Assistant. The User asks a question, and the Assistant solves it. The Assistant first thinks about the reasoning process in the mind and then provides the User with the answer. The reasoning process is enclosed within <think> </think> and answer is enclosed within <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>.
User: {0}
Assistant: <think>"""
print(prompt_templ)

In [None]:
from cs336_alignment.math_baseline import extract_answer
prompts = []
ground_truths = []
for t, data in enumerate(train):
    question = data["question"]
    answer_text = data["answer"]
    answer = extract_answer(answer_text)
    assert answer is not None, f"Could not extract answer from: {answer_text}"
    full_prompt = prompt_templ.format(question)
    prompts.append(full_prompt)
    ground_truths.append(answer)

In [None]:
import random
index = random.randint(0, len(prompts) - 1)
print("Question:")
print(train[index]["question"])
print("-" * 20)
print("Ground truth answer:")
print(ground_truths[index])
print("-" * 20)
print("Answer Text:")
print(train[index]["answer"])

In [60]:
import pickle
from cs336_alignment.math_baseline import EvalEntry
with open("../data/math_baseline_eval_results.pkl", "rb") as f:
    results: list[EvalEntry] = pickle.load(f)

In [143]:
import random
from cs336_alignment.math_baseline import EvalEntry
total_samples = len(results)
index = random.randint(0, total_samples - 1)
eval_entry: EvalEntry = results[index]
if eval_entry.reward <= 0 and eval_entry.format_reward <= 0:
    print("Prompt:")
    print(eval_entry.prompt)
    print("-" * 20)
    print("Response:")
    print(eval_entry.response)
    print("-" * 20)
    print("Ground Truth:")
    print(eval_entry.ground_truth)
    print("-" * 20)
    print("Reward:")
    print(eval_entry.reward)

"""Format Reward looks correct, only one issue is that therre are some cases 
where <think/> something <answer> answer </answer> is not satisfied. 
We can improve the format reward by checking for this condition."""

'Format Reward looks correct, only one issue is that therre are some cases \nwhere <think/> something <answer> answer </answer> is not satisfied. \nWe can improve the format reward by checking for this condition.'

In [61]:
correct_count = 0
format_correct_count = 0
wrong_count = 0
total_count = len(results)
for t, entry in enumerate(results):
    if entry.reward > 0:
        correct_count += 1
    elif entry.format_reward > 0:
        format_correct_count += 1
    else:
        wrong_count += 1
print(f"Total samples: {len(results)}")
print(f"Correct samples: {correct_count}, percent: {correct_count / total_count:.4f}")
print(f"Format correct samples: {format_correct_count}, percent: {format_correct_count / total_count:.4f}")
print(f"Wrong samples: {wrong_count}, percent: {wrong_count / total_count:.4f}")

Total samples: 32
Correct samples: 7, percent: 0.2188
Format correct samples: 0, percent: 0.0000
Wrong samples: 25, percent: 0.7812


In [62]:
import random
from math_verify import parse
all_indexes = []
for t, entry in enumerate(results):
    if entry.answer_reward_v2 > 0 and entry.format_reward > 0 and entry.reward <= 0:
        all_indexes.append(t)

print(f"Total such samples: {len(all_indexes)}")
index = random.choice(all_indexes)
entry = results[index]
format_rewards = entry.format_reward
print("Prompt:")
print(entry.prompt)
print("-" * 20)
print("Response:")
print(parse(entry.response))
print("-" * 20)
print("Ground Truth:")
print(parse(entry.ground_truth))
print("-" * 20)
print("Reward:")
print(f"reward: {entry.reward}, format_reward: {entry.format_reward}, answer_reward_v2: {entry.answer_reward_v2}")

Total such samples: 0


IndexError: Cannot choose from an empty sequence

In [None]:
from cs336_alignment.drgrpo_grader import r1_zero_reward_fn, _normalize
from math_verify import parse, verify, LatexExtractionConfig, ExprExtractionConfig

resp_answer = parse(entry.response)
gt_answer = parse(entry.ground_truth)
is_correct = verify(resp_answer, gt_answer)
print(f"resp_answer: {resp_answer}, ground truth: {gt_answer}, is_correct: {1.0 if is_correct else 0.0}")

reward = r1_zero_reward_fn(entry.response, entry.ground_truth, False)
print(f"Reward: {reward}")

resp_answer: [0, '0'], ground truth: [Eq(-6, 0) & Eq(2*a, -6), '2a = -6 = 0'], is_correct: 1.0
Reward: {'format_reward': 1.0, 'answer_reward': 0.0, 'reward': 0.0}
is_latex_equal: True


In [2]:
from datasets import load_dataset
import datasets
ds = load_dataset("hkust-nlp/dart-math-uniform")
train: datasets.Dataset = ds["train"]
prompt_templ = """A conversation between User and Assistant. The User asks a question, and the Assistant solves it. The Assistant first thinks about the reasoning process in the mind and then provides the User with the answer. The reasoning process is enclosed within <think> </think> and answer is enclosed within <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>.
User: {0}
Assistant: <think>"""

In [None]:
from typing import List
from cs336_alignment.extract import extract_ans
prompts: List[str] = []
responses: List[str] = []
for t, data in enumerate(train):
    question: str = data["query"] # type: ignore
    answer_text: str = data["response"] # type: ignore
    answer = extract_ans(answer_text, True)
    if answer is None:
        print(f"Skipping sample {t} due to no extractable answer.")
        break
    full_prompt = prompt_templ.format(question)
    prompts.append(full_prompt)
    responses.append(f"{answer_text} </think> <answer> {answer} </answer>")

In [35]:
from math_verify import parse, verify
import random
index = random.randint(0, len(train) - 1)
data = train[index]

response = data["response"]

resp_answer = parse(response)
print(f"resp_answer: {resp_answer}")

resp_answer: [11/850, '\\frac{11}{850}']


In [37]:
str(resp_answer[0])

'11/850'