In [None]:
from datasets import load_dataset

# Load the "default" configuration of the open-r1/OpenR1-Math-220k dataset.
ds = load_dataset(path="open-r1/OpenR1-Math-220k", name="default")
# Bare last expression so the notebook renders the DatasetDict summary.
ds

README.md: 0.00B [00:00, ?B/s]

Resolving data files:   0%|          | 0/20 [00:00<?, ?it/s]

data/train-00000-of-00010.parquet:   0%|          | 0.00/214M [00:00<?, ?B/s]

data/train-00001-of-00010.parquet:   0%|          | 0.00/215M [00:00<?, ?B/s]

data/train-00002-of-00010.parquet:   0%|          | 0.00/215M [00:00<?, ?B/s]

data/train-00003-of-00010.parquet:   0%|          | 0.00/217M [00:00<?, ?B/s]

data/train-00004-of-00010.parquet:   0%|          | 0.00/215M [00:00<?, ?B/s]

data/train-00005-of-00010.parquet:   0%|          | 0.00/214M [00:00<?, ?B/s]

data/train-00006-of-00010.parquet:   0%|          | 0.00/216M [00:00<?, ?B/s]

data/train-00007-of-00010.parquet:   0%|          | 0.00/216M [00:00<?, ?B/s]

data/train-00008-of-00010.parquet:   0%|          | 0.00/214M [00:00<?, ?B/s]

data/train-00009-of-00010.parquet:   0%|          | 0.00/215M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/93733 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['problem', 'solution', 'answer', 'problem_type', 'question_type', 'source', 'uuid', 'is_reasoning_complete', 'generations', 'correctness_math_verify', 'correctness_llama', 'finish_reasons', 'correctness_count', 'messages'],
        num_rows: 93733
    })
})

In [20]:
import os

import torch

# Notebook configuration.
# The paths default to the original devcontainer locations, but each can be
# overridden with an environment variable so the notebook runs on other
# machines without editing this cell.
MODEL_DIR = os.environ.get(
    "PLANNER_MODEL_DIR",
    "/home/devcontainers/reasoning-llm-agent/outputs/flan_t5_small_lora_planner",
)
PROMPT_TEMPLATE = os.environ.get(
    "PLANNER_PROMPT_TEMPLATE",
    "/home/devcontainers/reasoning-llm-agent/prompts/planner_prompt.txt",
)

# Use the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("DEVICE:", DEVICE)
print("MODEL_DIR:", MODEL_DIR)


DEVICE: cuda
MODEL_DIR: /home/devcontainers/reasoning-llm-agent/outputs/flan_t5_small_lora_planner


In [18]:
# Load the tokenizer and the model from MODEL_DIR, handling two save layouts:
#   Pattern A - MODEL_DIR holds a checkpoint AutoModelForSeq2SeqLM can load directly
#               (e.g. base + LoRA weights written by trainer.save_model()).
#   Pattern B - MODEL_DIR holds only a LoRA adapter; the base model is loaded
#               first and the adapter is attached with PeftModel.from_pretrained.

from pathlib import Path

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_dir = Path(MODEL_DIR)
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# Pattern A is tried first; any failure switches to Pattern B.
try:
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_DIR)
except Exception as e:
    print('Could not load full model directly:', e)
    # Pattern B: base checkpoint + PEFT adapter.
    base_model_name = "google/flan-t5-small"
    print('Loading base model and applying PEFT adapter (base=', base_model_name, ')')
    base = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
    # Resize the base embeddings to the saved tokenizer's vocabulary size
    # before the adapter is attached.
    vocab_size = len(tokenizer)
    base.resize_token_embeddings(vocab_size)
    try:
        from peft import PeftModel
        model = PeftModel.from_pretrained(base, MODEL_DIR)
    except Exception as e2:
        raise RuntimeError(f"Failed to load PEFT adapter from MODEL_DIR: {e2}")
    else:
        print('Loaded PEFT adapter from', MODEL_DIR)
else:
    print('Loaded model directly from', MODEL_DIR)

# Move to the target device and switch to inference mode.
model = model.to(DEVICE)
model.eval()
print('Model loaded and moved to', DEVICE)

Could not load full model directly: Error(s) in loading state_dict for T5ForConditionalGeneration:
	size mismatch for shared.weight: copying a param with shape torch.Size([32100, 512]) from checkpoint, the shape in current model is torch.Size([32128, 512]).
	size mismatch for lm_head.weight: copying a param with shape torch.Size([32100, 512]) from checkpoint, the shape in current model is torch.Size([32128, 512]).
Loading base model and applying PEFT adapter (base= google/flan-t5-small )
Loaded PEFT adapter from /home/devcontainers/reasoning-llm-agent/outputs/flan_t5_small_lora_planner
Model loaded and moved to cuda


In [21]:
# Set-up for a small local generation helper that calls transformers directly
# (beam search / greedy decoding).
from transformers import AutoTokenizer

from agent.model_planner import create_planner_prompt

# Reload the tokenizer from MODEL_DIR so this cell does not depend on the
# tokenizer object created by the model-loading cell.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

def generate_from_prompt(prompt, max_new_tokens=256, num_beams=4, num_return_sequences=1):
    """Generate candidate completions for ``prompt`` using beam search without sampling.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``DEVICE``.

    Args:
        prompt: Text to encode; the tokenizer truncates it to at most 1024 tokens.
        max_new_tokens: Forwarded to ``model.generate`` as ``max_new_tokens``.
        num_beams: Beam width forwarded to ``model.generate``.
        num_return_sequences: Number of sequences requested from ``model.generate``.

    Returns:
        A list of decoded strings (special tokens skipped), one per generated sequence.
    """
    encoded = tokenizer(
        prompt,
        return_tensors='pt',
        truncation=True,
        max_length=1024,
    ).to(DEVICE)

    generation_options = {
        'max_new_tokens': max_new_tokens,
        'num_beams': num_beams,
        'num_return_sequences': num_return_sequences,
        'do_sample': False,
        'early_stopping': True,
        'repetition_penalty': 1.2,
    }
    sequences = model.generate(**encoded, **generation_options)

    decoded = []
    for token_ids in sequences:
        decoded.append(tokenizer.decode(token_ids, skip_special_tokens=True))
    return decoded

print('Helper generate_from_prompt ready (beam search, no sampling).')


# Build the planner prompt for a sample question and preview its first 800 characters.
question = "A coffee shop sold 45 lattes on Monday and 20% more lattes on Tuesday. How many lattes did they sell in total over the two days?"
prompt = create_planner_prompt(question, PROMPT_TEMPLATE)
# Fixed: the original used '\\n', which printed a literal backslash-n instead of a newline.
print('=== Prompt preview (first 800 chars)\n')
print(prompt[:800])
# Save the full prompt to disk so it can be inspected without truncation.
with open('prompt_debug_lora.txt', 'w', encoding='utf-8') as f:
    f.write(prompt)
print('\nFull prompt saved to prompt_debug_lora.txt')


Helper generate_from_prompt ready (beam search, no sampling).
=== Prompt preview (first 800 chars)\n
PLease answer this question:
You are a step-by-step REASONING PLANNER.

Your job: transform a math or logic problem into a short, deterministic, executable PLAN.
OUTPUT FORMAT (must follow exactly):
 - Produce EXACTLY one valid JSON array. No words, no commentary, nothing else.
 - The array must start with '[' and end with ']'.
 - Each element must be an object with ONLY these keys:
     "step"  : integer
     "tool"  : string   (one of: "calc","python","sympy","z3","bruteforce")
     "input" : string   (the exact command the executor should run)
     "comment": string  (one short sentence description)

Hard rules (follow exactly):
1) Never compute numbers in the plan text — use tools. For arithmetic use "calc" or "python", for algebra use "sympy", for constraints use "z3".
2) The "input" 
\nFull prompt saved to prompt_debug_lora.txt


In [22]:
import json
# Run generation and try to extract a JSON plan from each candidate using the
# parsing helpers on PlannerLLM.
from agent.model_planner import PlannerLLM
planner = PlannerLLM(model_name=None)  # we won't call load_model(); we'll use our loaded model & tokenizer above

# Use generate_from_prompt to get the raw candidate texts
cands = generate_from_prompt(prompt, max_new_tokens=256, num_beams=6, num_return_sequences=3)

# Separate PlannerLLM instance used only for its parsing utilities
# (_extract_json_from_text and, when available, _repair_and_extract).
parser = PlannerLLM()
results = []
for i, raw in enumerate(cands, 1):
    parsed = parser._extract_json_from_text(raw)
    if parsed is None:
        # Second chance: use the repair helper if this PlannerLLM version provides one.
        parsed = parser._repair_and_extract(raw) if hasattr(parser, '_repair_and_extract') else None
    results.append({'raw': raw, 'parsed': parsed})
    # Fixed: the original used '\\n', which printed a literal backslash-n instead of a newline.
    print('--- Candidate', i, 'raw preview ---\n', raw[:800], '\nparsed:', parsed, '\n')

# Save candidates for later inspection
with open('lora_generation_candidates.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

print('\nSaved generation candidates to lora_generation_candidates.json')


--- Candidate 1 raw preview ---\n ["step": 1, "tool": "python", "input": "# STEP (needs curation): ## Solution.nreturn 'note':'needs_curation'", "comment": "## Solution.", "step": 2, "tool": "python", "input": "# STEP (needs curation): ## Solution.nreturn 'note':'needs_curation'", "comment": "## Solution.", "step": 3, "tool": "python", "input": "# STEP (needs curation): ## Solution.nreturn 'note':'needs_curation'", "comment": "## Solution.", "step": 4, "tool": "python", "input": "# STEP (needs curation): ## Solution.nreturn 'note':' \nparsed: None \n
--- Candidate 2 raw preview ---\n ["step": 1, "tool": "python", "input": "# STEP (needs curation): ## Solution.nreturn 'note':'needs_curation'", "comment": "## Solution.", "step": 2, "tool": "python", "input": "# STEP (needs curation): ## Solution.nreturn 'note':'needs_curation'", "comment": "## Solution.", "step": 3, "tool": "python", "input": "# STEP (needs curation): ## Solution.nreturn 'note':'needs_curation'", "comment": "## Solution.