In [1]:
from sal.config import Config
from typing import List
import numpy as np
from vllm import SamplingParams, LLM
from sal.utils.resampling import Particle

# Opening section of the whole-solution refinement prompt. It ends right before
# the question is inserted.
# NOTE: this is a regular (non-raw) string, so the LaTeX command must be spelled
# `\\boxed`; a bare `\b` is the backspace escape and would corrupt the prompt.
# NOTE(review): "Step 9" is hard-coded in the HINT line below.
PROMPT_HEADER = """You are an expert mathematician assisting in solving a math problem step-by-step. You are given the original problem and the sequence of steps taken so far, which represents a *partial* solution.

**Your Primary Task:**
1.  **Review:** Carefully examine the 'Original Problem' and the 'Current Partial Solution Steps' provided below.
2.  **Identify Issues:** Look for errors, inefficient reasoning, or lack of clarity *within the steps already taken*.
3.  **Refine:** Provide an improved, corrected version of *only* the steps presented in 'Current Partial Solution Steps'. Make these existing steps mathematically sound and clear. Do *not* add new steps just to complete the problem unless the refinement naturally leads to the final answer based *only* on the provided partial steps.

**Required Output Formatting:**
After refining the partial steps, format your *entire output* (the refined partial solution) according to these rules:
* **Clarity:** Present the refined steps clearly.
* **Step-by-Step Format (for multiple steps):** If the *refined partial solution* involves multiple distinct conceptual steps (even if it's not the whole problem), use this format:
    ```
    ## Step 1: [Concise description of step]
    [Brief explanation and calculations for this step]

    ## Step 2: [Concise description of step]
    [Brief explanation and calculations for this step]

    ...
    ```
* **Concise Format (for few/simple steps):** If the *refined partial solution* is very short or consists of fewer than 3 conceptual steps (e.g., corrects a single calculation or clarifies one brief point), provide a concise explanation without the `## Step X:` structure.
* **Final Answer Format (Conditional):** This format should ONLY be used if your refinement completes the entire problem. If the refined steps still represent a partial solution, DO NOT include this section. If the problem is fully solved by your refinement, conclude with:
    ```
    Therefore, the final answer is: $\\boxed{YourFinalAnswer}$. I hope it is correct.
    ```
    Replace `YourFinalAnswer` with the actual final result using LaTeX format for the box. **To reiterate: Only include this specific concluding line ("Therefore, the final answer is...") if the problem is fully solved by your refined steps. Otherwise, omit it entirely from your response.**

**Focus:** Improve the mathematical accuracy and clarity of the *existing partial work* and strictly follow the specified formatting rules for the *output you generate*.
**HINT:** Among these existing steps, Step 9 is very likely to be the first incorrect step.
---
**Original Problem:**
""" # Ends right before the question is inserted

# Separator between the problem statement and the partial solution steps.
PROMPT_MIDDLE = """
---
**Current Partial Solution Steps:**
""" # Ends right before the partial solution is inserted

# Closing instruction of the refinement prompt; presumably appended after the
# partial solution steps (no visible code assembles the pieces - TODO confirm).
# NOTE(review): "Step 9" is hard-coded in the text below.
PROMPT_FOOTER = """
---

**Your Task:** Provide the refined and correctly formatted version of the partial solution steps below. Start with the reasoning why Step 9 is wrong, and then followed with the refined steps.
"""

  from .autonotebook import tqdm as notebook_tqdm
2025-04-22 14:00:45,621	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [30]:
# `str.format` template asking the model to critique and rewrite ONE step.
# Placeholders that must be supplied:
#   {question}                        - the original problem statement
#   {step_number_to_analyze}          - 1-based number of the step under review
#   {step_number_to_analyze_minus_1}  - number of the last "previous" step
#   {steps_before_suspicious_text}    - text of the steps preceding the reviewed one
#   {suspicious_step_text}            - text of the step under review
# Any literal brace added to this text must be doubled ({{ }}) or `.format` will fail.
CRITIQUE_SINGLE_STEP_PROMPT = """You are an expert mathematician reviewing a specific step in a partially solved math problem.

**Your Task:**

1.  **Context:** Review the 'Original Problem', the 'Previous Steps' (assume these are correct), and the specific 'Step {step_number_to_analyze} Under Review'.
2.  **Analyze:** Determine *why* 'Step {step_number_to_analyze} Under Review' is incorrect based on the preceding steps and the original problem. If it happens to be correct, state that.
3.  **Output:** Your entire output should be ONLY:
    * A concise explanation of the error found in Step {step_number_to_analyze}.
    * The correctly modified text for Step {step_number_to_analyze}. Do NOT include any other steps (previous or subsequent).

**Example Output Structure:**
Reasoning: [Explain the error in Step {step_number_to_analyze} here.]

Corrected Step {step_number_to_analyze}: [Provide the full text of the corrected Step {step_number_to_analyze} here.]

---
**Original Problem:**
{question}
---
**Previous Steps (Steps 1 to {step_number_to_analyze_minus_1}):**
{steps_before_suspicious_text}
---
**Step {step_number_to_analyze} Under Review:**
{suspicious_step_text}
---

**Your Task:** Provide the reasoning and the single corrected step text below.
"""

In [29]:
def self_refinement_simple(
    llm,
    particles: List[Particle],
    question: str,
    step_number_to_analyze: int = 9,
    temperature: float = 0.8,
) -> List[Particle]:
    """
    Ask the LLM to critique and correct one step of each particle's solution.

    For every particle, the step numbered ``step_number_to_analyze`` (1-based)
    is placed into ``CRITIQUE_SINGLE_STEP_PROMPT`` together with the steps
    before it, and the model's reply is stored as the new trajectory.

    Note that the reply *replaces* the trajectory: the returned particle holds
    the model's text split on blank lines (e.g. a "Reasoning: ..." chunk and a
    "Corrected Step N: ..." chunk), not the full original solution.

    Args:
        llm: The language model instance (needs 'get_tokenizer' and 'generate').
        particles: A list of Particle objects with a 'trajectory' attribute
            (a list of step strings) and a 'deepcopy' method.
        question: The original math problem statement.
        step_number_to_analyze: 1-based number of the step to review
            (default: 9, the value previously hard-coded).
        temperature: Temperature for LLM generation (default: 0.8).

    Returns:
        A list of new Particle objects, one per input particle and in the same
        order. A particle is returned as an unmodified copy when its trajectory
        is empty, when it has fewer than ``step_number_to_analyze`` steps, when
        the model returns empty text, or when generation raises.

    Raises:
        ValueError: If ``step_number_to_analyze`` is smaller than 1.
    """
    if step_number_to_analyze < 1:
        raise ValueError(
            f"step_number_to_analyze must be >= 1, got {step_number_to_analyze}"
        )
    # Step numbers are 1-based; trajectory indices are 0-based.
    step_index = step_number_to_analyze - 1
    step_number_to_analyze_minus_1 = step_number_to_analyze - 1

    refined_particles: List[Particle] = []

    # Define sampling parameters for the LLM call
    # Adjust max_tokens, top_p, stop as needed
    sampling_params = SamplingParams(
        temperature=temperature,
        max_tokens=2048,
        stop=["<|eot_id|>"]
    )
    tokenizer = llm.get_tokenizer()

    for particle in particles:
        # Basic check: Skip if the particle has no prior solution steps
        if not particle.trajectory:
            refined_particles.append(particle.deepcopy()) # Keep the empty particle as is
            continue

        # The step to review must exist; otherwise indexing below would raise
        # IndexError and abort the whole batch.
        if len(particle.trajectory) <= step_index:
            print(
                f"Warning: trajectory has only {len(particle.trajectory)} step(s); "
                f"Step {step_number_to_analyze} does not exist. Keeping original."
            )
            refined_particles.append(particle.deepcopy())
            continue

        steps_before_suspicious = particle.trajectory[:step_index]
        suspicious_step_text = particle.trajectory[step_index]
        steps_before_text = "\n\n".join(steps_before_suspicious) if steps_before_suspicious else "(No previous steps)"
        prompt_content = CRITIQUE_SINGLE_STEP_PROMPT.format(
            question=question,
            step_number_to_analyze_minus_1=step_number_to_analyze_minus_1,
            step_number_to_analyze=step_number_to_analyze,
            steps_before_suspicious_text=steps_before_text,
            suspicious_step_text=suspicious_step_text
        )

        # The whole critique prompt is sent as a single system message.
        messages = [{"role": "system", "content": prompt_content}]
        prompt = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )

        try:
            # Echo the fully rendered prompt so it can be inspected in the notebook.
            print(prompt)
            outputs = llm.generate(prompt, sampling_params)

            # --- Extract the refined text ---
            # !! Adjust based on your llm.generate return structure !!
            refined_formatted_text = outputs[0].outputs[0].text.strip()

            if not refined_formatted_text:
                print("Warning: Received empty formatted refinement. Keeping original.")
                refined_particles.append(particle.deepcopy())
                continue

            refined_particle = particle.deepcopy()
            # Store the model's reply as the new trajectory, one entry per
            # blank-line-separated chunk. It replaces the previous trajectory.
            refined_particle.trajectory = refined_formatted_text.split("\n\n")

            refined_particles.append(refined_particle)

        except Exception as e:
            # Best effort: a failed generation must not lose the particle.
            print(f"Error during formatted partial refinement: {e}. Keeping original.")
            refined_particles.append(particle.deepcopy())

    return refined_particles

In [4]:
# Load the instruct model through vLLM once; the resulting `llm` is the object
# later passed to self_refinement_simple.
llm = LLM(model="meta-llama/Llama-3.2-3B-Instruct")


INFO 04-22 14:00:51 config.py:510] This model supports multiple tasks: {'classify', 'embed', 'score', 'generate', 'reward'}. Defaulting to 'generate'.
INFO 04-22 14:00:51 config.py:1458] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 04-22 14:00:51 llm_engine.py:234] Initializing an LLM engine (v0.6.6) with config: model='meta-llama/Llama-3.2-3B-Instruct', speculative_config=None, tokenizer='meta-llama/Llama-3.2-3B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endp

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:00<00:00,  5.09it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00,  2.01it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:00<00:00,  2.21it/s]



INFO 04-22 14:00:56 model_runner.py:1099] Loading model weights took 6.0160 GB
INFO 04-22 14:00:56 worker.py:241] Memory profiling takes 0.55 seconds
INFO 04-22 14:00:56 worker.py:241] the current vLLM instance can use total_gpu_memory (47.43GiB) x gpu_memory_utilization (0.90) = 42.69GiB
INFO 04-22 14:00:56 worker.py:241] model weights take 6.02GiB; non_torch_memory takes 0.10GiB; PyTorch activation peak memory takes 1.18GiB; the rest of the memory reserved for KV Cache is 35.39GiB.
INFO 04-22 14:00:56 gpu_executor.py:76] # GPU blocks: 20705, # CPU blocks: 2340
INFO 04-22 14:00:56 gpu_executor.py:80] Maximum concurrency for 131072 tokens per request: 2.53x
INFO 04-22 14:01:01 model_runner.py:1415] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utiliz

Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:13<00:00,  2.69it/s]

INFO 04-22 14:01:14 model_runner.py:1535] Graph capturing finished in 13 secs, took 0.19 GiB
INFO 04-22 14:01:14 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 18.48 seconds





In [35]:
question = "What is the least positive integer multiple of 30 that can be written with only the digits 0 and 2?"

# A model-written solution whose Step 9 is wrong: it claims 120 uses only the
# digits 0 and 2. Steps are separated by blank lines.
steps_text = """Understand the problem\nWe are looking for the smallest positive integer that is a multiple of 30 and can be formed using only the digits 0 and 2.\n\n## Step 2: Factorize 30\nThe prime factorization of 30 is 2 * 3 * 5.\n\n## Step 3: Determine the requirements\nFor a number to be a multiple of 30, it must be divisible by 2, 3, and 5.\n\n## Step 4: Consider the digits 0 and 2\nSince we can only use the digits 0 and 2, we can use them in any position in the number.\n\n## Step 5: Analyze the options\nTo meet the requirements, we need to find the smallest multiple of 30 that can be formed using only 0 and 2.\n\n## Step 6: Start with the multiples of 30\nWe can start by listing the multiples of 30 and checking which ones can be formed using only the digits 0 and 2.\n\n## Step 7: List multiples of 30\nMultiples of 30 are: 30, 60, 90, 120, 150, 180, 210, 240, 270, 300.\n\n## Step 8: Check if they meet the requirements\nAmong the multiples of 30, we need to check which ones can be formed using only the digits 0 and 2.\n\n## Step 9: Analyze the options\n120 meets the requirement as it can be formed using only the digits 0 and 2.\n\n## Step 10: Verify if it is the smallest possible\n120 is the smallest multiple of 30 that can be formed using only the digits 0 and 2.\n\nThe final answer is: $\\boxed{120}$"""
# One trajectory entry per blank-line-separated step.
steps = steps_text.split("\n\n")
particle = Particle()
particle.trajectory = steps
particles = [particle]
# Ask the model to critique Step 9 of the single particle.
output = self_refinement_simple(llm, particles, question)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 22 Apr 2025

You are an expert mathematician reviewing a specific step in a partially solved math problem.

**Your Task:**

1.  **Context:** Review the 'Original Problem', the 'Previous Steps' (assume these are correct), and the specific 'Step 9 Under Review'.
2.  **Analyze:** Determine *why* 'Step 9 Under Review' is incorrect based on the preceding steps and the original problem. If it happens to be correct, state that.
3.  **Output:** Your entire output should be ONLY:
    * A concise explanation of the error found in Step 9.
    * The correctly modified text for Step 9. Do NOT include any other steps (previous or subsequent).

**Example Output Structure:**
Reasoning: [Explain the error in Step 9 here.]

Corrected Step 9: [Provide the full text of the corrected Step 9 here.]

---
**Original Problem:**
What is the least positive integer multiple of 30 that can be written with

Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.34s/it, est. speed input: 453.03 toks/s, output: 85.69 toks/s]


In [36]:
# Show the trajectory of the first returned particle (only one particle was submitted).
output[0].trajectory

['Reasoning: The error in Step 9 is that it states "120 meets the requirement as it can be formed using only the digits 0 and 2" without considering that the correct sequence of digits for 120 is 1, 2, 0 which includes the digit 1 that cannot be used according to the problem\'s requirement.',
 'Corrected Step 9:\n## Step 9: Analyze the options\n120 cannot meet the requirements as it cannot be formed using only the digits 0 and 2, because it includes the digit 1.']