# Test on Router Framework and Development

## Set Up Project

In [1]:
import os, re, time, torch
from dotenv import load_dotenv
from agents import Agent, function_tool, ModelSettings
from agents.extensions.models.litellm_model import LitellmModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import extracted_box, len_extract_boxed
from grader import math_equal
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# Credentials and model routing (the API key is read from the environment).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
MODEL = "gemini/gemini-2.5-flash"

# Experiment knobs. DATASET, SAMPLES and SEED are passed to read_data() below.
GPUS = "0 5"
DATASET = "math_500"
SAMPLES, SEED = 10, 0
MAX_TOKENS = 8000

In [3]:
# SLM Config
_SLM_ID = "Qwen/Qwen2.5-Math-1.5B-Instruct"

### Load Data

In [3]:
import sys
from pathlib import Path

# Make the parent of the working directory importable so that
# `tools.data_loader` (imported in the next cell) can be resolved.
current_dir = os.getcwd()
project_root = str(Path(current_dir).parent)

# Guard the append: without it, every re-run of this cell adds another
# copy of the same directory to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

In [4]:
from tools.data_loader import read_data
sys.path

['/data/long/miniconda3/envs/delegate/lib/python310.zip',
 '/data/long/miniconda3/envs/delegate/lib/python3.10',
 '/data/long/miniconda3/envs/delegate/lib/python3.10/lib-dynload',
 '',
 '/data/long/miniconda3/envs/delegate/lib/python3.10/site-packages',
 '/data/long/hai/Individual_Project/Delegate_SLM_Focus/experiments',
 '/tmp/tmpvxavajmg',
 '/data/long/hai/Individual_Project/Delegate_SLM_Focus']

In [5]:
test_df = read_data(DATASET, n_samples=SAMPLES, random_seed=SEED)


Loaded 10 problems from GSM8K (test split)


In [6]:
test_df.iloc[0]['problem']

'Remmy wants to divide $10$ by $\\frac{2}{3}$, but he cannot remember how to do that.  By what number should he multiply $10$ to get the answer?'

## Counting Tokens

In [7]:
from google import genai

client = genai.Client(api_key=GEMINI_API_KEY)

In [8]:
MODEL_ID = "gemini-2.5-flash" # @param ["gemini-2.5-flash-lite", "gemini-2.5-flash-lite-preview-09-2025", "gemini-2.5-flash", "gemini-2.5-flash-preview-09-2025", "gemini-2.5-pro"] {"allow-input":true, isTemplate: true}

# Fetch the model's metadata once and report both token limits.
model_info = client.models.get(model=MODEL_ID)
for label, limit in (
    ("Context window:", model_info.input_token_limit),
    ("Max output window:", model_info.output_token_limit),
):
    print(label, limit, "tokens")

Context window: 1048576 tokens
Max output window: 65536 tokens


In [10]:
contents = "That's a fantastic question! Pi, symbolized by the Greek letter $\\pi$, is one of the most famous and fundamental numbers in mathematics.\n\nLet's break down what pi is:\n\n### 1. The Definition of Pi ($\\pi$)\n\nAt its core, pi is the **ratio of a circle's circumference to its diameter**.\n\n*   **Circumference:** The distance around the outside of a circle.\n*   **Diameter:** The distance straight across the circle, passing through its center.\n\nNo matter how big or small a circle is, if you divide its circumference by its diameter, you will *always* get the same number: pi.\n\n**In formula form:**\n$$ \\pi = \\frac{\\text{Circumference}}{\\text{Diameter}} $$\n\nThis also means that the circumference of a circle can be found using the formula:\n$$ \\text{Circumference} = \\pi \\times \\text{Diameter} \\quad (\\text{or } C = \\pi d) $$\nSince the diameter is twice the radius ($d = 2r$), we can also write it as:\n$$ \\text{Circumference} = 2 \\times \\pi \\times \\text{Radius} \\quad (\\text{or } C = 2\\pi r) $$\n\n### 2. The Value of Pi\n\nPi is an **irrational number**. This means two very important things:\n\n*   **Its decimal representation goes on forever** without ending.\n*   **Its decimal representation never repeats** in a predictable pattern.\n\nFor practical purposes, we often use approximations of pi:\n\n*   **Common approximation:** $\\pi \\approx 3.14$\n*   **More precise approximation:** $\\pi \\approx 3.14159$\n*   **Fractional approximation (less common but useful):** $\\pi \\approx \\frac{22}{7}$ (Note: 22/7 is only an approximation, as its decimal expansion *does* repeat).\n\nMathematicians have calculated pi to trillions of digits, but for most everyday calculations, 3.14 or 3.14159 is sufficient.\n\n### 3. Why is Pi Important?\n\nPi is crucial because it appears in countless formulas involving circles, spheres, cylinders, and other curved shapes. 
It's not just for circumference; it's also key for:\n\n*   **Area of a circle:** $A = \\pi r^2$\n*   **Volume of a sphere:** $V = \\frac{4}{3}\\pi r^3$\n*   **Surface area of a sphere:** $A = 4\\pi r^2$\n*   **Volume of a cylinder:** $V = \\pi r^2 h$\n\nAnd many, many more advanced mathematical and scientific formulas. It even pops up in unexpected places in statistics, physics, and engineering!\n\n### Examples:\n\n**Example 1: Finding the Circumference of a Circle**\n\nImagine you have a circular pool with a diameter of 10 meters. How much fencing do you need to go around it?\n\n*   **Formula:** $C = \\pi d$\n*   **Given:** $d = 10 \\text{ meters}$\n*   **Calculation (using $\\pi \\approx 3.14$):**\n    $C ="

In [12]:
print(contents)

That's a fantastic question! Pi, symbolized by the Greek letter $\pi$, is one of the most famous and fundamental numbers in mathematics.

Let's break down what pi is:

### 1. The Definition of Pi ($\pi$)

At its core, pi is the **ratio of a circle's circumference to its diameter**.

*   **Circumference:** The distance around the outside of a circle.
*   **Diameter:** The distance straight across the circle, passing through its center.

No matter how big or small a circle is, if you divide its circumference by its diameter, you will *always* get the same number: pi.

**In formula form:**
$$ \pi = \frac{\text{Circumference}}{\text{Diameter}} $$

This also means that the circumference of a circle can be found using the formula:
$$ \text{Circumference} = \pi \times \text{Diameter} \quad (\text{or } C = \pi d) $$
Since the diameter is twice the radius ($d = 2r$), we can also write it as:
$$ \text{Circumference} = 2 \times \pi \times \text{Radius} \quad (\text{or } C = 2\pi r) $$

### 2. The

In [13]:
# Ask the API how many tokens `contents` occupies for MODEL_ID.
response = client.models.count_tokens(model=MODEL_ID, contents=contents)
print("Prompt tokens:", response.total_tokens)

Prompt tokens: 717


## Test Agent

### Multiple Agents Test

In [16]:
from agents import Agent, InputGuardrail, GuardrailFunctionOutput, Runner
from agents.exceptions import InputGuardrailTripwireTriggered
from pydantic import BaseModel
import asyncio

In [17]:
from openai.types.shared.reasoning import Reasoning

from agents import Agent, ModelSettings, Runner, trace
from agents.items import ReasoningItem

In [18]:
def _gemini_agent(**agent_fields) -> Agent:
    """Build an Agent with the model and settings shared by every agent in this cell.

    Each call creates a fresh LitellmModel for MODEL and a fresh ModelSettings
    with max_tokens=1024 and minimal reasoning effort; `agent_fields` supplies
    the fields that differ per agent (name, instructions, handoffs, ...).
    """
    return Agent(
        model=LitellmModel(model=MODEL, api_key=GEMINI_API_KEY),
        model_settings=ModelSettings(
            max_tokens=1024,
            reasoning={"effort": "minimal"},
        ),
        **agent_fields,
    )


# Specialists the triage agent can hand off to.
math_tutor_agent = _gemini_agent(
    name="Math Tutor",
    handoff_description="Specialist agent for math questions",
    instructions="You provide help with math problems. Explain your reasoning at each step and include examples",
)

history_tutor_agent = _gemini_agent(
    name="History Tutor",
    handoff_description="Specialist agent for historical questions",
    instructions="You provide assistance with historical queries. Explain important events and context clearly.",
)

# Entry point: routes the question to one of the two tutors.
triage_agent = _gemini_agent(
    name="Triage Agent",
    instructions="You determine which agent to use based on the user's homework question",
    handoffs=[history_tutor_agent, math_tutor_agent],
    # input_guardrails=[
    #     InputGuardrail(guardrail_function=homework_guardrail),
    # ],
)

In [19]:
result = await Runner.run(triage_agent, test_df.iloc[2]['problem'])

In [20]:
# First raw model response of the run.
response = result.raw_responses[0]
usage = response.usage

# Token accounting
print("Input tokens:", usage.input_tokens)
print("Output tokens:", usage.output_tokens)
print("Total tokens:", usage.total_tokens)
# Detailed breakdown
print("Reasoning tokens:", usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", usage.input_tokens_details.cached_tokens)

# Output Results: this item exposes `.name` (the recorded output shows a transfer_to_* call)
print("-" * 80)
print("The final result is:")
print(response.output[0].name)

Input tokens: 139
Output tokens: 17
Total tokens: 156
Reasoning tokens: 2
Cached tokens: 0
--------------------------------------------------------------------------------
The final result is:
transfer_to_math_tutor


In [21]:
# Second raw model response of the run.
response = result.raw_responses[1]
usage = response.usage

# Token accounting
print("Input tokens:", usage.input_tokens)
print("Output tokens:", usage.output_tokens)
print("Total tokens:", usage.total_tokens)
# Detailed breakdown
print("Reasoning tokens:", usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", usage.input_tokens_details.cached_tokens)

# Output Results: text of the first content part of the first output item
print("-" * 80)
print("The final result is:")
print(response.output[0].content[0].text)

Input tokens: 94
Output tokens: 1022
Total tokens: 1116
Reasoning tokens: 2
Cached tokens: 0
--------------------------------------------------------------------------------
The final result is:
That's a great problem! Let's break it down step-by-step.

The problem asks us to calculate $\left(4\dfrac{5}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}$.

**Step 1: Convert the mixed number to an improper fraction.**

A mixed number like $4\dfrac{5}{8}$ means $4 + \dfrac{5}{8}$. To convert it to an improper fraction, we multiply the whole number (4) by the denominator (8) and add the numerator (5). Then, we keep the same denominator.

*   $4\dfrac{5}{8} = \dfrac{4 \times 8 + 5}{8}$
*   $4 \times 8 = 32$
*   $32 + 5 = 37$
*   So, $4\dfrac{5}{8} = \dfrac{37}{8}$

**Example:** If you had $2\dfrac{1}{3}$, it would be $\dfrac{2 \times 3 + 1}{3} = \dfrac{6 + 1}{3} = \dfrac{7}{3}$.

Now our expression looks like this: $\left(\dfrac{37}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}$.


## Creating Agent

### Gemini Agent

In [14]:
# System prompt for the router agent: every calculation must go through the `slm_help` tool.
INSTRUCTIONS = """You are a math problem solver with access to a specialized calculation tool. You reason and think in steps.

You MUST use the `slm_help` tool for EVERY calculation, including:
- Basic arithmetic (addition, subtraction, multiplication, division)
- Percentages and fractions
- Equations and algebra
- Any operation involving numbers

## Your Workflow:

1. Read the problem carefully
2. Identify what calculations are needed
3. For EACH calculation (no matter how simple):
   - Call slm_help(question) with the specific calculation
   - Wait for the result
   - Integrate that result in your reasoning
4. After all calculations are complete, provide the final answer in \\boxed{} format

## Examples:

Problem: "What is 156 + 243?"
→ CORRECT: Call slm_help("156 + 243")
→ WRONG: Answering "399" directly

Problem: "If I buy 3 shirts at $15 each, what's the total?"
→ CORRECT: Call slm_help("3 × 15")
→ WRONG: Saying "3 times 15 is 45"

Problem: "Natalia sold 48 clips in April and half as many in May. Total?"
→ Step 1: Call slm_help("48 ÷ 2") to find May's amount
→ Step 2: Call slm_help("48 + [May's amount]") to find total

## Remember:
- ALWAYS use the tool for calculations
- NEVER compute anything yourself
- Present final answer as \\boxed{answer}
"""

### SLM_Help Construction

In [19]:
# Module-level cache for the lazily loaded SLM and its tokenizer.
_SLM = None
_TOK = None
_SLM_ID = "Qwen/Qwen2.5-Math-1.5B-Instruct"

# Number of slm_help() invocations since the last reset.
SLM_CALL_COUNT = 0


def get_slm_call_count() -> int:
    """Report the current value of the slm_help() invocation counter."""
    return SLM_CALL_COUNT


def reset_slm_call_count() -> None:
    """Set the invocation counter back to zero (e.g., between questions)."""
    global SLM_CALL_COUNT
    SLM_CALL_COUNT = 0


def _device_dtype():
    """Pick the (device name, torch dtype) pair used when loading the SLM.

    Returns:
        ("cuda", torch.float16) when CUDA is available; otherwise
        ("mps", torch.float32) when the MPS backend is available; otherwise
        ("cpu", torch.float32).
    """
    if torch.cuda.is_available():
        return "cuda", torch.float16
    fallback_device = "mps" if torch.backends.mps.is_available() else "cpu"
    return fallback_device, torch.float32

def _lazy_load_slm():
    """Return the cached (model, tokenizer) pair, loading whichever half is missing.

    The model and the tokenizer are cached independently in the module-level
    `_SLM` and `_TOK`. If the tokenizer load fails after the model has been
    loaded, the next call therefore retries only the tokenizer instead of
    loading the whole model again.

    Returns:
        Tuple `(model, tokenizer)` for the checkpoint named by `_SLM_ID`.

    Raises:
        Whatever `from_pretrained` raises; the half that did load stays cached.
    """
    global _SLM, _TOK

    if _SLM is None:
        device, dtype = _device_dtype()
        # NOTE(review): trust_remote_code=True allows the checkpoint repo to run
        # its own Python code at load time; confirm it is needed for _SLM_ID.
        _SLM = AutoModelForCausalLM.from_pretrained(
            _SLM_ID,
            # Let the library place the weights on an accelerator when one was
            # detected; on CPU no device map is passed.
            device_map="auto" if device != "cpu" else None,
            torch_dtype=dtype,
            trust_remote_code=True,
        )

    if _TOK is None:
        tokenizer = AutoTokenizer.from_pretrained(_SLM_ID)
        tokenizer.padding_side = "left"
        # Publish the tokenizer only once it is fully configured.
        _TOK = tokenizer

    return _SLM, _TOK

In [26]:

def _slm_generate(question: str):
    """Run the local SLM once on `question`.

    Returns:
        Tuple `(text, latency_seconds, input_tokens, output_tokens)` where
        `text` is the decoded continuation only (prompt tokens are sliced off).
    """
    model, tok = _lazy_load_slm()

    system_prompt = "You are a math calculator. Solve step-by-step. Put final answer in \\boxed{} at the end."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]
    prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tok([prompt], return_tensors="pt").to(model.device)

    # Prompt length in tokens; also the offset at which new tokens start.
    input_tokens = inputs["input_ids"].shape[1]

    t0 = time.time()
    with torch.inference_mode():
        out = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,  # greedy decoding
            pad_token_id=tok.eos_token_id,
        )
    latency = time.time() - t0

    # `out` holds prompt + continuation; count and decode only the new part.
    output_tokens = out.shape[1] - input_tokens
    gen = tok.batch_decode(out[:, input_tokens:], skip_special_tokens=True)[0]
    return gen, latency, input_tokens, output_tokens


def _log_slm_call(question, gen, latency, input_tokens, output_tokens) -> None:
    """Best-effort logging to `router_experiment.tracker`; never raises.

    Nothing is logged unless `router_experiment` has already been imported
    elsewhere, so this function does not trigger that import itself.
    """
    try:
        import sys  # local import: the notebook's top import cell does not import sys

        if 'router_experiment' in sys.modules:
            from router_experiment import tracker
            tracker.log_tool_call(question, gen, latency, input_tokens, output_tokens)
            print(f"[TRACKER] Logged: {latency:.2f}s, {input_tokens}→{output_tokens} tokens")
    except Exception as e:
        print(f"[TRACKER] Failed: {e}")


def slm_help(question: str) -> str:
    """
    Solve a mathematical calculation using specialized math model.
    Returns definitive answer that should be trusted immediately.

    Args:
        question: The calculation to perform

    Returns:
        One of three strings:
        - "CALCULATION COMPLETE: The answer is <answer>. Use this directly."
          when a boxed answer is extracted from the generation;
        - "CALCULATION COMPLETE: <full generation>" when no boxed answer is found;
        - "CALCULATION ERROR: <message>. Solve yourself." if anything raises.
    """
    # NOTE(review): the description and Args text above are kept verbatim; if
    # this function is registered as an agent tool the docstring presumably
    # doubles as the tool description shown to the LLM. Confirm before rewording.
    global SLM_CALL_COUNT
    SLM_CALL_COUNT += 1
    print(f"[TOOL] slm_help: {question} ... call #{SLM_CALL_COUNT}")

    try:
        gen, latency, input_tokens, output_tokens = _slm_generate(question)
        print(gen)

        # Extract boxed answer
        answer = extracted_box(gen)

        _log_slm_call(question, gen, latency, input_tokens, output_tokens)

        if answer is not None:
            print(f"[SLM] Answer: {answer} ({latency:.2f}s)")
            return f"CALCULATION COMPLETE: The answer is {answer}. Use this directly."

        # No boxed answer: hand the whole generation back to the caller.
        print(f"[SLM] No boxed answer ({latency:.2f}s)")
        return f"CALCULATION COMPLETE: {gen}"

    except Exception as e:
        # Deliberate catch-all: the caller always gets a string back, never a
        # raised exception.
        print(f"[SLM] Error: {str(e)}")
        return f"CALCULATION ERROR: {str(e)}. Solve yourself."

In [16]:
# `slm_help` is deliberately left as a plain function so it can also be called
# directly (see the smoke test in the next cell). The agent's `tools` list
# needs a tool object, so the function is wrapped with `function_tool` here
# rather than being passed raw.
agent = Agent(
    name="Math Router",
    instructions=INSTRUCTIONS,
    model=LitellmModel(model=MODEL, api_key=GEMINI_API_KEY),
    model_settings=ModelSettings(
        max_tokens=1024,
        parallel_tool_calls=False,  # one slm_help call at a time
        reasoning={"effort": "medium"}
    ),
    tools=[function_tool(slm_help)],
)

In [27]:
answer = slm_help("Solve 4 + 26*7*0")
print(answer)

[TOOL] slm_help: Solve 4 + 26*7*0 ... call #4


To solve the expression \(4 + 26 \times 7 \times 0\), we need to follow the order of operations, often remembered by the acronym PEMDAS (Parentheses, Exponents, Multiplication and Division (from left to right), Addition and Subtraction (from left to right)).

Here are the steps:

1. Identify and perform any multiplication or division operations first.
   - In the expression \(4 + 26 \times 7 \times 0\), we see that there are three multiplication operations: \(26 \times 7\), \(7 \times 0\), and the multiplication is performed from left to right.
   - First, calculate \(26 \times 7\):
     \[
     26 \times 7 = 182
     \]
   - Next, multiply the result by 0:
     \[
     182 \times 0 = 0
     \]
   - So, the expression now simplifies to:
     \[
     4 + 0
     \]

2. Perform the addition operation.
   - Add 4 and 0:
     \[
     4 + 0 = 4
     \]

Therefore, the final answer is \(\boxed{4}\).
[SLM] Answer: 4 (7.72s)
CALCULATION COMPLETE: The answer is 4. Use this directly.


In [153]:
result = await Runner.run(agent, test_df.iloc[2]['problem'])

In [154]:
result.input

'What is $\\left(4\\dfrac{5}{8}\\right)^{55} \\cdot \\left(\\dfrac{8}{37}\\right)^{55}$?'

In [155]:
print(result.final_output)

I apologize for the technical difficulties. It seems there's an issue with the tool. However, I can perform the conversion and calculation step-by-step.

First, convert the mixed number $4\dfrac{5}{8}$ to an improper fraction:
$4\dfrac{5}{8} = \dfrac{4 \times 8 + 5}{8} = \dfrac{32 + 5}{8} = \dfrac{37}{8}$.

Now substitute this back into the original expression:
$\left(\dfrac{37}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}$

Using the property $(a^n) \cdot (b^n) = (a \cdot b)^n$, we can rewrite the expression as:
$\left(\dfrac{37}{8} \cdot \dfrac{8}{37}\right)^{55}$

Now, perform the multiplication inside the parentheses:
$\dfrac{37}{8} \cdot \dfrac{8}{37} = 1$

Finally, substitute this back into the expression:
$(1)^{55}$

Since $1$ raised to any power is $1$:
$(1)^{55} = 1$

The final answer is $\boxed{1}$.


In [157]:
result.raw_responses

[ModelResponse(output=[ResponseOutputMessage(id='__fake_id__', content=[ResponseOutputText(annotations=[], text='thought\nThe user is asking to calculate the product of two numbers raised to the same power.\nThe expression is $\\left(4\\dfrac{5}{8}\\right)^{55} \\cdot \\left(\\dfrac{8}{37}\\right)^{55}$.\nI can use the property $(a^n) \\cdot (b^n) = (a \\cdot b)^n$.\nFirst, I need to convert the mixed number $4\\dfrac{5}{8}$ to an improper fraction.\n$4\\dfrac{5}{8} = 4 + \\dfrac{5}{8} = \\dfrac{4 \\cdot 8}{8} + \\dfrac{5}{8} = \\dfrac{32}{8} + \\dfrac{5}{8} = \\dfrac{37}{8}$.\nNow, the expression becomes $\\left(\\dfrac{37}{8}\\right)^{55} \\cdot \\left(\\dfrac{8}{37}\\right)^{55}$.\nUsing the property, this is equal to $\\left(\\dfrac{37}{8} \\cdot \\dfrac{8}{37}\\right)^{55}$.\nNow I need to calculate the product inside the parenthesis.\n$\\dfrac{37}{8} \\cdot \\dfrac{8}{37}$.\nI can see that the numbers will cancel out.\n$\\dfrac{37}{8} \\cdot \\dfrac{8}{37} = 1$.\nSo the expressio

In [86]:
# First raw model response of the router run.
response = result.raw_responses[0]
usage = response.usage

# Token accounting
print("Input tokens:", usage.input_tokens)
print("Output tokens:", usage.output_tokens)
print("Total tokens:", usage.total_tokens)
# Detailed breakdown
print("Reasoning tokens:", usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", usage.input_tokens_details.cached_tokens)

# Output Results: text of the first content part of the first output item
print("-" * 80)
print("The final result is:")
print(response.output[0].content[0].text)

Input tokens: 190
Output tokens: 365
Total tokens: 555
Reasoning tokens: 2
Cached tokens: 0
--------------------------------------------------------------------------------
The final result is:
To solve this problem, I need to calculate the product of two numbers raised to the same power.
The problem is $\left(4\dfrac{5}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}$.

First, I will convert the mixed number $4\dfrac{5}{8}$ into an improper fraction.
$4\dfrac{5}{8} = \dfrac{4 \cdot 8 + 5}{8} = \dfrac{32 + 5}{8} = \dfrac{37}{8}$.

Now the expression becomes $\left(\dfrac{37}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}$.

Using the property $(a \cdot b)^n = a^n \cdot b^n$, I can rewrite the expression as:
$\left(\dfrac{37}{8} \cdot \dfrac{8}{37}\right)^{55}$.

Next, I will perform the multiplication inside the parentheses:
$\dfrac{37}{8} \cdot \dfrac{8}{37} = 1$.

So the expression simplifies to $(1)^{55}$.

Finally, I will calculate $(1)^{55}$. Any non-zero number raised 

In [88]:
# Second raw model response of the router run.
response = result.raw_responses[1]
usage = response.usage

# Token accounting
print("Input tokens:", usage.input_tokens)
print("Output tokens:", usage.output_tokens)
print("Total tokens:", usage.total_tokens)
# Detailed breakdown
print("Reasoning tokens:", usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", usage.input_tokens_details.cached_tokens)

# Output Results: text of the first content part of the first output item
print("-" * 80)
print("The final result is:")
print(response.output[0].content[0].text)

Input tokens: 577
Output tokens: 53
Total tokens: 630
Reasoning tokens: 0
Cached tokens: 0
--------------------------------------------------------------------------------
The final result is:
The calculation confirms that $\dfrac{37}{8} \cdot \dfrac{8}{37} = 1$.

Therefore, the expression becomes $(1)^{55}$, which is $1$.

The final answer is $\boxed{1}$.


### Locally Hosted Qwen Agent

In [127]:
# System prompt for the locally hosted Qwen agent (plain step-by-step solving, no tool instructions)
INSTRUCTIONS = (
    "You solve math problems step by step.\n\n"
)


In [128]:
# Stub tool for the local-Qwen experiment: it ignores `problem` and always
# returns the same canned string, so no model is loaded or run here.
# NOTE(review): this rebinds the name `slm_help`, replacing the SLM-backed
# function defined earlier in this notebook; confirm that is intended before
# re-running earlier cells that call slm_help(...) directly.
@function_tool
def slm_help(problem: str) -> str:
    # Example return
    return "CALCULATION COMPLETE: \\boxed{1}"

In [129]:
# Same "Math Router" agent name, but the model is reached through a
# `hosted_vllm/...` LiteLLM id at a localhost base URL instead of Gemini.
local_qwen_model = LitellmModel(
    model="hosted_vllm/Qwen/Qwen3-4B-Instruct-2507",
    base_url="http://localhost:8000/v1",
)
local_qwen_settings = ModelSettings(
    max_tokens=1024,
    parallel_tool_calls=False,
    reasoning={"effort": "low"},
)

agent = Agent(
    name="Math Router",
    instructions=INSTRUCTIONS,
    model=local_qwen_model,
    model_settings=local_qwen_settings,
    # tools=[slm_help],
)

In [130]:
result = await Runner.run(agent, test_df.iloc[2]['problem'])

In [131]:
result

RunResult(input='What is $\\left(4\\dfrac{5}{8}\\right)^{55} \\cdot \\left(\\dfrac{8}{37}\\right)^{55}$?', new_items=[ReasoningItem(agent=Agent(name='Math Router', handoff_description=None, tools=[], mcp_servers=[], mcp_config={}, instructions='You solve math problems step by step.\n\n', prompt=None, handoffs=[], model=<agents.extensions.models.litellm_model.LitellmModel object at 0x7f04f744dae0>, model_settings=ModelSettings(temperature=None, top_p=None, frequency_penalty=None, presence_penalty=None, tool_choice=None, parallel_tool_calls=False, truncation=None, max_tokens=1024, reasoning=Reasoning(effort='low', generate_summary=None, summary=None), verbosity=None, metadata=None, store=None, include_usage=None, response_include=None, top_logprobs=None, extra_query=None, extra_body=None, extra_headers=None, extra_args=None), input_guardrails=[], output_guardrails=[], output_type=None, hooks=None, tool_use_behavior='run_llm_again', reset_tool_choice=True), raw_item=ResponseReasoningItem(

In [134]:
result.raw_responses[0].output[0].summary[0]

Summary(text='We are given the expression:\n\n$$\n\\left(4\\dfrac{5}{8}\\right)^{55} \\cdot \\left(\\dfrac{8}{37}\\right)^{55}\n$$\n\n---\n\n### Step 1: Convert the mixed number to an improper fraction\n\n$$\n4\\dfrac{5}{8} = \\dfrac{4 \\times 8 + 5}{8} = \\dfrac{32 + 5}{8} = \\dfrac{37}{8}\n$$\n\nSo the expression becomes:\n\n$$\n\\left(\\dfrac{37}{8}\\right)^{55} \\cdot \\left(\\dfrac{8}{37}\\right)^{55}\n$$\n\n---\n\n### Step 2: Use the property of exponents: $ a^n \\cdot b^n = (a \\cdot b)^n $\n\n$$\n\\left(\\dfrac{37}{8} \\cdot \\dfrac{8}{37}\\right)^{55}\n$$\n\nNow simplify inside the parentheses:\n\n$$\n\\dfrac{37}{8} \\cdot \\dfrac{8}{37} = \\dfrac{37 \\cdot 8}{8 \\cdot 37} = 1\n$$\n\nSo the expression becomes:\n\n$$\n1^{55} = 1\n$$\n\n---\n\n### ✅ Final Answer:\n\n$$\n\\boxed{1}\n$$', type='summary_text')

In [123]:
result.raw_responses[0].output[0].summary[0]

Summary(text='<tool_call>\n{"name": "slm_help", "arguments": {"problem": "((4 + 5/8) * (8/37))^(55)"}}\n</tool_call>', type='summary_text')

In [106]:
# First raw model response of the local-agent run.
response = result.raw_responses[0]
usage = response.usage

# Token accounting
print("Input tokens:", usage.input_tokens)
print("Output tokens:", usage.output_tokens)
print("Total tokens:", usage.total_tokens)
# Detailed breakdown
print("Reasoning tokens:", usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", usage.input_tokens_details.cached_tokens)

# Output Results (only the header is printed in this cell)
print("-" * 80)
print("The final result is:")

Input tokens: 308
Output tokens: 39
Total tokens: 347
Reasoning tokens: 0
Cached tokens: 0
--------------------------------------------------------------------------------
The final result is:
