# Set Up Project

In [4]:
import os, re, time, torch
from dotenv import load_dotenv
from agents import Agent, function_tool, ModelSettings
from agents.extensions.models.litellm_model import LitellmModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import extracted_box, len_extract_boxed
from grader import math_equal
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [5]:
from agents import Agent, InputGuardrail, GuardrailFunctionOutput, Runner
from agents.exceptions import InputGuardrailTripwireTriggered
from pydantic import BaseModel
import asyncio

In [6]:
from openai.types.shared.reasoning import Reasoning

from agents import Agent, ModelSettings, Runner, trace
from agents.items import ReasoningItem

In [7]:
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# MODEL = "gemini/gemini-2.5-flash"
MODEL = "gemini/gemini-2.5-flash"
GPUS="0 5"
SAMPLES=10
SEED=0
DATASET="math_500"
MAX_TOKENS=8000

# Load Data

In [8]:
import sys
current_dir = os.getcwd()
from pathlib import Path
sys.path.append(str(Path(current_dir).parent))

In [9]:
from tools.data_loader import read_data

In [10]:
test_df = read_data(DATASET, n_samples=SAMPLES, random_seed=SEED)

Loaded 10 problems from GSM8K (test split)


# Multi Agent Set Up

In [None]:
SLM_INSTRUCTION = """
"You are an expert at solving math problems. Solve this problem step by step. \
Make sure that your final answer should be simplified (no units). \
Provide your final answer in \\boxed{{answer}} format."
"""

In [59]:
slm_agent = Agent(
    name="SLM Math Expert",
    instructions=SLM_INSTRUCTION,
    model=LitellmModel(model="hosted_vllm/Qwen/Qwen2.5-Math-1.5B-Instruct", base_url="http://localhost:8000/v1"),
    model_settings=ModelSettings(
        max_tokens=1024,
        reasoning={"effort": "medium"},
        include_usage=True,
    ),
    # tools=[slm_help],
)

In [13]:
LLM_INSTRUCTION = """
"You are an expert at solving math problems. Solve this problem step by step. \
Make sure that your final answer should be simplified (no units). \
Provide your final answer in \\boxed{{answer}} format."
"""

In [14]:
llm_agent = Agent(
    name="LLM Math Experts",
    instructions=LLM_INSTRUCTION,
    model=LitellmModel(model=MODEL, api_key=GEMINI_API_KEY),
    model_settings=ModelSettings(
        max_tokens=4096,
        parallel_tool_calls=False,
        reasoning={"effort": "medium"},
        include_usage=True,
    )
)

In [15]:
print(test_df.iloc[3]['problem'])

The circle $x^2 + y^2 = 2$ and the parabola $y^2 = 8x$ have two common tangents, forming four points of tangency.  Find the area of the quadrilateral formed by the four points of tangency.

[asy]
unitsize(0.8 cm);

real upperparab (real x) {
  return (sqrt(8*x));
}

real lowerparab (real x) {
  return (-sqrt(8*x));
}

pair A, B, C, D;

A = (-1,1);
B = (2,4);
C = (-1,-1);
D = (2,-4);

draw(graph(upperparab,0,3));
draw(graph(lowerparab,0,3));
draw(Circle((0,0),sqrt(2)));
draw(interp(A,B,-0.2)--interp(A,B,1.2));
draw(interp(C,D,-0.2)--interp(C,D,1.2));
draw(A--C);
draw(B--D);

dot(A);
dot(B);
dot(C);
dot(D);
[/asy]


## LLM_Result

In [175]:
llm_result = await Runner.run(llm_agent, test_df.iloc[3]['problem'])

In [176]:
# Reasoning Part
print(llm_result.new_items[0].raw_item.summary[0].text)

**Solving for Common Tangents and Their Points of Tangency**

Okay, so I'm looking at finding the common tangents to a circle and a parabola. The circle's equation is $x^2 + y^2 = 2$, and the parabola is $y^2 = 8x$.  My approach is to assume a common tangent line $y = mx + c$.

For the circle, the distance from the origin (its center) to the tangent line must equal the radius, which is $\sqrt{2}$. This leads me to the equation $c^2 = 2(m^2 + 1)$.

Now, for the parabola, the general condition for tangency of a line $y = mx + c$ to $y^2 = 4ax$ is $c = \frac{a}{m}$. In our case, with $y^2 = 8x$, we have $4a = 8$, so $a = 2$.  Therefore, $c = \frac{2}{m}$.

I can now substitute the expression for *c* from the parabola's tangency condition into the equation derived from the circle. This gives me a quadratic in $m^2$: $(\frac{2}{m})^2 = 2(m^2 + 1)$. Simplifying this leads to $m^4 + m^2 - 2 = 0$. Factoring this, I get $(m^2 - 1)(m^2 + 2) = 0$. Since $m^2$ must be non-negative for real tangent

In [177]:
# Final Output Part
print(llm_result.final_output)

The equation of the circle is $x^2 + y^2 = 2$.
The equation of the parabola is $y^2 = 8x$.

Let the equation of a common tangent be $y = mx + c$.

For the circle $x^2 + y^2 = r^2$, the condition for $y = mx + c$ to be tangent is $r^2(m^2 + 1) = c^2$.
Here $r^2 = 2$, so $2(m^2 + 1) = c^2$. (Equation 1)

For the parabola $y^2 = 4ax$, the condition for $y = mx + c$ to be tangent is $c = \frac{a}{m}$.
Here $4a = 8$, so $a = 2$. Thus, $c = \frac{2}{m}$. (Equation 2)

Substitute Equation 2 into Equation 1:
$2(m^2 + 1) = \left(\frac{2}{m}\right)^2$
$2(m^2 + 1) = \frac{4}{m^2}$
Divide by 2:
$m^2 + 1 = \frac{2}{m^2}$
Multiply by $m^2$:
$m^4 + m^2 = 2$
$m^4 + m^2 - 2 = 0$
This is a quadratic equation in $m^2$. Let $u = m^2$.
$u^2 + u - 2 = 0$
$(u+2)(u-1) = 0$
So $u = -2$ or $u = 1$.
Since $u = m^2$, $m^2$ must be non-negative for real tangents. Thus, $m^2 = 1$.
This gives $m = 1$ or $m = -1$.

Case 1: $m = 1$.
From $c = \frac{2}{m}$, we get $c = \frac{2}{1} = 2$.
The first common tangent is $L_1

## SLM_Result

In [178]:
slm_result = await Runner.run(slm_agent, test_df.iloc[3]['problem'])

In [179]:
# Reasoning Part
slm_result.new_items[0].raw_item

ResponseOutputMessage(id='__fake_id__', content=[ResponseOutputText(annotations=[], text="To find the area of the quadrilateral formed by the points of tangency of the circle \\(x^2 + y^2 = 2\\) and the parabola \\(y^2 = 8x\\), we need to determine the coordinates of these points of tangency.\n\nFirst, let's find the points of tangency of the parabola \\(y^2 = 8x\\) with the circle \\(x^2 + y^2 = 2\\). We start by substituting \\(y^2 = 8x\\) into the circle's equation \\(x^2 + y^2 = 2\\):\n\n\\[\nx^2 + 8x = 2\n\\]\n\nRearrange this into a standard quadratic equation form:\n\n\\[\nx^2 + 8x - 2 = 0\n\\]\n\nWe solve this quadratic equation using the quadratic formula \\(x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}\\), where \\(a = 1\\), \\(b = 8\\), and \\(c = -2\\):\n\n\\[\nx = \\frac{-8 \\pm \\sqrt{8^2 - 4 \\cdot 1 \\cdot (-2)}}{2 \\cdot 1} = \\frac{-8 \\pm \\sqrt{64 + 8}}{2} = \\frac{-8 \\pm \\sqrt{72}}{2} = \\frac{-8 \\pm 6\\sqrt{2}}{2} = -4 \\pm 3\\sqrt{2}\n\\]\n\nThus, the \\(x\\)-coor

In [180]:
# Final Output Part
print(slm_result.final_output)

To find the area of the quadrilateral formed by the points of tangency of the circle \(x^2 + y^2 = 2\) and the parabola \(y^2 = 8x\), we need to determine the coordinates of these points of tangency.

First, let's find the points of tangency of the parabola \(y^2 = 8x\) with the circle \(x^2 + y^2 = 2\). We start by substituting \(y^2 = 8x\) into the circle's equation \(x^2 + y^2 = 2\):

\[
x^2 + 8x = 2
\]

Rearrange this into a standard quadratic equation form:

\[
x^2 + 8x - 2 = 0
\]

We solve this quadratic equation using the quadratic formula \(x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}\), where \(a = 1\), \(b = 8\), and \(c = -2\):

\[
x = \frac{-8 \pm \sqrt{8^2 - 4 \cdot 1 \cdot (-2)}}{2 \cdot 1} = \frac{-8 \pm \sqrt{64 + 8}}{2} = \frac{-8 \pm \sqrt{72}}{2} = \frac{-8 \pm 6\sqrt{2}}{2} = -4 \pm 3\sqrt{2}
\]

Thus, the \(x\)-coordinates of the points of tangency are \(x_1 = -4 + 3\sqrt{2}\) and \(x_2 = -4 - 3\sqrt{2}\).

Next, we find the corresponding \(y\)-coordinates by substitutin

# Orchestrator Agent

In [60]:
Orchestrator_INSTRUCTION = """
You are a math problem orchestrator. You **cannot solve problems yourself**. 
For easy/medium problems, hand off to slm_agent.
For hard problems, hand off to llm_agent.
Do not attempt any reasoning yourself.
Return exactly the output from the handoff agent.
"""


In [61]:
Orchestrator_MODEL = "gemini/gemini-2.5-flash-lite"

In [62]:
orchestrator_agent = Agent(
    name="orchestrator_agent",
    instructions=Orchestrator_INSTRUCTION,
    model=LitellmModel(model=Orchestrator_MODEL, api_key=GEMINI_API_KEY),
    model_settings=ModelSettings(
        max_tokens=1024,
        reasoning={"effort": "minimal"},
        include_usage=True,
    ),
    # tools=[
    #     slm_agent.as_tool(
    #         tool_name="slm_help",
    #         tool_description="SLM Math Expert",
    #     ),
    #     llm_agent.as_tool(
    #         tool_name="llm_help",
    #         tool_description="LLM Math Expert",
    #     ),
    # ],
    handoffs=[slm_agent, llm_agent],
)

## Examples using LLM_Agent

In [52]:
print(test_df.iloc[3]['problem'])

The circle $x^2 + y^2 = 2$ and the parabola $y^2 = 8x$ have two common tangents, forming four points of tangency.  Find the area of the quadrilateral formed by the four points of tangency.

[asy]
unitsize(0.8 cm);

real upperparab (real x) {
  return (sqrt(8*x));
}

real lowerparab (real x) {
  return (-sqrt(8*x));
}

pair A, B, C, D;

A = (-1,1);
B = (2,4);
C = (-1,-1);
D = (2,-4);

draw(graph(upperparab,0,3));
draw(graph(lowerparab,0,3));
draw(Circle((0,0),sqrt(2)));
draw(interp(A,B,-0.2)--interp(A,B,1.2));
draw(interp(C,D,-0.2)--interp(C,D,1.2));
draw(A--C);
draw(B--D);

dot(A);
dot(B);
dot(C);
dot(D);
[/asy]


In [97]:
llm_orchestrator_result = await Runner.run(orchestrator_agent, test_df.iloc[3]['problem'])

In [104]:
llm_orchestrator_result.new_items

[ReasoningItem(agent=Agent(name='orchestrator_agent', handoff_description=None, tools=[], mcp_servers=[], mcp_config={}, instructions='\nYou are a math problem orchestrator. You **cannot solve problems yourself**. \nFor easy/medium problems, hand off to slm_agent.\nFor hard problems, hand off to llm_agent.\nDo not attempt any reasoning yourself.\nReturn exactly the output from the handoff agent.\n', prompt=None, handoffs=[Agent(name='SLM Math Expert', handoff_description=None, tools=[], mcp_servers=[], mcp_config={}, instructions='\n"You are an expert at solving math problems. Solve this problem step by step. Make sure that your final answer should be simplified (no units). Provide your final answer in \\boxed{{answer}} format."\n', prompt=None, handoffs=[], model=<agents.extensions.models.litellm_model.LitellmModel object at 0x7f37f2ba44c0>, model_settings=ModelSettings(temperature=None, top_p=None, frequency_penalty=None, presence_penalty=None, tool_choice=None, parallel_tool_calls=N

In [112]:
llm_orchestrator_result.new_items[-1].agent.name

'LLM Math Experts'

In [105]:
# Final Output Part
print(llm_orchestrator_result.final_output)

Let the equation of a common tangent be $y = mx + c$.

For the circle $x^2 + y^2 = 2$, the center is $(0,0)$ and the radius is $\sqrt{2}$. The distance from the center to the tangent line $mx - y + c = 0$ must be equal to the radius.
$\frac{|c|}{\sqrt{m^2 + (-1)^2}} = \sqrt{2}$
$c^2 = 2(m^2 + 1)$ (1)

For the parabola $y^2 = 8x$, substitute $y = mx + c$ into the equation:
$(mx + c)^2 = 8x$
$m^2x^2 + 2mcx + c^2 = 8x$
$m^2x^2 + (2mc - 8)x + c^2 = 0$
For the line to be tangent, the discriminant of this quadratic equation must be zero:
$\Delta = (2mc - 8)^2 - 4(m^2)(c^2) = 0$
$4m^2c^2 - 32mc + 64 - 4m^2c^2 = 0$
$-32mc + 64 = 0$
$32mc = 64$
$mc = 2$ (2)

Now we solve the system of equations (1) and (2). From (2), $c = \frac{2}{m}$. Substitute this into (1):
$(\frac{2}{m})^2 = 2(m^2 + 1)$
$\frac{4}{m^2} = 2m^2 + 2$
Multiply by $m^2$:
$4 = 2m^4 + 2m^2$
$2m^4 + 2m^2 - 4 = 0$
$m^4 + m^2 - 2 = 0$
Let $u = m^2$. Then $u^2 + u - 2 = 0$.
$(u + 2)(u - 1) = 0$
Since $m$ is real, $m^2 \ge 0$. So $u = 

In [106]:
response = llm_orchestrator_result.raw_responses[0]
print("Input tokens:", response.usage.input_tokens)
print("Output tokens:", response.usage.output_tokens)
print("Total tokens:", response.usage.total_tokens)
# Access detailed breakdown
print("Reasoning tokens:", response.usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", response.usage.input_tokens_details.cached_tokens)


# Output Results
print("-"*80)
print("The final result is:")
print(response.output[0].summary[0].text)

Input tokens: 434
Output tokens: 180
Total tokens: 614
Reasoning tokens: 163
Cached tokens: 0
--------------------------------------------------------------------------------
The final result is:
**Assessment and Action Plan for a Complex Geometric Problem**

Okay, so I've got this problem involving a circle, a parabola, and the area of a quadrilateral formed by their tangents. My initial impression? This is going to be a tough one, definitely requiring some serious mathematical manipulation and geometric understanding. It feels like a classic "hard" problem that's going to demand more than just a quick calculation.

Based on the instructions, since this isn't a simple or moderately challenging problem, I'm going to pass the baton. I need to get this over to the `llm_agent` and let the experts handle the heavy lifting. Specifically, I'll execute `default_api.transfer_to_llm_math_experts()` and trust them to come up with the correct solution and area. I am going to hold back any attempt

In [107]:
response = llm_orchestrator_result.raw_responses[1]
print("Input tokens:", response.usage.input_tokens)
print("Output tokens:", response.usage.output_tokens)
print("Total tokens:", response.usage.total_tokens)
# Access detailed breakdown
print("Reasoning tokens:", response.usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", response.usage.input_tokens_details.cached_tokens)


# Output Results
print("-"*80)
print("The reasoning of final result is:")
print(response.output[0].summary[0].text)
print("The answering final result is:")
print(response.output[1].content[0].text)

Input tokens: 372
Output tokens: 3343
Total tokens: 3715
Reasoning tokens: 1503
Cached tokens: 0
--------------------------------------------------------------------------------
The reasoning of final result is:
**Area of a Tangent Quadrilateral**

Okay, so I'm looking at this problem, and it's asking for the area of a quadrilateral formed by the points of tangency between two common tangents to a circle and a parabola. My initial thought is to set up the equation of a common tangent, and then use the geometric properties of tangency to solve for the relevant parameters.

First, I define the tangent as $y = mx + c$.  I recognize that the distance from the center of the circle (the origin) to this tangent must equal the radius of the circle, which is $\sqrt{2}$. This leads me to the equation $|c| = \sqrt{2(m^2 + 1)}$.  Squaring both sides gives $c^2 = 2(m^2 + 1)$.  That's equation (1).

Next, I consider the parabola.  I substitute the equation of the tangent into the parabola's equation

## Examples using SLM_Agent

In [108]:
print(test_df.iloc[2]['problem'])

What is $\left(4\dfrac{5}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}$?


In [109]:
slm_orchestrator_result = await Runner.run(orchestrator_agent, test_df.iloc[2]['problem'])

In [110]:
slm_orchestrator_result.new_items

[ReasoningItem(agent=Agent(name='orchestrator_agent', handoff_description=None, tools=[], mcp_servers=[], mcp_config={}, instructions='\nYou are a math problem orchestrator. You **cannot solve problems yourself**. \nFor easy/medium problems, hand off to slm_agent.\nFor hard problems, hand off to llm_agent.\nDo not attempt any reasoning yourself.\nReturn exactly the output from the handoff agent.\n', prompt=None, handoffs=[Agent(name='SLM Math Expert', handoff_description=None, tools=[], mcp_servers=[], mcp_config={}, instructions='\n"You are an expert at solving math problems. Solve this problem step by step. Make sure that your final answer should be simplified (no units). Provide your final answer in \\boxed{{answer}} format."\n', prompt=None, handoffs=[], model=<agents.extensions.models.litellm_model.LitellmModel object at 0x7f37f2ba44c0>, model_settings=ModelSettings(temperature=None, top_p=None, frequency_penalty=None, presence_penalty=None, tool_choice=None, parallel_tool_calls=N

In [111]:
slm_orchestrator_result.new_items[-1].agent.name

'SLM Math Expert'

In [113]:
# Final Output Part
print(slm_orchestrator_result.final_output)

To solve the expression \(\left(4\dfrac{5}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}\), we will follow these steps:

1. **Convert the mixed number to an improper fraction:**
   \[
   4\dfrac{5}{8} = 4 + \dfrac{5}{8} = \dfrac{32}{8} + \dfrac{5}{8} = \dfrac{37}{8}
   \]

2. **Rewrite the original expression using the improper fraction:**
   \[
   \left(\dfrac{37}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}
   \]

3. **Use the property of exponents that states \((a^m) \cdot (b^m) = (a \cdot b)^m\):**
   \[
   \left(\dfrac{37}{8} \cdot \dfrac{8}{37}\right)^{55}
   \]

4. **Simplify the fraction inside the parentheses:**
   \[
   \dfrac{37}{8} \cdot \dfrac{8}{37} = \dfrac{37 \cdot 8}{8 \cdot 37} = \dfrac{296}{296} = 1
   \]

5. **Raise the simplified fraction to the power of 55:**
   \[
   1^{55} = 1
   \]

Therefore, the final answer is:
\boxed{1}


In [114]:
response = slm_orchestrator_result.raw_responses[0]
print("Input tokens:", response.usage.input_tokens)
print("Output tokens:", response.usage.output_tokens)
print("Total tokens:", response.usage.total_tokens)
# Access detailed breakdown
print("Reasoning tokens:", response.usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", response.usage.input_tokens_details.cached_tokens)


# Output Results
print("-"*80)
print("The final result is:")
print(response.output[0].summary[0].text)

Input tokens: 190
Output tokens: 400
Total tokens: 590
Reasoning tokens: 383
Cached tokens: 0
--------------------------------------------------------------------------------
The final result is:
**My Analysis of the Expression**

Okay, here we go. The user has given me this expression to calculate: $(4\frac{5}{8})^{55} \cdot (\frac{8}{37})^{55}$. At first glance, the large exponent makes direct calculation impractical, which means it probably requires a clever application of some mathematical principle.

I see that it's the multiplication of two terms raised to the same power. That instantly triggers the thought of using the rule $a^n \cdot b^n = (a \cdot b)^n$. This problem might be quite manageable if I can simplify things by applying that property.

First, I need to convert that mixed number into an improper fraction. $4\frac{5}{8}$ is equivalent to $\frac{37}{8}$. Substituting that back into the original expression gives me $(\frac{37}{8})^{55} \cdot (\frac{8}{37})^{55}$. Now I ca

In [120]:
slm_orchestrator_result.to_input_list()

[{'content': 'What is $\\left(4\\dfrac{5}{8}\\right)^{55} \\cdot \\left(\\dfrac{8}{37}\\right)^{55}$?',
  'role': 'user'},
 {'id': '__fake_id__',
  'summary': [{'text': "**My Analysis of the Expression**\n\nOkay, here we go. The user has given me this expression to calculate: $(4\\frac{5}{8})^{55} \\cdot (\\frac{8}{37})^{55}$. At first glance, the large exponent makes direct calculation impractical, which means it probably requires a clever application of some mathematical principle.\n\nI see that it's the multiplication of two terms raised to the same power. That instantly triggers the thought of using the rule $a^n \\cdot b^n = (a \\cdot b)^n$. This problem might be quite manageable if I can simplify things by applying that property.\n\nFirst, I need to convert that mixed number into an improper fraction. $4\\frac{5}{8}$ is equivalent to $\\frac{37}{8}$. Substituting that back into the original expression gives me $(\\frac{37}{8})^{55} \\cdot (\\frac{8}{37})^{55}$. Now I can apply my

In [116]:
response = slm_orchestrator_result.raw_responses[1]
print("Input tokens:", response.usage.input_tokens)
print("Output tokens:", response.usage.output_tokens)
print("Total tokens:", response.usage.total_tokens)
# Access detailed breakdown
print("Reasoning tokens:", response.usage.output_tokens_details.reasoning_tokens)
print("Cached tokens:", response.usage.input_tokens_details.cached_tokens)


# Output Results
print("-"*80)
print("The answering final result is:")
print(response.output[0].content[0].text)

Input tokens: 162
Output tokens: 366
Total tokens: 528
Reasoning tokens: 0
Cached tokens: 0
--------------------------------------------------------------------------------
The answering final result is:
To solve the expression \(\left(4\dfrac{5}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}\), we will follow these steps:

1. **Convert the mixed number to an improper fraction:**
   \[
   4\dfrac{5}{8} = 4 + \dfrac{5}{8} = \dfrac{32}{8} + \dfrac{5}{8} = \dfrac{37}{8}
   \]

2. **Rewrite the original expression using the improper fraction:**
   \[
   \left(\dfrac{37}{8}\right)^{55} \cdot \left(\dfrac{8}{37}\right)^{55}
   \]

3. **Use the property of exponents that states \((a^m) \cdot (b^m) = (a \cdot b)^m\):**
   \[
   \left(\dfrac{37}{8} \cdot \dfrac{8}{37}\right)^{55}
   \]

4. **Simplify the fraction inside the parentheses:**
   \[
   \dfrac{37}{8} \cdot \dfrac{8}{37} = \dfrac{37 \cdot 8}{8 \cdot 37} = \dfrac{296}{296} = 1
   \]

5. **Raise the simplified fraction to the powe