In [1]:
%load_ext autoreload
%autoreload 2

import sys, os

# Go up one directory from `b/` to project root
root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(root_dir)

In [37]:
import logging
from neurosurfer.models.chat_models.transformers import TransformersModel
from neurosurfer import config 
import torch
torch.cuda.empty_cache()

DEFAULT_TRANSFORMERS_MODEL_PARAMS = dict({
    "model_name": "/home/nomi/workspace/Model_Weights/Qwen3-8B-unsloth-bnb-4bit",
    "max_seq_length": 16_000,
    "load_in_4bit": True,
    "enable_thinking": False,  # main_gpu interpretation
    "verbose": False
})

LOGGER = logging.getLogger()
LLM = TransformersModel(
    **DEFAULT_TRANSFORMERS_MODEL_PARAMS,
    stop_words=["Observation:"],
    logger = logging.getLogger(),
)

[92mINFO    [0m | [90m2025-11-17 14:38:09[0m | [96mtransformers.py:init_model[0m | Initializing Transformers model.


[92mINFO    [0m | [90m2025-11-17 14:38:09[0m | [96mmodeling.py:get_balanced_memory[0m | We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:00<00:00,  2.07it/s]

[92mINFO    [0m | [90m2025-11-17 14:38:10[0m | [96mtransformers.py:init_model[0m | Transformers model initialized successfully.





In [38]:
# streaming response example
from IPython.display import display, Markdown, clear_output

system_prompt = "You are a joker."
user_prompt = """Tell me a short and light-hearted joke."""

stream_response = LLM.ask(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    stream=True
)

md_display = display(Markdown(""), display_id=True)
for chunk in stream_response:
    chunk = chunk.choices[0].delta.content or ""
    print(chunk, flush=True, end="")



Why don't skeletons fight each other?  
Because they don't have the *guts*! üòÑ

### Agent Test

In [39]:
# agent normal response
from neurosurfer.agents import Agent, AgentConfig
# from neurosurfer.tracing import RichTracer
from pydantic import BaseModel

agent_config = AgentConfig(
    strict_tool_call=True,
    return_stream_by_default=True,
    temperature=0.7,
    max_new_tokens=4096,
)
agent = Agent(llm=LLM, config=agent_config, log_traces=True)

# normal response
print("Normal Response:")
agent_response = agent.run(user_prompt="What is AI (one line)?", stream=False)
# print(agent_response.response)

# # streaming response
# print("\n\nStreaming Response:")
# for c in agent.run(user_prompt="What are top 3 applications of AI (one line)?").response:
#     print(c, flush=True, end="")


Normal Response:
[1;4;33müß† Thinking[0m[1;4;33m...[0m

[1;4;36m[[0m[1;4;36mmain_agent[0m[1;4;36m][0m[1;4;36m Tracing Start![0m
[2m ‚ñ∂ [0m[1;2m[[0m[1;2;36m1[0m[1;2m][0m[1;2m[[0m[2mstep.agent[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.run'[0m
[2m     ‚ñ∂ [0m[1;2m[[0m[1;2;36m2[0m[1;2m][0m[1;2m[[0m[2mstep.llm.call[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.free_text_call'[0m
[2m     ‚óÄ [0m[1;2m[[0m[1;2;36m2[0m[1;2m][0m[1;2m[[0m[2mstep.llm.call[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.free_text_call'[0m[2m took [0m[1;2;36m0.[0m[2m873s; [0m[2;33merror[0m[2m=[0m[2;3;91mFalse[0m
[2m ‚óÄ [0m[1;2m[[0m[1;2;36m1[0m[1;2m][0m[1;2m[[0m[2mstep.agent[0m[1;2m][0m[2m [0m[2;33magent_id[0m[

**Structured Response**

In [None]:
# Structured Response examples
class AIApplication(BaseChatModel):
    title: str
    description: str

class AI(BaseChatModel):
    definition: str
    history: str
    modern_frameworks: str
    applications: list[AIApplication]

user_query = "What is AI and list 3 of its top application, and 3 concerns."
agent_response = agent.run(user_prompt=user_query, output_schema=AI)

print("\n\nStructured Response:")
print(agent_response.response.json_obj)  

[1;4;33müß† Thinking[0m[1;4;33m...[0m

[1;4;36mTracing Start![0m
[2m ‚ñ∂ [0m[1;2m[[0m[1;2;36m1[0m[1;2m][0m[1;2m[[0m[2mstep.agent[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.run'[0m
[2m     ‚ñ∂ [0m[1;2m[[0m[1;2;36m2[0m[1;2m][0m[1;2m[[0m[2mstep.llm.call[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.structured_call.first_pass'[0m
[2m     ‚óÄ [0m[1;2m[[0m[1;2;36m2[0m[1;2m][0m[1;2m[[0m[2mstep.llm.call[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.structured_call.first_pass'[0m[2m took [0m[1;2;36m4.[0m[2m660s; [0m[2;33merror[0m[2m=[0m[2;3;91mFalse[0m
[2m ‚óÄ [0m[1;2m[[0m[1;2;36m1[0m[1;2m][0m[1;2m[[0m[2mstep.agent[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33

In [6]:
agent_response.traces.steps[1].model_dump()

{'step_id': 2,
 'kind': 'llm.call',
 'label': 'agent.structured_call.first_pass',
 'node_id': None,
 'agent_id': 'main_agent',
 'started_at': 1763359998.1342587,
 'duration_ms': 4659,
 'inputs': {'schema': 'AI',
  'system_prompt_len': 52,
  'user_prompt_len': 61,
  'user_prompt': 'What is AI and list 3 of its top application, and 3 concerns.',
  'system_prompt': "You are a precise and rule-abiding assistant.  \nYour task is to produce only a single valid JSON object following the schema below.\n\nStructured Output Contract:\n- Output only JSON ‚Äî no markdown, code fences, or explanations.  \n- JSON must be strictly valid (RFC 8259): use double quotes for all keys and string values.  \n- Do not include extra keys or any text outside the JSON object.  \n- All required fields must be present, even if empty.  \n- Arrays must contain at least one object when applicable.  \n- The JSON must be a single complete object (not pretty-printed, no trailing commas).  \n- Failure to comply with this

**Tool Calling Test**

In [9]:
from neurosurfer.tools.toolkit import Toolkit
from neurosurfer.tools.tool_spec import ToolSpec, ToolParam, ToolReturn
from neurosurfer.tools.base_tool import BaseTool, ToolResponse

# Simple Calculator Tool
class CalculatorTool(BaseTool):
    spec = ToolSpec(
        name="calculator",
        description="Perform basic arithmetic operations such as addition, subtraction, multiplication, and division.",
        when_to_use="Use this tool when you need to perform basic arithmetic operations.",
        inputs=[
            ToolParam(name="num1", type="float", description="The first number.", required=True),
            ToolParam(name="num2", type="float", description="The second number.", required=True),
            ToolParam(name="operation", type="string", description="The operation to perform strictly one of ['add', 'subtract', 'multiply', 'divide'].", required=True)
        ],
        returns=ToolReturn(type="float", description="The result of the arithmetic operation.")
    )

    def __init__(self, final_answer: bool = False):
        self.final_answer = final_answer

    def __call__(self, num1: float, num2: float, operation: str) -> ToolResponse:
        if operation not in ["add", "subtract", "multiply", "divide"]:
            return ToolResponse(
                final_answer=False,
                results="Invalid operation. Supported operations are 'add', 'subtract', 'multiply', and 'divide'.",
                extras={}
            )
        
        if operation == "divide" and num2 == 0:
            return ToolResponse(
                final_answer=False,
                results="Division by zero is not allowed.",
                extras={}
            )
        try:
            num1 = float(num1)
            num2 = float(num2)
            if operation == "add":
                result = num1 + num2
            elif operation == "subtract":
                result = num1 - num2
            elif operation == "multiply":
                result = num1 * num2
            elif operation == "divide":
                result = num1 / num2
        except Exception as e:
            return ToolResponse(
                final_answer=False,
                results=f"An error occurred: {str(e)}",
                extras={}
            )
        
        return ToolResponse(
            final_answer=self.final_answer,
            results=float(result),
            extras={}
        )

calculator_tool = CalculatorTool()
toolkit = Toolkit(tools=[calculator_tool])

# print("Tool description:")
# print(calculator_tool.get_tool_description())
# print()

agent = Agent(llm=LLM, toolkit=toolkit, verbose=True)

print("Agent with choice between tools and plain text:")
agent_response = agent.run(user_prompt="What is AI?", strict_tool_call=False)
# print(agent_response.response)

print("\n\nAgent with strict tool call:")
agent_response = agent.run(user_prompt="What is one forth of a 100?", strict_tool_call=True)
print(agent_response.response)


[92mINFO    [0m | [90m2025-11-17 10:14:12[0m | [96mtoolkit.py:register_tool[0m | Registered tool: calculator
Agent with choice between tools and plain text:
[1;4;33müß† Thinking[0m[1;4;33m...[0m

[1;4;36mTracing Start![0m
[2m ‚ñ∂ [0m[1;2m[[0m[1;2;36m1[0m[1;2m][0m[1;2m[[0m[2mstep.agent[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.run'[0m
[2m     ‚ñ∂ [0m[1;2m[[0m[1;2;36m2[0m[1;2m][0m[1;2m[[0m[2mstep.llm.call[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.route_and_call.router_llm_call'[0m
        [1;32mINFO: Returning plain response[0m
[2m     ‚óÄ [0m[1;2m[[0m[1;2;36m2[0m[1;2m][0m[1;2m[[0m[2mstep.llm.call[0m[1;2m][0m[2m [0m[2;33magent_id[0m[2m=[0m[2;32m'main_agent'[0m[2m [0m[2;33mlabel[0m[2m=[0m[2;32m'agent.route_and_call.router_llm_call'[0m[2m took [0m[1;2;36m1.[0m

In [10]:
step_json = agent_response.traces.steps[1].model_dump()
# print(step_json)

print()
print(step_json["inputs"]["system_prompt"])



You are a stateless tool router.
Your task is to select exactly ONE tool from the catalog below and output STRICT JSON describing how to call it.

Always respond with a single one-line valid JSON object:
{"tool": "<tool_name>", "inputs": {<param>: <value>}}

Rules:
- Output MUST contain exactly the keys "tool" and "inputs".
- Select at most one tool; if none applies or inputs are unclear, use:
  {"tool": "none", "inputs": {}}
- Use only parameters explicitly defined by that tool ‚Äî do NOT invent, rename, or add extra fields.
- Include only required parameters unless an optional one is obviously needed.
- Do NOT produce natural language; emit JSON only.

TOOLS CATALOG:
Available tools:
Tool Name: `calculator`
Description: Perform basic arithmetic operations such as addition, subtraction, multiplication, and division.
When to use: Use this tool when you need to perform basic arithmetic operations.
Tool Inputs:
- `num1`: float (required) ‚Äî The first number.
- `num2`: float (required) 

In [12]:
# from rich.console import Console

# console = Console(force_jupyter=False, force_terminal=True, width=200)

# msg = "step.tool.execute label='agent.route_and_call.tool_execute' agent_id=None took 0.001s; error=False"
# console.print(f"[dim]{msg}[/dim]")
# console.print(f"[dim]{msg}[/dim]")
# console.print(f"[cyan underline]Hello World!")
# console.print("FOO", style="white on blue")
# console.print("[bold italic yellow on red blink]This text is impossible to read")
# console.print("[bold red]alert![/bold red] Something happened")
# console.print("[bold red]\\[trace]![/bold red] Something happened")
# console.print("[underline][bold green]Tracing Start![/bold green] Something happened")
# console.print("[bold]Bold[italic] bold and italic [/bold]italic[/italic]")
# console.print("Visit my [link=https://www.willmcgugan.com]blog[/link]!")

## RAG wiring so the agent ‚Äúunderstands‚Äù the Neurosurf codebase

You‚Äôll ingest the repo once, then run a retriever to answer code questions. The Planner can call the retriever first to form a precise implementation plan.

### FileReader and Chunker Test

In [22]:
# # scripts/index_repo_for_rag.py
# from pathlib import Path
# from neurosurfer.rag.ingestor import RAGIngestor
# from neurosurfer.rag.chunker import Chunker
# from neurosurfer.rag.filereader import FileReader
# from neurosurfer.vectorstores.chroma import ChromaVectorStore
# from neurosurfer.models.embedders.sentence_transformer import SentenceTransformerEmbedder

# embedder = SentenceTransformerEmbedder("intfloat/e5-small-v2")
# vs = ChromaVectorStore(collection_name="neurosurf-repo")
# ing = RAGIngestor(
#     embedder=embedder,
#     vector_store=vs, 
#     chunker=Chunker(), 
#     file_reader=FileReader(),
#     default_metadata={"collection": "neurosurf"}
# )

# root_dir = Path(os.getcwd()).parent.joinpath("neurosurfer")
# ing.add_directory(root_dir)  # the repo root
# print(ing.build())


## Graph AGENT

### YAML Flow

In [40]:
# test web search tool
from neurosurfer.tools.websearch import WebSearchTool, WebSearchConfig
from neurosurfer.tools.toolkit import Toolkit

api_key = os.getenv("SERPAPI_KEY", "API Key not found...")
print("API Key: ", f"{api_key[:8]}...")

web_search_tool = WebSearchTool(
    config=WebSearchConfig(
        engine="serpapi",
        engine_kwargs={"api_key": api_key},
        max_results=2,
        enable_crawl=True,
        max_crawl_results=1,
    ),
    llm=None,
)

searches = web_search_tool(query="What is the capital of France?")
print(searches)

toolkit = Toolkit(tools=[web_search_tool])
# print(toolkit.registry)

API Key:  f443633b...
ToolResponse(final_answer=False, results={'query': 'What is the capital of France?', 'summary': "Top 2 results out of ~787,000,000 results for: 'What is the capital of France?'\n1. Paris ‚Äî https://en.wikipedia.org/wiki/Paris\n2. Paris, France - Intercultural Cities Programme ‚Äî https://www.coe.int/en/web/interculturalcities/paris", 'results': [{'title': 'Paris', 'url': 'https://en.wikipedia.org/wiki/Paris', 'snippet': 'Paris is the capital and largest city of France, with an estimated city population of 2,048,472 in an area of 105.4 km2 (40.7 sq mi), and a metropolitan ...', 'score': None, 'content': 'Coordinates : 48¬∞51‚Ä≤24‚Ä≥N 2¬∞21‚Ä≤8‚Ä≥E \ufeff / \ufeff 48.85667¬∞N 2.35222¬∞E \ufeff / 48.85667; 2.35222 From Wikipedia, the free encyclopedia Capital and largest city of France This article is about the capital city of France. For other uses, see Paris (disambiguation) . "Parisien" redirects here. For other uses, see Parisien (disambiguation) . It has been s

In [41]:
print(searches.results["results"][0]["content"])

searches.results

Coordinates : 48¬∞51‚Ä≤24‚Ä≥N 2¬∞21‚Ä≤8‚Ä≥E Ôªø / Ôªø 48.85667¬∞N 2.35222¬∞E Ôªø / 48.85667; 2.35222 From Wikipedia, the free encyclopedia Capital and largest city of France This article is about the capital city of France. For other uses, see Paris (disambiguation) . "Parisien" redirects here. For other uses, see Parisien (disambiguation) . It has been suggested that portions of this article be split out into articles titled Culture of Paris and Infrastructure in Paris . ( Discuss ) (August 2025) Place in √éle-de-France, France Paris [ a ] is the capital and largest city of France , with an estimated city population of 2,048,472 in an area of 105.4 km 2 (40.7 sq mi), and a metropolitan population of 13,171,056 as of January 2025 [update] . [ 3 ] Located on the river Seine in the centre of the √éle-de-France region, it is the largest metropolitan area and fourth-most populous city in the European Union (EU). Nicknamed the City of Light, partly because of its role in the Age of Enlighte

{'query': 'What is the capital of France?',
 'summary': "Top 2 results out of ~787,000,000 results for: 'What is the capital of France?'\n1. Paris ‚Äî https://en.wikipedia.org/wiki/Paris\n2. Paris, France - Intercultural Cities Programme ‚Äî https://www.coe.int/en/web/interculturalcities/paris",
 'results': [{'title': 'Paris',
   'url': 'https://en.wikipedia.org/wiki/Paris',
   'snippet': 'Paris is the capital and largest city of France, with an estimated city population of 2,048,472 in an area of 105.4 km2 (40.7 sq mi), and a metropolitan ...',
   'score': None,
   'content': 'Coordinates : 48¬∞51‚Ä≤24‚Ä≥N 2¬∞21‚Ä≤8‚Ä≥E \ufeff / \ufeff 48.85667¬∞N 2.35222¬∞E \ufeff / 48.85667; 2.35222 From Wikipedia, the free encyclopedia Capital and largest city of France This article is about the capital city of France. For other uses, see Paris (disambiguation) . "Parisien" redirects here. For other uses, see Parisien (disambiguation) . It has been suggested that portions of this article be split o

In [45]:
import asyncio
from __future__ import annotations

import logging

from neurosurfer.models.chat_models.base import BaseChatModel as ChatBaseChatModel
from graph import load_graph, GraphExecutor
from graph.manager import ManagerConfig

from neurosurfer.agents.common.tracing import RichTracer


def run_async(coro):
    """
    In scripts: runs the coroutine immediately.
    In notebooks: returns the coroutine so you can `await` it.
    """
    try:
        loop = asyncio.get_running_loop()  # Jupyter: loop is already running
    except RuntimeError:
        return asyncio.run(coro)
    else:
        return coro  # caller must: result = await run_async(coro)

# tracer = RichTracer()  # prints each span start/end
graph_spec = load_graph("blog_workflow.yml")
print(graph_spec)

executor = GraphExecutor(
    graph=graph_spec,
    llm=LLM,
    manager_llm=LLM,
    manager_config=ManagerConfig(
        temperature=0.5,
        max_new_tokens=4096,
    ),
    toolkit=toolkit,
    tracer=None,
    log_traces=True,
)

# Run workflow
graph_inputs = {
    "topic_title": "Using tool-augmented LLM agents to build reliable workflows",
    "query": "Compose a blog of about 2000-2500 words about tool-augmented LLM agents.",
    "audience": "Intermediate ML engineers",
    "tone": "Practical and slightly opinionated",
}

results = executor.run(inputs=graph_inputs)
# result = await run_async(executor.run(inputs=graph_inputs))
print("Result:", results)

name='blog_workflow' description='Example multi-agent workflow for writing and reviewing a technical blog using multiple specialized nodes (each node uses an Agent under the hood).\n' inputs=[GraphInputSpec(name='topic_title', type='string', required=True, description=None), GraphInputSpec(name='query', type='string', required=True, description=None)] nodes=[GraphNode(id='research', description=None, purpose='Perform focused research on the requested topic titled {topic_title}.', goal='Collect key facts, terminology, and references that are directly useful for writing a technical blog post.\n', expected_result="A compact, structured summary with sections for 'key_points', 'sources', and 'risks_or_caveats'.\n", tools=['web_search'], depends_on=[], mode=<NodeMode.AUTO: 'auto'>, output_schema=None, model=None, policy=None), GraphNode(id='outline', description=None, purpose='Design a clear structure for the article.', goal='Turn the research summary into a logical outline suitable for a 20

In [None]:
import json

with open("results.json", "w") as writer:
    json.dump(results.model_dump(), writer)

'{"graph": {"name": "blog_workflow", "description": "Example multi-agent workflow for writing and reviewing a technical blog using multiple specialized nodes (each node uses an Agent under the hood).\\n", "inputs": [{"name": "topic_title", "type": "string", "required": true, "description": null}, {"name": "query", "type": "string", "required": true, "description": null}], "nodes": [{"id": "research", "description": null, "purpose": "Perform focused research on the requested topic titled {topic_title}.", "goal": "Collect key facts, terminology, and references that are directly useful for writing a technical blog post.\\n", "expected_result": "A compact, structured summary with sections for \'key_points\', \'sources\', and \'risks_or_caveats\'.\\n", "tools": ["web_search"], "depends_on": [], "mode": "auto", "output_schema": null, "model": null, "policy": null}, {"id": "outline", "description": null, "purpose": "Design a clear structure for the article.", "goal": "Turn the research summar

In [53]:

print(results["results"]["review"].raw_output)

# Review of Draft: "Understanding Tool-Augmented LLM Agents: Architecture, Workflow, and Best Practices"

---

## ‚úÖ **Strengths**

- **Comprehensive Overview**: The draft provides a well-structured overview of tool-augmented LLM agents, covering key components like architecture, workflow, and best practices.
- **Clear Terminology**: The use of consistent terminology (e.g., "tool-augmented agents", "agent loop") helps maintain clarity.
- **Practical Focus**: The inclusion of best practices and workflow diagrams adds practical value for developers and researchers.
- **Up-to-Date References**: The draft references recent advancements in LLM agent systems, such as the use of retrieval-augmented generation (RAG) and tool integration.

---

## ‚ùå **Issues and Concerns**

### 1. **Technical Inaccuracies**

- **Overgeneralization of Tool Integration**: The draft refers to "tools" in a broad sense, but does not clearly distinguish between different types of tools (e.g., API-based, database, 

In [None]:
from __future__ import annotations
from typing import List, Optional
from pydantic import BaseModel, Field, field_validator
from dataclasses import dataclass, field
from typing import Dict

@dataclass
class RouterRetryPolicy:
    """Retry tuning for routing + tool execution."""
    max_route_retries: int = 2
    max_tool_retries: int = 1
    backoff_sec: float = 0.7  # linear backoff

@dataclass
class AgentConfig:
    """
    Top-level configuration for the Agent.
    """
    # Routing:
    allow_input_pruning: bool = True    # drop unknown inputs not in ToolSpec
    repair_with_llm: bool = True        # ask LLM to repair invalid routing/inputs
    strict_tool_call: bool = False      # router must output JSON; else can answer in plain text
    # synonyms: Dict[str, Dict[str, str]] = field(default_factory=dict)  # field -> {from: to}

    # LLM defaults:
    temperature: float = 0.7
    max_new_tokens: int = 512
    return_stream_by_default: bool = False

    # Retries:
    retry: RouterRetryPolicy = field(default_factory=RouterRetryPolicy)

    # Structured-output options:
    strict_json: bool = True                  # enforce RFC 8259 JSON
    max_repair_attempts: int = 1              # for malformed JSON repairs



class NodeBudget(BaseChatModel):
    """
    Budget / LLM-related overrides per node.

    These map directly to AgentConfig fields:
        - temperature      -> AgentConfig.temperature
        - max_new_tokens   -> AgentConfig.max_new_tokens
        - return_stream_by_default -> AgentConfig.return_stream_by_default
    """

    max_new_tokens: Optional[int] = Field(
        default=None,
        description="Override AgentConfig.max_new_tokens for this node only.",
    )
    temperature: Optional[float] = Field(
        default=None,
        description="Override AgentConfig.temperature for this node only.",
    )
    return_stream_by_default: Optional[bool] = Field(
        default=None,
        description="Override AgentConfig.return_stream_by_default for this node only.",
    )


class NodePolicy(BaseChatModel):
    """
    Per-node policy that can override some AgentConfig settings and add
    node-level execution constraints (e.g., timeout).

    YAML example:

        nodes:
          - id: research
            policy:
              retries: 1
              timeout_s: 30
              budget:
                max_new_tokens: 180
                temperature: 0.2
              allow_input_pruning: false
              repair_with_llm: true
              strict_tool_call: true
    """

    retries: Optional[int] = Field(
        default=None,
        description="Override AgentConfig.retry.max_route_retries for this node.",
    )
    timeout_s: Optional[int] = Field(
        default=None,
        description=(
            "Soft timeout for this node in seconds. Execution isn't forcibly "
            "cancelled but the node will be marked as errored if exceeded."
        ),
    )
    budget: Optional[NodeBudget] = None

    # Direct AgentConfig-like overrides
    allow_input_pruning: Optional[bool] = None
    repair_with_llm: Optional[bool] = None
    strict_tool_call: Optional[bool] = None
    strict_json: Optional[bool] = None
    max_repair_attempts: Optional[int] = None

    class Config:
        extra = "ignore"  # ignore unknown keys under 'policy'

c = AgentConfig()

p = NodePolicy(budget=NodeBudget(temperature=1.2))

print(c)
print(p)


AgentConfig(allow_input_pruning=True, repair_with_llm=True, strict_tool_call=False, temperature=0.7, max_new_tokens=512, return_stream_by_default=False, retry=RouterRetryPolicy(max_route_retries=2, max_tool_retries=1, backoff_sec=0.7), strict_json=True, max_repair_attempts=1)
retries=None timeout_s=None budget=NodeBudget(max_new_tokens=None, temperature=1.2, return_stream_by_default=None) allow_input_pruning=None repair_with_llm=None strict_tool_call=None strict_json=None max_repair_attempts=None


### Python API version (no YAML)

In [9]:
import asyncio
from graph import Graph, Node, NodePolicy, GraphConfig, GraphExecutor
from neurosurfer.tools import Toolkit
from neurosurfer.models.chat_models.openai import OpenAIModel

# Reuse your existing toolkit + model
llm = LLM  # already created in your environment
tk = toolkit

graph = Graph(
    name="calc_and_explain",
    config=GraphConfig(max_concurrency=2),
    inputs_schema={"prompt": str},
    nodes=[
        Node(
            id="rewrite",
            fn="general_query_assistant",  # adjust name if needed
            inputs={
                # swap "query" -> "prompt" if your tool expects "prompt"
                "query": (
                    "You will receive a user request. Extract a SINGLE pure arithmetic expression that can be "
                    "evaluated by a calculator (e.g., '(42 * 7) - 5^2' or '0.035 * 12000').\n"
                    "- Do NOT include explanations.\n"
                    "- Return ONLY the expression as plain text.\n\n"
                    "User request:\n${inputs.prompt}"
                )
            },
            outputs=["num1", "num2", "operation"],
            policy=NodePolicy(
                retries=1,
                timeout_s=30,
                budget={"max_new_tokens": 128, "temperature": 0.1},
            ),
        ),
        Node(
            id="compute",
            fn="calculator",
            inputs={"num1": "${rewrite.num1}", "num2": "${rewrite.num2}", "operation": "${rewrite.operation}"},
            outputs=["text"],
            policy=NodePolicy(retries=0, timeout_s=15),
        ),
        Node(
            id="explain",
            fn="general_query_assistant",
            inputs={
                "query": (
                    "Original request: ${inputs.prompt}\n"
                    "Calculator result: ${compute.text}\n\n"
                    "Write a brief, user-friendly explanation of the result (one short paragraph)."
                )
            },
            outputs=["text"],
            policy=NodePolicy(
                retries=1,
                timeout_s=30,
                budget={"max_new_tokens": 180, "temperature": 0.2},
            ),
        ),
    ],
    outputs={"answer": "${explain.text}"},
)

executor = GraphExecutor(llm=llm, toolkit=tk, max_concurrency=2)

result = await run_async(
    executor.run(graph, inputs={"prompt": "Compute 3.5% of 12000 and explain"}, stream=True)
)

print("OK:", result.ok)
print("Answer:\n", result.outputs["answer"])


OK: True
Answer:
 The calculator result for your request is ${compute.text}. This means that after performing the calculation based on your input, the final answer is ${compute.text}. Let me know if you need further assistance!


### Planner-based path (using the YAML as a skeleton)

In [None]:
import asyncio, tempfile, pathlib
from graph import PlannerAgent, FlowLoader, GraphExecutor

# 1) Write the YAML to a temp file (only for this demo)
yaml_text = r"""
name: calc_and_explain
inputs:
  prompt: str
config:
  max_concurrency: 2
nodes:
  - id: rewrite
    kind: task
    fn: general_query_assistant
    inputs:
      query: |
        You will receive a user request. Extract a SINGLE pure arithmetic expression that can be
        evaluated by a calculator (e.g., "(42 * 7) - 5^2" or "0.035 * 12000").
        - Do NOT include explanations.
        - Return ONLY the expression as plain text.

        User request:
        ${inputs.prompt}
    outputs: ["text"]
    policy: { retries: 1, timeout_s: 30, budget: { max_new_tokens: 128, temperature: 0.1 } }

  - id: compute
    kind: task
    fn: calculator
    inputs: { expression: ${rewrite.text} }
    outputs: ["text"]

  - id: explain
    kind: task
    fn: general_query_assistant
    inputs:
      query: |
        Original request: ${inputs.prompt}
        Calculator result: ${compute.text}

        Write a brief, user-friendly explanation of the result (one short paragraph).
    outputs: ["text"]
    policy: { retries: 1, timeout_s: 30, budget: { max_new_tokens: 180, temperature: 0.2 } }

outputs: { answer: ${explain.text} }
""".strip()

tmp = pathlib.Path(tempfile.gettempdir()) / "calc_and_explain.yml"
tmp.write_text(yaml_text)

# 2) Use the planner with a skeleton (so it returns your YAML-based Graph)
planner = PlannerAgent(llm=LLM)  # LLM not used when skeleton is set
graph = planner.plan_from_query(query="Compute 3.5% of 12000 and explain", skeleton=str(tmp))

# 3) Execute
executor = GraphExecutor(llm=LLM, toolkit=toolkit, max_concurrency=2)
result = asyncio.run(executor.run(graph, inputs={"prompt": "Compute 3.5% of 12000 and explain"}))

print("OK:", result.ok)
print(result.outputs["answer"])


Test ToolsRouterAgent

In [8]:
query = "Perform the calculation 20 * 90"

for chunk in tools_router_agent.run(query, temperature=0.7, max_new_tokens=4000):
    print(chunk, end="")


[92mINFO    [0m | [90m2025-11-06 11:08:32[0m | [96mtools_router_agent.py:run[0m | [router] Using tool: calculator
[92mINFO    [0m | [90m2025-11-06 11:08:32[0m | [96mtools_router_agent.py:run[0m | [router] Raw inputs: {'num1': 20.0, 'num2': 90.0, 'operation': 'multiply'}
1800.0

In [9]:
query = "Tell me a light-hearted joke!"

for chunk in tools_router_agent.run(query, temperature=0.7, max_new_tokens=4000):
    print(chunk, end="")

[92mINFO    [0m | [90m2025-11-06 11:08:33[0m | [96mtools_router_agent.py:run[0m | [router] Using tool: general_query_assistant
[92mINFO    [0m | [90m2025-11-06 11:08:33[0m | [96mtools_router_agent.py:run[0m | [router] Raw inputs: {'query': 'Tell me a light-hearted joke!'}
Why don't skeletons fight each other? They don't have the guts!None

## ReactAgent

In [21]:
from neurosurfer.agents.react import ReActAgent, ReActConfig

react_agent = ReActAgent(
    toolkit=toolkit,
    llm=LLM,
    specific_instructions="Always be concise in your answers. Break the task into steps if needed.",
    config=ReActConfig(
        temperature=0.7,
        max_new_tokens=4096,
        allow_input_pruning=True,
        repair_with_llm=True,
        skip_special_tokens=True,
        verbose=True
    )
)

# print(react_agent._system_prompt())
TASK = """Calculate 300 - 300. Then tell me a light-hearted joke about that result."""

for chunk in react_agent.run(TASK):
    print(chunk, end="")




[üß†] Chain of Thoughts...
Thought: I will first calculate 300 - 300 using the calculator tool, and then I will use the general_query_assistant tool to find a light-hearted joke about the result.

Action: {
  "tool": "calculator",
  "inputs": {
    "num1": 300,
    "num2": 300,
    "operation": "subtract"
  },
  "final_answer": false
}



[üß†] Chain of Thoughts...
Thought: The result of the calculation is 0. Now, I will use the general_query_assistant tool to find a light-hearted joke about the result.

Action: {
  "tool": "general_query_assistant",
  "inputs": {
    "query": "Tell me a light-hearted joke about the number 0."
  },
  "final_answer": true
}

Why did the number 0 break up with the number 8?  
Because it found someone more "8" (8) than a zero!



[üß†] Chain of Thoughts...
Thought: The calculation result is 0, and the joke provided is ready. The final answer is complete.

Final Answer: The result of 300 - 300 is 0. Here's a light-hearted joke about it: Why did the number 0 break up with the number 8? Because it found someone more "8" (8) than a zero!