In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from datetime import datetime
import torch
import json
import requests

import re
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face checkpoint to load. Alternatives that were tried previously:
#   "meta-llama/Llama-3.1-8B-Instruct"
#   "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
model_id = "Qwen/Qwen3-8B"

# NOTE(review): trust_remote_code=True lets the checkpoint repository run its
# own Python code at load time. Confirm it is really required for the chosen
# model_id before sharing this notebook.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # `torch_dtype` is deprecated in the installed transformers release (see the
    # warning this cell used to print); `dtype` is the supported spelling.
    dtype=torch.bfloat16,
    device_map="cuda:0",
    trust_remote_code=True,
)

`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 5/5 [00:02<00:00,  1.91it/s]


In [9]:
from tools import TOOL_REGISTRY, TOOLS_SPEC
from typing import Any, Callable, Dict, List, Optional


# SYSTEM_PROMPT = """You are a helpful assistant.
# You may call tools to compute intermediate results. For math questions, place all your answers inside \\boxed{}.
# When tasked with generating code, make sure your answers are in a codeblock ```language_name code here```.

# When you want to call a tool, output ONLY a tool call in this exact format:
# <tool_call>{"name": "...", "arguments": {...}}</tool_call>

# Once the final answer has been stated to the user:
# - Do not perform further reasoning
# - Do not call computation tools again
# - Immediately call the stop_loop tool in the same <tool_call> format
# """

# System prompt sent as the first message of every conversation.
#
# This is an f-string, so:
#   * literal braces in the tool-call example are doubled ({{ / }}) to escape them;
#   * CURRENT_DATE is evaluated once, when this cell runs, using the naive local
#     time from datetime.today(). It is NOT refreshed per question, so re-run
#     the cell in a long-lived kernel if the date matters.
#
# The prompt names the tools execute_code, notify_user and stop_loop;
# presumably these are registered in tools.TOOL_REGISTRY — verify against tools.py.
SYSTEM_PROMPT = f"""You are a helpful assistant with access to tools.
CURRENT_DATE: {datetime.today().strftime('%Y-%m-%d %H:%M:%S')}

TOOL USAGE:
- When you need to perform calculations, execute code, or access external data, use tools
- Output tool calls in this exact format: <tool_call>{{"name": "...", "arguments": {{...}}}}</tool_call>
- You can make multiple tool calls in one response
- After receiving tool results, incorporate them naturally into your answer

EXECUTE_CODE AS A UNIVERSAL PROBLEM-SOLVER:
- The execute_code tool is your most flexible and powerful capability
- If existing tools are insufficient, limited, or don't quite fit the task, write a custom program with execute_code
- Use execute_code to:
  * Create specialized tools or utilities on-the-fly for unique tasks
  * Process, transform, or analyze data in ways other tools cannot
  * Implement algorithms, simulations, or complex logic
  * Interact with APIs, parse formats, or handle edge cases
  * Make requests to websites that have information you may need
  * Extend your capabilities dynamically when facing novel problems
- Think of execute_code as your workshop: when you need a tool that doesn't exist, build it
- Before claiming something cannot be done, consider if you can write code to accomplish it

IMPORTANT:
- When you have provided the final answer to the user, call notify_user and then stop_loop immediately
- Do not call stop_loop until the user's question is fully answered
- For code-related questions, use execute_code to run and verify your solutions
- Always cite your sources if you use external information. Provide the link to them .
- Always notify the user with notify_user when you finish your task.

Be concise, creative, and resourceful in solving problems.
"""


# -----------------------------
# Tool calling helpers
# -----------------------------
def execute_tool_call(tool_name: str, tool_args: Dict[str, Any]) -> str:
    """
    Run the tool registered under ``tool_name`` and return its result as text.

    Args:
        tool_name: Key looked up in ``TOOL_REGISTRY``.
        tool_args: Keyword arguments unpacked into the tool callable.

    Returns:
        ``str()`` of the tool's return value, or an ``"Error ..."`` string when
        the tool is not registered or raises while running. Failures are
        reported as text (not raised) so they can be fed back to the model.
    """
    handler = TOOL_REGISTRY.get(tool_name)

    if handler is not None:
        try:
            outcome = handler(**tool_args)
            return str(outcome)
        except Exception as exc:
            return f"Error executing {tool_name}: {exc}"

    return f"Error: Tool '{tool_name}' not found"



def extract_tool_calls(text: str) -> List[Dict[str, Any]]:
    """
    Extract <tool_call>{...}</tool_call> blocks and parse the JSON object inside.
    Supports multiple tool calls in one model turn.

    Each opening tag is handled independently: the JSON object that follows it
    is parsed with a real JSON decoder instead of being delimited by a lazy
    regex. This matters in two situations:

    - A malformed call (e.g. a missing closing brace) no longer swallows the
      valid calls that come after it.
    - A JSON string value that itself contains ``}</tool_call>`` (for example
      code passed to a tool) no longer truncates the payload.

    Args:
        text: Raw model output. ``None`` or an empty string yields no calls.

    Returns:
        The parsed tool-call dicts, in the order they appear in ``text``.
        Malformed or unterminated calls are skipped with a printed warning.
    """
    tool_calls: List[Dict[str, Any]] = []
    if not text:
        return tool_calls

    # Only an opening tag directly followed by "{" can start a tool call.
    open_tag = re.compile(r"<tool_call>\s*(?=\{)")
    close_tag = re.compile(r"\s*</tool_call>")
    decoder = json.JSONDecoder()

    pos = 0
    while True:
        opening = open_tag.search(text, pos)
        if opening is None:
            break
        start = opening.end()

        try:
            # raw_decode stops at the end of the first complete JSON value, so
            # nested braces and braces inside strings are handled correctly.
            payload, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError as e:
            # Keep going; malformed tool-call shouldn't crash the loop.
            close_at = text.find("</tool_call>", start)
            blob = text[start:close_at if close_at != -1 else len(text)].strip()
            print(f"[warn] Failed to parse tool call JSON: {e}\n{blob}")
            pos = start  # resume the scan right after this opening tag
            continue

        closing = close_tag.match(text, end)
        if closing is None:
            # The JSON must be immediately followed by the closing tag; an
            # unterminated or padded call is not executed.
            print(f"[warn] Tool call is not terminated by </tool_call>:\n{text[start:end]}")
            pos = start
            continue

        tool_calls.append(payload)
        pos = closing.end()

    return tool_calls


def _decode_new_tokens(tokenizer, inputs, outputs) -> str:
    """Decode only the newly generated tokens, not the prompt."""
    gen = outputs[0, inputs.shape[-1]:]
    return tokenizer.decode(gen, skip_special_tokens=True)


# -----------------------------
# Main agent loop
# -----------------------------
def answer_me(question: str, max_iterations: int = 8, temperature: float = 0.0, max_new_tokens=512) -> str:
    """
    Runs a simple tool-using loop:
    - model produces response (may include <tool_call> blocks)
    - we execute tool calls and append tool results
    - we repeat until model produces no tool calls OR stop_loop returns True

    Args:
        question: The user's question; sent as the first user message.
        max_iterations: Maximum number of generate/execute rounds.
        temperature: Sampling temperature. A value <= 0 (the default) selects
            greedy decoding; a positive value enables sampling at that
            temperature.
        max_new_tokens: Generation budget for each model turn.

    Returns:
        The model text of the first turn that contains no tool calls. If
        stop_loop fires or ``max_iterations`` is exhausted first, the most
        recent model text is returned instead (it may still contain tool-call
        markup); ``""`` if the model was never run.
    """
    messages: List[Dict[str, Any]] = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": question},
    ]

    last_model_text: Optional[str] = None

    # A non-positive temperature is not a valid sampling temperature: when the
    # checkpoint's generation config enables sampling, generate() rejects it.
    # Treat it as "decode greedily"; otherwise request sampling explicitly so
    # the temperature is actually honoured.
    generation_kwargs: Dict[str, Any] = {"max_new_tokens": max_new_tokens}
    if temperature is not None and temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature)
    else:
        generation_kwargs["do_sample"] = False

    for it in range(max_iterations):
        print(f"\n--- Iteration {it + 1} ---")

        # return_dict=True gives input_ids plus attention_mask regardless of the
        # library's default return type, and lets us pass both to generate().
        inputs = tokenizer.apply_chat_template(
            messages,
            tools=TOOLS_SPEC,
            add_generation_prompt=True,
            return_tensors="pt",
            tokenize=True,
            return_dict=True,
        ).to(model.device)

        outputs = model.generate(**inputs, **generation_kwargs)

        model_text = _decode_new_tokens(tokenizer, inputs["input_ids"], outputs)
        last_model_text = model_text
        print(f"Model response:\n{model_text}")

        tool_calls = extract_tool_calls(model_text)
        if not tool_calls:
            # No tool calls => treat as final answer
            print("No tool calls found. Conversation complete.")
            return model_text

        # Normalise every call once: (call_id, tool name, arguments). The same
        # triples drive both the assistant payload and the execution below, so
        # the ids in the tool results always match the ids announced here.
        planned_calls = [
            (f"call_{it}_{idx}", call.get("name"), call.get("arguments", {}) or {})
            for idx, call in enumerate(tool_calls)
        ]

        # Append assistant message (include raw text so the model "sees" what it said)
        # with the tool calls in the OpenAI-style structure.
        messages.append(
            {
                "role": "assistant",
                "content": model_text,
                "tool_calls": [
                    {
                        "id": call_id,
                        "type": "function",
                        "function": {"name": tool_name, "arguments": json.dumps(tool_args)},
                    }
                    for call_id, tool_name, tool_args in planned_calls
                ],
            }
        )

        # Execute each tool call and append tool results
        for call_id, tool_name, tool_args in planned_calls:
            print(f"\nExecuting tool: {tool_name} with args: {tool_args}")
            tool_result = execute_tool_call(tool_name, tool_args)
            print(f"Tool result: {tool_result}")

            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call_id,
                    "name": tool_name,
                    "content": str(tool_result),
                }
            )

            # If stop_loop was called successfully, end immediately (no extra generation pass).
            # Any tool calls listed after stop_loop in the same turn are not executed.
            if tool_name == "stop_loop" and str(tool_result).strip().lower() in {"true", "1"}:
                print("\n*** stop_loop triggered — stopping ***")
                return last_model_text or ""

    # If we hit max iterations, return best effort
    print("\n[warn] Max iterations reached. Returning last model text.")
    return last_model_text or ""

In [10]:
# Example run: one question, at most 5 agent iterations, sampling at
# temperature 0.6 with a large per-turn token budget.
answer = answer_me(
    question="What is the best deck to use in YU-GI-OH right now? Also tell me what the best beatdown deck is as well incase the best isn't a beatdown deck.",
    max_iterations=5,
    temperature=0.6,
    max_new_tokens=32768,
)

# print(answer)


--- Iteration 1 ---
Model response:
<think>
Okay, the user is asking about the best deck in Yu-Gi-Oh right now and also the best beatdown deck. Let me start by understanding what they need. Yu-Gi-Oh has different formats, like the official TCG, OCG, and maybe the video game. Since they didn't specify, I should consider the current meta in the official TCG. But I'm not sure about the latest updates. Maybe I should check the latest information.

First, I need to find the current top decks. The best deck would depend on the format, like if it's a specific tournament or the general meta. Beatdown decks are typically aggressive, focusing on quick wins. So the user wants both the current top deck and an aggressive option if the top isn't beatdown.

I should use the get_search_query tool to search for the latest information. Let me search for "best Yu-Gi-Oh deck 2026" and "best beatdown Yu-Gi-Oh deck 2026". That should give me recent sources. Wait, the current date is 2026-02-01, so the late