In [1]:
from dotenv import load_dotenv
from IPython.lib.display import FileLink
import time
import httpx

load_dotenv()

True

In [2]:
# cd demos/fastapi_server 
# uv run uvicorn main:app --reload

In [3]:
from enum import Enum
from pathlib import Path

BASE_URL = "http://localhost:8000"

class agent_type_enum(Enum):
    agent_no_tools = "agent_no_tools"
    agent_client_tools = "agent_client_tools"
    agent_frontend_tools = "agent_frontend_tools"
    agent_all_json = "agent_all_json"
    agent_subagents = "agent_subagents"
    agent_cowork = "agent_cowork"


def call_agent_api(
    prompt: str | list[dict] | dict,
    agent_uuid: str | None = None,
    agent_type: str | Enum = "agent_all_json",
    log_file: str | None = None,
) -> str:
    """Call agent API and log streamed response to file.

    Returns:
        Path to the log file
    """
    if isinstance(agent_type, Enum):
        agent_type = agent_type.value

    # Build filename
    timestamp = int(time.time() * 1000)
    if log_file is None:
        log_file = f"temp_logs/{agent_type}_{timestamp}.log"
    else:
        log_file = f"temp_logs/{log_file}_{agent_type}_{timestamp}.log"

    log_path = Path(log_file)
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # Build request payload
    payload = {"user_prompt": prompt}
    if agent_uuid:
        payload["agent_uuid"] = agent_uuid
    else:
        payload["agent_type"] = agent_type

    # Stream response and write to file
    with log_path.open("w") as f:
        with httpx.Client(timeout=None) as client:
            with client.stream("POST", f"{BASE_URL}/agent/run", json=payload) as response:
                for line in response.iter_lines():
                    f.write(line)
                    f.write("\n")
                    # if line.startswith("data: "):
                        # data = line[6:]  # Strip "data: " prefix
                        # if data != "[DONE]":
                            # Unescape newlines and write
                            # f.write(data.replace("\\n", "\n"))
                            # f.write("\n")

    return str(log_path)


In [8]:
path = call_agent_api("Hello", log_file="hello", agent_type=agent_type_enum.agent_all_json)
FileLink(path)

# JSON Formatted Output

## No tools

In [9]:
path = call_agent_api(
    "Hi, what is AI?", 
    log_file="no_tools_what_is_ai", 
    agent_type=agent_type_enum.agent_no_tools
)
FileLink(path)

## Client Tools

In [10]:
path = call_agent_api(
    "Hi, what is 5*(3+2/4)?", 
    log_file="client_tools_math_expression", 
    agent_type=agent_type_enum.agent_client_tools
)
FileLink(path)

## Server Tools

In [11]:
path = call_agent_api(
    "Hi, what is 5*(3+2/4)?", 
    log_file="server_tools_raw_math_expression", 
    agent_type=agent_type_enum.agent_all_json
)
FileLink(path)

In [12]:
test_image = "../../currency_receipt_usd_jpy.png"
# Convert image to base64
import base64

with open(test_image, "rb") as image_file:
    base64_image = base64.b64encode(image_file.read()).decode('utf-8')

In [13]:
prompt = {
    "role": "user",
    "content": [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": base64_image,
            },
        },
        {
            "type": "text",
            "text": "I paid the receipt in USD. What is the amount in INR?"
        },
    ]
}
path = call_agent_api(
    prompt, 
    log_file="image_receipt_usd_to_inr", 
    agent_type=agent_type_enum.agent_all_json
)
FileLink(path)

In [14]:
prompt = "Fetch me the latest happenings in the Y Combinator ecosystem"
path = call_agent_api(
    prompt,
    log_file="ycombinator_latest_happenings",
    agent_type=agent_type_enum.agent_all_json
)
FileLink(path)

## PDF citations

In [15]:
prompt = [{
    "type": "document",
    "source": {
        "type": "url",
        "url": "https://assets.anthropic.com/m/1cd9d098ac3e6467/original/Claude-3-Model-Card-October-Addendum.pdf"
    },
    "citations": {"enabled": True}
},
{
    "type": "text",
    "text": "What are the key findings in this document?"
}]

path = call_agent_api(
    prompt, 
    log_file="pdf_citations_model_card_key_findings", 
    agent_type=agent_type_enum.agent_all_json
)
FileLink(path)

## Content citations

In [16]:
prompt = {
    "role": "user",
    "content": [
      {
        "type": "document",
        "source": {
          "type": "text",
          "media_type": "text/plain",
          "data": "The grass is green. The sky is blue."
        },
        "title": "My Document",
        "context": "This is a trustworthy document.",
        "citations": {"enabled": True}
        },
        {
        "type": "text",
        "text": "What color is the grass and sky?"
        }
    ]
    }
path = call_agent_api(
    prompt,
    log_file="content_citations_grass_sky",
    agent_type=agent_type_enum.agent_all_json,
)
FileLink(path)


## Complex markdown Rendering Cases

In [17]:
prompt = "Produce a markdown report with a table + code block + nested bullets about 'How to reduce latency in an API.'"
path = call_agent_api(
    prompt, 
    log_file="markdown_report_latency_in_api", 
    agent_type=agent_type_enum.agent_all_json
)
FileLink(path)

## Image/File Generation

In [18]:
prompt = "Generate a JSON file named `users.json` with 20 synthetic users: id, name, email, country."
path = call_agent_api(
    prompt, 
    log_file="file_generation_users_json", 
    agent_type=agent_type_enum.agent_all_json
)
FileLink(path)

## Content Embedded Tags

In [19]:
prompt = """
<chart_config>
**What Is It?**
A **Chart.js JSON config** is a text format that describes how your chart should look and what data it displays. Think of it like a recipe written in a specific format that Chart.js can read. The chart js json config can be used to show charts in the response.

**Basic Structure**:
```json
{
  "type": "bar",           // Chart type: 'bar', 'line', 'pie', 'doughnut', etc.
  "data": {                // Your chart data
    "labels": [...],       // X-axis labels (array of strings)
    "datasets": [...]      // Your data series (array of objects)
  },
  "options": {             // How the chart looks and behaves
    ...
  }
}
```

To show charts, embed Chart.js JSON config between `<chart>` tags.

**Important:** Place the JSON directly inside the tags without escaping or quotes:

```
<chart>
{
  "type": "bar",
  "data": {
    "labels": ["Label 1", "Label 2"],
    "datasets": [{"label": "My Data", "data": [10, 20]}]
  }
}
</chart>
```

**Complete Template**:
```json
{
  "type": "bar",
  "data": {
    "labels": ["Label 1", "Label 2", "Label 3"],
    "datasets": [{
      "label": "My Dataset",
      "data": [10, 20, 30],
      "backgroundColor": "rgba(75, 192, 192, 0.2)",
      "borderColor": "rgba(75, 192, 192, 1)",
      "borderWidth": 1
    }]
  },
  "options": {
    "responsive": true,
    "plugins": {
      "title": {
        "display": true,
        "text": "My Chart Title"
      }
    },
    "scales": {
      "y": {
        "beginAtZero": true
      }
    }
  }
}
```

**Rules**:
✅ **CAN include**: Numbers, Strings, Booleans, Arrays, Objects, Null

❌ **CANNOT include**:
- Functions or code or callbacks (config is serialized)
- Variables (like `myVariable`)
- Comments (remove `//` when saving as JSON)
- Single quotes (use double quotes only)
</chart_config>

<user_prompt>
Present world population demographic data in a chart.
</user_prompt>
"""
path = call_agent_api(
    prompt,
    log_file="content_embedded_tags_world_population_chart",
    agent_type=agent_type_enum.agent_all_json,
)
FileLink(path)

## Literal newlines

In [20]:
prompt = "Explain the behavior of backslash (\\) in a string. Also explain what \\n does. How is it different from \\\\n or \\r\\n?"
print(prompt)

Explain the behavior of backslash (\) in a string. Also explain what \n does. How is it different from \\n or \r\n?


In [21]:
path = call_agent_api(
    prompt,
    log_file="literal_newlines_backslash_behavior",
    agent_type=agent_type_enum.agent_all_json,
)
FileLink(path)


## Image Tool Results

In [22]:
# Test with an existing image in your repo
prompt = "Read and analyze the image at ../../currency_receipt_usd_jpy.png"
path = call_agent_api(
    prompt,
    log_file="image_tool_read_file",
    agent_type=agent_type_enum.agent_all_json,
)
FileLink(path)

# Frontend Tools API Tests

Test the frontend tools flow:
1. Call `/agent/run` with `agent_frontend_tools` type
2. Agent should pause with `awaiting_frontend_tools` when it calls `user_confirm`
3. Parse the pending tool info from the stream
4. Call `/agent/tool_results` to continue execution


In [23]:
# Update the enum to include all agent types
from enum import Enum
import os

# Ensure temp_logs directory exists
os.makedirs("temp_logs", exist_ok=True)

class agent_type_enum_v2(Enum):
    agent_no_tools = "agent_no_tools"
    agent_client_tools = "agent_client_tools"
    agent_all_json = "agent_all_json"
    agent_subagents = "agent_subagents"
    agent_cowork = "agent_cowork"
    agent_frontend_tools = "agent_frontend_tools"


In [24]:
import json
import re
import html

def call_agent_with_frontend_tools(
    prompt: str | list[dict] | dict,
    agent_uuid: str | None = None,
    agent_type: str | Enum = "agent_frontend_tools",
    log_file: str | None = None,
) -> tuple[str, str | None, list[dict] | None]:
    """Call agent API with frontend tools support.
    
    Returns:
        Tuple of (log_file_path, agent_uuid, pending_frontend_tools)
        - pending_frontend_tools is None if agent completed without pausing
    """
    if isinstance(agent_type, Enum):
        agent_type = agent_type.value

    timestamp = int(time.time() * 1000)
    if log_file is None:
        log_file = f"temp_logs/{agent_type}_{timestamp}.log"
    else:
        log_file = f"temp_logs/{log_file}_{agent_type}_{timestamp}.log"

    payload = {"user_prompt": prompt}
    if agent_uuid:
        payload["agent_uuid"] = agent_uuid
    else:
        payload["agent_type"] = agent_type

    pending_tools = None
    agent_uuid_from_stream = None
    
    with open(log_file, "w") as f:
        with httpx.Client(timeout=None) as client:
            with client.stream("POST", f"{BASE_URL}/agent/run", json=payload) as response:
                for line in response.iter_lines():
                    f.write(line)
                    f.write("\n")
                    
                    # Extract agent_uuid from meta_init (HTML entities are escaped)
                    if "meta_init" in line and "agent_uuid" in line:
                        # Unescape HTML entities first, then search
                        unescaped_line = html.unescape(line)
                        match = re.search(r'"agent_uuid":\s*"([^"]+)"', unescaped_line)
                        if match:
                            agent_uuid_from_stream = match.group(1)
                    
                    # Check for awaiting_frontend_tools tag
                    if "awaiting_frontend_tools" in line:
                        # Extract the data attribute
                        match = re.search(r'data="([^"]+)"', line)
                        if match:
                            # Unescape HTML entities
                            data_str = html.unescape(match.group(1))
                            pending_tools = json.loads(data_str)
                            print(f">>> Found awaiting_frontend_tools: {len(pending_tools)} tool(s)")

    return log_file, agent_uuid_from_stream, pending_tools


In [25]:
def submit_tool_results(
    agent_uuid: str,
    tool_results: list[dict],
    log_file: str | None = None,
) -> str:
    """Submit frontend tool results and continue streaming.
    
    Args:
        agent_uuid: The agent UUID to resume
        tool_results: List of {tool_use_id, content, is_error?} dicts
        log_file: Optional log file name prefix
        
    Returns:
        Path to the log file
    """
    timestamp = int(time.time() * 1000)
    if log_file is None:
        log_file = f"temp_logs/tool_results_{timestamp}.log"
    else:
        log_file = f"temp_logs/{log_file}_{timestamp}.log"

    payload = {
        "agent_uuid": agent_uuid,
        "tool_results": tool_results
    }

    with open(log_file, "w") as f:
        with httpx.Client(timeout=None) as client:
            with client.stream("POST", f"{BASE_URL}/agent/tool_results", json=payload) as response:
                for line in response.iter_lines():
                    f.write(line)
                    f.write("\n")

    return log_file


In [26]:
# Step 1: Call agent with a prompt that should trigger user_confirm
# The agent is configured to ask for confirmation when results are over 50

prompt = "Calculate 25 * 4 for me. Confirm with user before proceeding."
print(f"Prompt: {prompt}")
print("-" * 50)

log_path, agent_uuid, pending_tools = call_agent_with_frontend_tools(
    prompt,
    log_file="frontend_tools_step1_pause",
    agent_type=agent_type_enum_v2.agent_frontend_tools
)

print(f"\nLog file: {log_path}")
print(f"Agent UUID: {agent_uuid}")
print(f"Pending tools: {pending_tools}")

FileLink(log_path)


Prompt: Calculate 25 * 4 for me. Confirm with user before proceeding.
--------------------------------------------------

Log file: temp_logs/frontend_tools_step1_pause_agent_frontend_tools_1771326106327.log
Agent UUID: None
Pending tools: None


In [27]:
# Step 2: Submit frontend tool results (simulating user clicking "yes")
# Only run this if we have pending tools

if pending_tools and agent_uuid:
    # Build tool results - simulate user confirming
    tool_results = [
        {
            "tool_use_id": tool["tool_use_id"],
            "content": "yes",  # User confirmed
            "is_error": False
        }
        for tool in pending_tools
    ]
    
    print(f"Submitting {len(tool_results)} tool result(s):")
    for tr in tool_results:
        print(f"  - {tr['tool_use_id']}: {tr['content']}")
    print("-" * 50)
    
    continuation_log = submit_tool_results(
        agent_uuid=agent_uuid,
        tool_results=tool_results,
        log_file="frontend_tools_step2_continue"
    )
    
    print(f"\nContinuation log: {continuation_log}")
    FileLink(continuation_log)
else:
    print("No pending tools or agent_uuid - agent may have completed without pausing")
    print("Try running the prompt again - Claude's behavior can vary")


No pending tools or agent_uuid - agent may have completed without pausing
Try running the prompt again - Claude's behavior can vary


In [28]:
# View the full flow in one place
print("=" * 60)
print("FRONTEND TOOLS FLOW SUMMARY")
print("=" * 60)

# Read step 1 log (pause)
if 'log_path' in dir():
    print("\n[STEP 1: Initial Request - Agent Pauses]")
    print("-" * 40)
    with open(log_path, 'r') as f:
        content = f.read()
        # Show last few lines to see the pause marker
        lines = content.strip().split('\n')
        for line in lines[-10:]:
            if line.strip():
                print(line[:200] + "..." if len(line) > 200 else line)

# Read step 2 log (continuation)
if 'continuation_log' in dir() and continuation_log:
    print("\n[STEP 2: Tool Results Submitted - Agent Continues]")
    print("-" * 40)
    with open(continuation_log, 'r') as f:
        content = f.read()
        # Show the content
        lines = content.strip().split('\n')
        for line in lines[:15]:  # First 15 lines of continuation
            if line.strip():
                print(line[:200] + "..." if len(line) > 200 else line)
        if len(lines) > 15:
            print(f"... ({len(lines) - 15} more lines)")


FRONTEND TOOLS FLOW SUMMARY

[STEP 1: Initial Request - Agent Pauses]
----------------------------------------
data: {"type":"text","agent":"eac27ee1-b7fa-4e3e-bfb2-7c31b47f0c8a","final":false,"delta":" calculation."}
data: {"type":"text","agent":"eac27ee1-b7fa-4e3e-bfb2-7c31b47f0c8a","final":true,"delta":""}
data: {"type":"tool_call","agent":"eac27ee1-b7fa-4e3e-bfb2-7c31b47f0c8a","final":true,"delta":"{\"message\": \"Do you want me to proceed with calculating 25 × 4?\"}","id":"toolu_012jrKhKVgW6WvvSJdgcdi...
data: {"type":"awaiting_frontend_tools","agent":"eac27ee1-b7fa-4e3e-bfb2-7c31b47f0c8a","final":true,"delta":"[{\"tool_use_id\": \"toolu_012jrKhKVgW6WvvSJdgcdi8n\", \"name\": \"user_confirm\", \"input\...
data: [DONE]


# Multipart File Upload Tests

Test the `/agent/run/multipart` endpoint which accepts files via `multipart/form-data`.
The server base64-encodes files and constructs Anthropic content blocks server-side.

In [4]:
def call_agent_multipart(
    prompt: str,
    files: list[tuple[str, bytes, str]],
    agent_uuid: str | None = None,
    agent_type: str | Enum = "agent_all_json",
    log_file: str | None = None,
) -> str:
    """Call multipart agent API and log streamed response to file.

    Args:
        prompt: Text prompt to send.
        files: List of (filename, content_bytes, mime_type) tuples.
        agent_uuid: Optional UUID to resume an existing agent session.
        agent_type: Agent type for configuration.
        log_file: Optional log file name prefix.

    Returns:
        Path to the log file.
    """
    if isinstance(agent_type, Enum):
        agent_type = agent_type.value

    timestamp = int(time.time() * 1000)
    if log_file is None:
        log_file = f"temp_logs/multipart_{agent_type}_{timestamp}.log"
    else:
        log_file = f"temp_logs/{log_file}_{agent_type}_{timestamp}.log"

    log_path = Path(log_file)
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # Build form data
    data = {"user_prompt": prompt}
    if agent_uuid:
        data["agent_uuid"] = agent_uuid
    else:
        data["agent_type"] = agent_type

    # Build file tuples for httpx: ("files", (filename, content, mime_type))
    file_tuples = [("files", (fn, content, mt)) for fn, content, mt in files]

    with log_path.open("w") as f:
        with httpx.Client(timeout=None) as client:
            with client.stream(
                "POST",
                f"{BASE_URL}/agent/run/multipart",
                data=data,
                files=file_tuples,
            ) as response:
                for line in response.iter_lines():
                    f.write(line)
                    f.write("\n")

    return str(log_path)

## Multipart: Image upload

In [5]:
test_image_path = "../../currency_receipt_usd_jpy.png"

with open(test_image_path, "rb") as f:
    image_bytes = f.read()

path = call_agent_multipart(
    prompt="I paid the receipt in USD. What is the amount in INR?",
    files=[("currency_receipt_usd_jpy.png", image_bytes, "image/png")],
    log_file="multipart_image_receipt",
    agent_type=agent_type_enum.agent_all_json,
)
FileLink(path)

## Multipart: PDF upload

In [6]:
# Download a sample PDF for testing
import httpx as _httpx

pdf_url = "https://assets.anthropic.com/m/1cd9d098ac3e6467/original/Claude-3-Model-Card-October-Addendum.pdf"
pdf_bytes = _httpx.get(pdf_url, follow_redirects=True).content
print(f"Downloaded PDF: {len(pdf_bytes) / 1024:.1f} KB")

path = call_agent_multipart(
    prompt="What are the key findings in this document?",
    files=[("Claude-3-Model-Card.pdf", pdf_bytes, "application/pdf")],
    log_file="multipart_pdf_model_card",
    agent_type=agent_type_enum.agent_all_json,
)
FileLink(path)

Downloaded PDF: 2537.8 KB


## Multipart: Text-only (no files)

In [7]:
# No files attached — should behave the same as the JSON /agent/run endpoint
path = call_agent_multipart(
    prompt="Hello, what is 2 + 2?",
    files=[],
    log_file="multipart_text_only",
    agent_type=agent_type_enum.agent_all_json,
)
FileLink(path)

## Multipart: Unsupported file type (expect 400)

In [8]:
# Sending a .csv file should be rejected with HTTP 400
csv_content = b"name,age\nAlice,30\nBob,25\n"

response = httpx.post(
    f"{BASE_URL}/agent/run/multipart",
    data={"user_prompt": "Analyze this data", "agent_type": "agent_all_json"},
    files=[("files", ("data.csv", csv_content, "text/csv"))],
)
print(f"Status: {response.status_code}")
print(f"Response: {response.json()}")

Status: 400
Response: {'detail': "Unsupported file type 'text/csv' for file 'data.csv'. Supported types: application/pdf, image/gif, image/jpeg, image/png, image/webp, text/plain"}


# Anthropic Agent Skills Tests

Test the Anthropic Skills integration (xlsx, pptx, docx, pdf).
Skills run in Anthropic's server-side code execution container and can generate documents.

Uses the `agent_skills` agent type configured with:
- `code_execution` server tool
- `xlsx`, `pptx`, `docx`, `pdf` Anthropic Skills
- Beta headers: `code-execution-2025-08-25`, `skills-2025-10-02`, `files-api-2025-04-14`

## Skills: Generate Excel Spreadsheet

In [4]:
prompt = "Create an Excel spreadsheet with a simple monthly budget: categories (Rent, Food, Transport, Entertainment, Savings) and amounts for 3 months."
path = call_agent_api(
    prompt,
    log_file="skills_xlsx_budget",
    agent_type="agent_skills",
)
FileLink(path)

## Skills: Generate PowerPoint Presentation

In [5]:
prompt = "Create a PowerPoint presentation with 3 slides about the solar system: title slide, inner planets overview, and outer planets overview."
path = call_agent_api(
    prompt,
    log_file="skills_pptx_solar_system",
    agent_type="agent_skills",
)
FileLink(path)