In [None]:
#|default_exp core

# Claudette's source

This is the 'literate' source code for Claudette. You can view the fully rendered version of the notebook [here](https://claudette.answer.ai/core.html), or you can clone the git repo and run the [interactive notebook](https://github.com/AnswerDotAI/claudette/blob/main/00_core.ipynb) in Jupyter. The notebook is converted to the [Python module claudette/core.py](https://github.com/AnswerDotAI/claudette/blob/main/claudette/core.py) using [nbdev](https://nbdev.fast.ai/). The goal of this source code is both to create the Python module and to teach the reader *how* it is created, without assuming much existing knowledge about Claude's API.

Most of the time you'll see that we write some source code *first*, and then a description or discussion of it *afterwards*.

## Setup

In [None]:
import os
# os.environ['ANTHROPIC_LOG'] = 'debug'

To print every HTTP request and response in full, uncomment the above line. This functionality is provided by Anthropic's SDK.

In [None]:
#| export
import inspect, typing, json
from collections import abc
from dataclasses import dataclass
from typing import get_type_hints, Any
from functools import wraps

from anthropic import Anthropic, AnthropicBedrock, AnthropicVertex
from anthropic.types import (Usage, TextBlock, ServerToolUseBlock,
                             WebSearchToolResultBlock, Message, ToolUseBlock,
                             ThinkingBlock, ServerToolUsage)
from anthropic.resources import messages

import toolslm
from toolslm.funccall import *

from fastcore.meta import delegates
from fastcore.utils import *
from fastcore.xtras import save_iter
from msglm import mk_msg_anthropic as mk_msg, mk_msgs_anthropic as mk_msgs

In [None]:
#| hide
from nbdev import show_doc

In [None]:
from anthropic.types import Model
from claudette.text_editor import *
from typing import get_args
from datetime import datetime
from pprint import pprint
from IPython.display import Image

import warnings

In [None]:
warnings.filterwarnings("ignore", message="Pydantic serializer warnings")

In [None]:
#| export
# Names of the `msglm` helpers imported above that this module should also expose.
# NOTE(review): presumably read by nbdev when it builds the module's `__all__` -- confirm.
_all_ = ['mk_msg', 'mk_msgs']

In [None]:
#| export
# Short alias for the `inspect` sentinel that marks a parameter with no default/annotation.
empty = inspect.Parameter.empty

:::{.callout-tip}

If you're reading the rendered version of this notebook, you'll see an "Exported source" collapsible widget below. If you're reading the source notebook directly, you'll see `#| exports` at the top of the cell. These show that this piece of code will be exported into the python module that this notebook creates. No other code will be included -- any other code in this notebook is just for demonstration, documentation, and testing.

You can toggle expanding/collapsing the source code of all exported sections by using the `</> Code` menu in the top right of the rendered notebook page.

:::

In [None]:
#| exports

# Maps every supported model id to a short tier name. The tier name is the key used
# for price lookups (`pricing[model_types[model]]`), so each value used here needs a
# matching entry in `pricing`.
# Insertion order matters: `all_models` preserves it, and `models` is sliced from the front.
model_types = {
    # Anthropic
    'claude-opus-4-1-20250805': 'opus',
    'claude-sonnet-4-20250514': 'sonnet',
    'claude-opus-4-20250514': 'opus-4',
    'claude-3-opus-20240229': 'opus-3',
    'claude-3-7-sonnet-20250219': 'sonnet-3-7',
    'claude-3-5-sonnet-20241022': 'sonnet-3-5',
    'claude-3-haiku-20240307': 'haiku-3',
    'claude-3-5-haiku-20241022': 'haiku-3-5',
    # AWS
    'anthropic.claude-opus-4-1-20250805-v1:0': 'opus',
    'anthropic.claude-3-5-sonnet-20241022-v2:0': 'sonnet',
    'anthropic.claude-3-opus-20240229-v1:0': 'opus-3',
    'anthropic.claude-3-sonnet-20240229-v1:0': 'sonnet',
    'anthropic.claude-3-haiku-20240307-v1:0': 'haiku',
    # Google
    'claude-opus-4-1@20250805': 'opus',
    'claude-3-5-sonnet-v2@20241022': 'sonnet',
    'claude-3-opus@20240229': 'opus-3',
    'claude-3-sonnet@20240229': 'sonnet',
    'claude-3-haiku@20240307': 'haiku',
}

# Every model id known to the library, in declaration order.
all_models = list(model_types)

In [None]:
#|hide
# Full list of models via the anthropic SDK for reference. Not all models are surfaced by Claudette.
Model

typing.Union[typing.Literal['claude-3-7-sonnet-latest', 'claude-3-7-sonnet-20250219', 'claude-3-5-haiku-latest', 'claude-3-5-haiku-20241022', 'claude-sonnet-4-20250514', 'claude-sonnet-4-0', 'claude-4-sonnet-20250514', 'claude-3-5-sonnet-latest', 'claude-3-5-sonnet-20241022', 'claude-3-5-sonnet-20240620', 'claude-opus-4-0', 'claude-opus-4-20250514', 'claude-4-opus-20250514', 'claude-opus-4-1-20250805', 'claude-3-opus-latest', 'claude-3-opus-20240229', 'claude-3-haiku-20240307'], str]

In [None]:
#| export
# Model ids for Anthropic's own API: the first 8 entries of `model_types` (its "# Anthropic" group).
# The slice length must be kept in step with that group when models are added or removed.
models = all_models[:8]

In [None]:
models

['claude-opus-4-1-20250805',
 'claude-sonnet-4-20250514',
 'claude-opus-4-20250514',
 'claude-3-opus-20240229',
 'claude-3-7-sonnet-20250219',
 'claude-3-5-sonnet-20241022',
 'claude-3-haiku-20240307',
 'claude-3-5-haiku-20241022']

In [None]:
#| export
# Model ids listed for the AWS-hosted client.
# NOTE(review): 'claude-3-5-haiku-20241022' and 'claude-3-7-sonnet-20250219' are written in the
# plain Anthropic id format rather than the `anthropic.<name>-v<N>:0` form of the other entries,
# and 'anthropic.claude-sonnet-4-20250514-v1:0' has no entry in `model_types` -- confirm these
# are the intended ids.
models_aws = [
    'anthropic.claude-opus-4-1-20250805-v1:0',
    'anthropic.claude-sonnet-4-20250514-v1:0',
    'claude-3-5-haiku-20241022',
    'claude-3-7-sonnet-20250219',
    'anthropic.claude-3-opus-20240229-v1:0',
    'anthropic.claude-3-5-sonnet-20241022-v2:0'
]

In [None]:
#| export
# Model ids listed for the Google-hosted client.
# NOTE(review): 'anthropic.claude-3-sonnet-20240229-v1:0' and
# 'anthropic.claude-3-haiku-20240307-v1:0' appear under the "# AWS" group of `model_types`,
# not the "# Google" group -- confirm whether they belong in `models_aws` instead.
models_goog = [
    'claude-opus-4-1@20250805',
    'anthropic.claude-3-sonnet-20240229-v1:0',
    'anthropic.claude-3-haiku-20240307-v1:0',
    'claude-3-opus@20240229',
    'claude-3-5-sonnet-v2@20241022',
    'claude-3-sonnet@20240229',
    'claude-3-haiku@20240307'
]

In [None]:
#| exports
# Models flagged as text-only: `Client` sets `text_only` from this tuple and then refuses
# message lists that contain an image block.
text_only_models = ('claude-3-5-haiku-20241022',)

In [None]:
#| exports
# Capability tables: each set holds the ids of the models that support the feature.
# Streaming, system prompts and temperature are available on every known model.
has_streaming_models = {*all_models}
has_system_prompt_models = {*all_models}
has_temperature_models = {*all_models}
# Extended thinking is only available on this explicit subset.
has_extended_thinking_models = set((
    'claude-opus-4-1-20250805',
    'claude-opus-4-20250514',
    'claude-sonnet-4-20250514',
    'claude-3-7-sonnet-20250219',
))

In [None]:
has_extended_thinking_models

{'claude-3-7-sonnet-20250219',
 'claude-opus-4-1-20250805',
 'claude-opus-4-20250514',
 'claude-sonnet-4-20250514'}

In [None]:
#| exports

def can_stream(m):
    "Whether model id `m` is listed as supporting streamed responses."
    return m in has_streaming_models

def can_set_system_prompt(m):
    "Whether model id `m` is listed as accepting a system prompt."
    return m in has_system_prompt_models

def can_set_temperature(m):
    "Whether model id `m` is listed as accepting a temperature setting."
    return m in has_temperature_models

def can_use_extended_thinking(m):
    "Whether model id `m` is listed as supporting extended thinking."
    return m in has_extended_thinking_models

We include these functions to provide a uniform library interface with cosette, since OpenAI models such as o1 do not have many of these capabilities.

In [None]:
assert can_stream('claude-3-5-sonnet-20241022') and can_set_system_prompt('claude-3-5-sonnet-20241022') and can_set_temperature('claude-3-5-sonnet-20241022')

These are the current versions and [prices](https://www.anthropic.com/pricing#anthropic-api) of Anthropic's models at the time of writing.

In [None]:
model = models[1]; model

'claude-sonnet-4-20250514'

For examples, we'll use the latest Sonnet, since it's awesome.

## Anthropic SDK

In [None]:
cli = Anthropic()

This is what Anthropic's SDK provides for interacting with Python. To use it, pass it a list of *messages*, with *content* and a *role*. The roles should alternate between *user* and *assistant*.

:::{.callout-tip}

After the code below you'll see an indented section with an orange vertical line on the left. This is used to show the *result* of running the code above. Because the code is running in a Jupyter Notebook, we don't have to use `print` to display results, we can just type the expression directly, as we do with `r` here.

:::

In [None]:
m = {'role': 'user', 'content': "I'm Jeremy"}
r = cli.messages.create(messages=[m], model=model, max_tokens=100)
r

Nice to meet you, Jeremy! How are you doing today?

<details>

- id: `msg_01WJzE6MFdW7s3Tqq3VW5uzF`
- content: `[{'citations': None, 'text': 'Nice to meet you, Jeremy! How are you doing today?', 'type': 'text'}]`
- model: `claude-opus-4-1-20250805`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 10, 'output_tokens': 16, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

### Formatting output

That output is pretty long and hard to read, so let's clean it up. We'll start by pulling out the `Content` part of the message.
To do that, we're going to write our first function which will be included in the `claudette/core.py` module.

:::{.callout-tip}

This is the first exported public function or class we're creating (the previous export was of a variable). In the rendered version of the notebook for these you'll see 4 things, in this order (unless the symbol starts with a single `_`, which indicates it's *private*):

- The signature (with the symbol name as a heading, with a horizontal rule above)
- A table of parameter docs (if provided)
- The doc string (in italics).
- The source code (in a collapsible "Exported source" block)

After that, we generally provide a bit more detail on what we've created, and why, along with a sample usage.

:::

In [None]:
#| exports
def _type(x):
    try: return x.type
    except AttributeError: return x.get('type')

def find_block(r:abc.Mapping, # The message to look in
               blk_type:type|str=TextBlock  # The type of block to find
              ):
    "Return the first block in `r.content` matching `blk_type`: a string is compared with the block's `type`, a class is checked with `isinstance`."
    if isinstance(blk_type,str):
        def matches(blk): return _type(blk)==blk_type
    else:
        def matches(blk): return isinstance(blk,blk_type)
    return first(blk for blk in r.content if matches(blk))

This makes it easier to grab the needed parts of Claude's responses, which can include multiple pieces of content. By default, we look for the first text block. That will generally have the content we want to display.

In [None]:
find_block(r)

TextBlock(citations=None, text='Nice to meet you, Jeremy! How are you doing today? Is there anything I can help you with?', type='text')

In [None]:
#| exports
def contents(r):
    "Helper to get the contents from Claude response `r`."
    # Exported because the exported `_repr_markdown_` patches call this helper.
    # Prefer the first text block; if there is none, fall back to the first block of any kind.
    blk = find_block(r)
    if not blk and r.content: blk = r.content[0]
    # Blocks without a `text` attribute (and the no-block case) are rendered with `str`.
    return blk.text.strip() if hasattr(blk,'text') else str(blk)

For display purposes, we often just want to show the text itself.

In [None]:
contents(r)

'Nice to meet you, Jeremy! How are you doing today? Is there anything I can help you with?'

In [None]:
#| exports
@patch
def _repr_markdown_(self:(Message)):
    "Markdown display of a message: its text contents, followed by every field in a collapsed details list."
    fields = [f'{k}: `{v}`' for k,v in self.model_dump().items()]
    det = '\n- '.join(fields)
    # escape `$` for jupyter latex
    cts = re.sub(r'\$', '&#36;', contents(self))
    return f"""{cts}

<details>

- {det}

</details>"""

Jupyter looks for a `_repr_markdown_` method in displayed objects; we add this in order to display just the content text, and collapse full details into a hideable section. Note that `patch` is from [fastcore](https://fastcore.fast.ai/), and is used to add (or replace) functionality in an existing class. We pass the class(es) that we want to patch as type annotations to `self`. In this case, `_repr_markdown_` is being added to Anthropic's `Message` class, so when we display the message now we just see the contents, and the details are hidden away in a collapsible details block.

In [None]:
r

Nice to meet you, Jeremy! How are you doing today? Is there anything I can help you with?

<details>

- id: `msg_01J8ZkXuLCPbVVrnJHLkPE61`
- content: `[{'citations': None, 'text': 'Nice to meet you, Jeremy! How are you doing today? Is there anything I can help you with?', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 10, 'output_tokens': 25, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

One key part of the response is the `usage` key, which tells us how many tokens we used by returning a `Usage` object.

We'll add some helpers to make things a bit cleaner for creating and formatting these objects.

In [None]:
r.usage

In: 10; Out: 25; Cache create: 0; Cache read: 0; Total Tokens: 35; Search: 0

In [None]:
#| exports
def server_tool_usage(web_search_requests=0):
    'Build a `ServerToolUsage` holding the given number of web search requests (zero by default)'
    counts = dict(web_search_requests=web_search_requests)
    return ServerToolUsage(**counts)

In [None]:
#| exports
def usage(inp=0, # input tokens
          out=0,  # Output tokens
          cache_create=0, # Cache creation tokens
          cache_read=0, # Cache read tokens
          server_tool_use=None # server tool use (`None` creates a fresh, zeroed `ServerToolUsage`)
         ):
    'Slightly more concise version of `Usage`.'
    # Build the default on every call: a default evaluated in the signature is a single
    # object that would be shared by every `Usage` created without an explicit value.
    if server_tool_use is None: server_tool_use = server_tool_usage()
    return Usage(input_tokens=inp, output_tokens=out, cache_creation_input_tokens=cache_create,
                 cache_read_input_tokens=cache_read, server_tool_use=server_tool_use)

The constructor provided by Anthropic is rather verbose, so we clean it up a bit, using a lowercase version of the name.

In [None]:
usage(5)

In: 5; Out: 0; Cache create: 0; Cache read: 0; Total Tokens: 5; Search: 0

In [None]:
#| exports
def _dgetattr(o,s,d): 
    "Like getattr, but returns the default if the result is None"
    return getattr(o,s,d) or d

@patch(as_prop=True)
def total(self:Usage):
    "Total token count: input + output + cache creation + cache read (absent or `None` cache counts count as 0)."
    cache_toks = _dgetattr(self, "cache_creation_input_tokens",0) + _dgetattr(self, "cache_read_input_tokens",0)
    return self.input_tokens + self.output_tokens + cache_toks

Adding a `total` property to `Usage` makes it easier to see how many tokens we've used up altogether.

In [None]:
usage(5,1).total

6

In [None]:
#| exports
@patch
def __repr__(self:Usage):
    "One-line summary: input/output tokens, cache tokens, total tokens and web search count."
    # A missing or `None` server tool usage is displayed as zero searches
    server_tool_use = _dgetattr(self, "server_tool_use",server_tool_usage())
    parts = [
        f'In: {self.input_tokens}; Out: {self.output_tokens}',
        f'Cache create: {_dgetattr(self, "cache_creation_input_tokens",0)}; Cache read: {_dgetattr(self, "cache_read_input_tokens",0)}',
        f'Total Tokens: {self.total}',
        f'Search: {server_tool_use.web_search_requests}',
    ]
    return '; '.join(parts)

In python, patching `__repr__` lets us change how an object is displayed. (More generally, methods starting and ending in `__` in Python are called `dunder` methods, and have some `magic` behavior -- such as, in this case, changing how an object is displayed.) We won't be directly displaying ServerToolUsage's, so we can handle its display behavior in the same Usage `__repr__`

In [None]:
usage(5)

In: 5; Out: 0; Cache create: 0; Cache read: 0; Total Tokens: 5; Search: 0

In [None]:
#| exports
@patch
def __add__(self:ServerToolUsage, b):
    "Combine two server tool usage objects by summing their web search request counts"
    n_searches = self.web_search_requests + b.web_search_requests
    return ServerToolUsage(web_search_requests=n_searches)

And, patching `__add__` lets `+` work on a `ServerToolUsage` as well as a `Usage` object.

In [None]:
server_tool_usage(1) + server_tool_usage(2)

ServerToolUsage(web_search_requests=3)

In [None]:
#| exports
@patch
def __add__(self:Usage, b):
    "Sum two `Usage` objects field by field: input, output, cache creation and cache read tokens, plus server tool use"
    # Cache counts and server tool use may be absent or `None`; they then count as zero
    cache_create = _dgetattr(self,'cache_creation_input_tokens',0) + _dgetattr(b,'cache_creation_input_tokens',0)
    cache_read = _dgetattr(self,'cache_read_input_tokens',0) + _dgetattr(b,'cache_read_input_tokens',0)
    tool_use = _dgetattr(self,'server_tool_use',server_tool_usage()) + _dgetattr(b,'server_tool_use',server_tool_usage())
    return usage(inp=self.input_tokens+b.input_tokens,
                 out=self.output_tokens+b.output_tokens,
                 cache_create=cache_create,
                 cache_read=cache_read,
                 server_tool_use=tool_use)

In [None]:
r.usage+r.usage + usage(server_tool_use=server_tool_usage(1))

In: 20; Out: 50; Cache create: 0; Cache read: 0; Total Tokens: 70; Search: 1

### Creating messages

Creating correctly formatted `dict`s from scratch every time isn't very handy, so we'll import a couple of helper functions from the `msglm` library.

Let's use `mk_msg` to recreate our msg `{'role': 'user', 'content': "I'm Jeremy"}` from earlier.

In [None]:
prompt = "I'm Jeremy"
m = mk_msg(prompt)
r = cli.messages.create(messages=[m], model=model, max_tokens=100)
r

Hello Jeremy! Nice to meet you. How are you doing today? Is there anything I can help you with or would you like to chat about something in particular?

<details>

- id: `msg_01LURaS5AXkvEg257K5vZCEm`
- content: `[{'citations': None, 'text': 'Hello Jeremy! Nice to meet you. How are you doing today? Is there anything I can help you with or would you like to chat about something in particular?', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 10, 'output_tokens': 36, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

We can pass more than just text messages to Claude. As we'll see later we can also pass images, SDK objects, etc. To handle these different data types we need to pass the type along with our content to Claude. 

Here's an example of a multimodal message containing text and images. 

```json
{
    'role': 'user', 
    'content': [
        {'type':'text', 'text':'What is in the image?'},
        {
            'type':'image', 
            'source': {
                'type':'base64', 'media_type':'media_type', 'data': 'data'
            }
        }
    ]
}
```

`mk_msg` infers the type automatically and creates the appropriate data structure. 

LLMs don't actually have state; instead, dialogs are created by passing back all previous prompts and responses every time. With Claude, they always alternate *user* and *assistant*. We'll use `mk_msgs` from `msglm` to make it easier to build up these dialog lists.

In [None]:
msgs = mk_msgs([prompt, r, "I forgot my name. Can you remind me please?"]) 
msgs

[{'role': 'user', 'content': "I'm Jeremy"},
 {'role': 'assistant',
  'content': [TextBlock(citations=None, text='Hello Jeremy! Nice to meet you. How are you doing today? Is there anything I can help you with or would you like to chat about something in particular?', type='text')]},
 {'role': 'user', 'content': 'I forgot my name. Can you remind me please?'}]

In [None]:
cli.messages.create(messages=msgs, model=model, max_tokens=200)

Your name is Jeremy - you introduced yourself to me just a moment ago in your first message.

<details>

- id: `msg_01SVb9E1RtGWG9GZHpKUfUqg`
- content: `[{'citations': None, 'text': 'Your name is Jeremy - you introduced yourself to me just a moment ago in your first message.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 60, 'output_tokens': 22, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

## Client

In [None]:
#| exports
class Client:
    def __init__(self, model, cli=None, log=False, cache=False):
        "Basic Anthropic messages client: stores `model`, starts usage at zero, and optionally keeps a call log."
        self.model = model
        self.use = usage()
        self.text_only = model in text_only_models
        # `log` is a list of call records when logging is requested, otherwise `None`
        self.log = [] if log else None
        # When no client is supplied, create one with the prompt-caching beta header
        if not cli: cli = Anthropic(default_headers={'anthropic-beta': 'prompt-caching-2024-07-31'})
        self.c = cli
        self.cache = cache

We'll create a simple `Client` for `Anthropic` which tracks usage and stores the model to use. We don't add any methods right away -- instead we'll use `patch` for that so we can add and document them incrementally.

In [None]:
c = Client(model)
c.use

In: 0; Out: 0; Cache create: 0; Cache read: 0; Total Tokens: 0; Search: 0

In [None]:
#| exports
@patch
def _r(self:Client, r:Message, prefill=''):
    "Record message `r` as the latest result and add its usage to the running total; returns `r`."
    # Claude does not repeat the prefill, so put it back at the start of the first text block
    blk = find_block(r) if prefill else None
    if blk: blk.text = prefill + (blk.text or '')
    self.result = r
    self.use += r.usage
    self.stop_reason,self.stop_sequence = r.stop_reason,r.stop_sequence
    return r

We use a `_` prefix on private methods, but we document them here in the interests of literate source code.

`_r` will be used each time we get a new result, to track usage and also to keep the result available for later.

In [None]:
c._r(r)
c.use

In: 10; Out: 36; Cache create: 0; Cache read: 0; Total Tokens: 46; Search: 0

Whereas OpenAI's models use a `stream` parameter for streaming, Anthropic's use a separate method. We implement Anthropic's approach in a private method, and then use a `stream` parameter in `__call__` for consistency:

In [None]:
#| exports
@patch
def _log(self:Client, final, prefill, msgs, **kwargs):
    "Store `final` via `_r` and, when logging is enabled, append a record of the call; returns the stored result."
    self._r(final, prefill)
    if self.log is None: return self.result
    # Same key order as the call itself: messages, then the call kwargs, then the outcome
    entry = {"msgs": msgs}
    entry.update(kwargs)
    entry.update({"result": self.result, "use": self.use,
                  "stop_reason": self.stop_reason, "stop_sequence": self.stop_sequence})
    self.log.append(entry)
    return self.result

Once streaming is complete, we need to store the final message and call any completion callback that's needed.

In [None]:
#| exports
@save_iter
def _stream(o, cm, prefill, cb):
    "Generator behind streamed calls: yields `prefill` first, then each text chunk from the stream opened by `cm`."
    # Callers pass only `(cm, prefill, cb)`; `o` is supplied by `save_iter` -- presumably the
    # iterator object handed back to the caller, which is how `.value` can be read afterwards.
    with cm as s:
        yield prefill
        yield from s.text_stream
        # Only reached once the text stream is exhausted: keep the final message, then notify `cb`
        o.value = s.get_final_message()
        cb(o.value)

In [None]:
#| export
def get_types(msgs):
    "List the `type` of every content block in `msgs`, in order; content that is not a list counts as one 'text' entry."
    def _blk_type(blk):
        # Blocks may be objects with a `.type` attribute or plain dicts
        return getattr(blk, 'type', None) or blk['type']
    types = []
    for msg in msgs:
        content = msg.get('content', [])
        if not isinstance(content, list):
            types.append('text')
            continue
        for blk in content: types.append(_blk_type(blk))
    return types

In [None]:
get_types(msgs)

['text', 'text', 'text']

In [None]:
#| export
def mk_tool_choice(choose:Union[str,bool,None])->dict:
    "Build a `tool_choice` dict: a string selects that named tool, any other truthy value means 'any', and a falsy value such as `None` means 'auto'"
    if isinstance(choose,str):
        return {"type": "tool", "name": choose}
    if choose:
        return {'type':'any'}
    return {'type':'auto'}

In [None]:
print(mk_tool_choice('sums'))
print(mk_tool_choice(True))
print(mk_tool_choice(None))

{'type': 'tool', 'name': 'sums'}
{'type': 'any'}
{'type': 'auto'}


Claude can be forced to use a particular tool, or select from a specific list of tools, or decide for itself when to use a tool. If you want to force a tool (or force choosing from a list), include a `tool_choice` param with a dict from `mk_tool_choice`.

Claude supports adding an extra `assistant` message at the end, which contains the *prefill* -- i.e. the text we want Claude to assume the response starts with. However Claude doesn't actually repeat that in the response, so for convenience we add it.

In [None]:
#| exports
@patch
def _precall(self:Client, msgs, prefill, sp, temp, maxtok, maxthinktok, stream,
             stop, tools, tool_choice, kwargs):
    "Turn the arguments of `__call__` into the message list and keyword arguments for the API call; `kwargs` is updated in place."
    if tools:
        # Callables are converted to tool schemas; anything else is passed through as given
        kwargs['tools'] = [o if not callable(o) else get_schema(o) for o in listify(tools)]
    if tool_choice: kwargs['tool_choice'] = mk_tool_choice(tool_choice)
    if maxthinktok:
        # Enabling thinking also sets the temperature to 1 and discards any prefill
        kwargs['thinking'] = {'type':'enabled', 'budget_tokens':maxthinktok}
        temp,prefill = 1,''
    if not isinstance(msgs,list): msgs = [msgs]
    if stop is not None:
        kwargs["stop_sequences"] = stop if isinstance(stop, (list)) else [stop]
    # A non-empty prefill is stripped and appended after the given messages
    tail = [prefill.strip()] if prefill else []
    msgs = mk_msgs(msgs+tail, cache=self.cache, cache_last_ckpt_only=self.cache)
    has_image = 'image' in get_types(msgs)
    assert not (has_image and self.text_only), f"Images not supported by: {self.model}"
    kwargs.update(max_tokens=maxtok, system=sp, temperature=temp)
    return msgs, kwargs

In [None]:
#| exports
@patch
@delegates(messages.Messages.create)
def __call__(self:Client,
             msgs:list, # List of messages in the dialog
             sp='', # The system prompt
             temp=0, # Temperature
             maxtok=4096, # Maximum tokens
             maxthinktok=0, # Maximum thinking tokens
             prefill='', # Optional prefill to pass to Claude as start of its response
             stream:bool=False, # Stream response?
             stop=None, # Stop sequence
             tools:Optional[list]=None, # List of tools to make available to Claude
             tool_choice:Optional[dict]=None, # Optionally force use of some tool
             cb=None, # Callback to pass result to when complete
             **kwargs):
    "Make a call to Claude."
    # `_precall` discards the prefill when thinking is enabled, so it is never sent to Claude.
    # Clear it here as well; otherwise it would still be prepended to the reply text (and
    # yielded at the start of the stream) even though Claude never saw it.
    if maxthinktok: prefill = ''
    msgs,kwargs = self._precall(msgs, prefill, sp, temp, maxtok, maxthinktok, stream,
                                stop, tools, tool_choice, kwargs)
    m = self.c.messages
    # Streaming uses a separate SDK method rather than a flag
    f = m.stream if stream else m.create
    res = f(model=self.model, messages=msgs, **kwargs)
    def _cb(v):
        # Record the result, usage and log entry first, then hand off to the user's callback
        self._log(v, prefill=prefill, msgs=msgs, **kwargs)
        if cb: cb(v)
    # When streaming, `_cb` runs once the stream has been fully consumed
    if stream: return _stream(res, prefill, _cb)
    _cb(res)
    return res

Defining `__call__` lets us use an object like a function (i.e. it's *callable*). We use it as a small wrapper over `messages.create`.

In [None]:
c = Client(model, log=True)
c.use

In: 0; Out: 0; Cache create: 0; Cache read: 0; Total Tokens: 0; Search: 0

In [None]:
c('Hi')

Hello! How are you doing today? Is there anything I can help you with?

<details>

- id: `msg_01QnW4AmQNCU7xQoiqaNdnYx`
- content: `[{'citations': None, 'text': 'Hello! How are you doing today? Is there anything I can help you with?', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 8, 'output_tokens': 20, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

Usage details are automatically updated after each call:

In [None]:
c.use

In: 8; Out: 20; Cache create: 0; Cache read: 0; Total Tokens: 28; Search: 0

A log of all messages is kept if `log=True` is passed:

In [None]:
pprint(c.log)

[{'max_tokens': 4096,
  'msgs': [{'content': 'Hi', 'role': 'user'}],
  'result': Message(id='msg_01QnW4AmQNCU7xQoiqaNdnYx', content=[TextBlock(citations=None, text='Hello! How are you doing today? Is there anything I can help you with?', type='text')], model='claude-sonnet-4-20250514', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=In: 8; Out: 20; Cache create: 0; Cache read: 0; Total Tokens: 28; Search: 0),
  'stop_reason': 'end_turn',
  'stop_sequence': None,
  'system': '',
  'temperature': 0,
  'use': In: 8; Out: 20; Cache create: 0; Cache read: 0; Total Tokens: 28; Search: 0}]


Let's try out *prefill*:

In [None]:
q = "Very concisely, what is the meaning of life?"
pref = 'According to Douglas Adams, '

In [None]:
c(q, prefill=pref)

According to Douglas Adams, 42. But seriously, there's no universal answer - it's deeply personal. Many find meaning through relationships, purpose, growth, helping others, or spiritual beliefs. The search itself might be part of the point.

<details>

- id: `msg_01UwGPGcytqGDLPkwwkKcmtk`
- content: `[{'citations': None, 'text': "According to Douglas Adams, 42. But seriously, there's no universal answer - it's deeply personal. Many find meaning through relationships, purpose, growth, helping others, or spiritual beliefs. The search itself might be part of the point.", 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 24, 'output_tokens': 46, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
c.use

In: 32; Out: 66; Cache create: 0; Cache read: 0; Total Tokens: 98; Search: 0

We can pass `stream=True` to stream the response back incrementally:

In [None]:
r = c('Hi', stream=True)
for o in r: print(o, end='')

Hello! How are you doing today? Is there anything I can help you with?

In [None]:
c.use

In: 40; Out: 86; Cache create: 0; Cache read: 0; Total Tokens: 126; Search: 0

The full final message after completion of streaming is in the `value` attr of the response:

In [None]:
r.value

Hello! How are you doing today? Is there anything I can help you with?

<details>

- id: `msg_01QNiqERauivLSSrWPuU3HK6`
- content: `[{'citations': None, 'text': 'Hello! How are you doing today? Is there anything I can help you with?', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 8, 'output_tokens': 20, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
for o in c(q, prefill=pref, stream=True): print(o, end='')

According to Douglas Adams, 42. But seriously, there's no universal answer - it's deeply personal. Many find meaning through relationships, purpose, growth, helping others, or spiritual beliefs. The search itself might be part of the point.

In [None]:
c.use

In: 64; Out: 132; Cache create: 0; Cache read: 0; Total Tokens: 196; Search: 0

Pass a stop sequence if you want Claude to stop generating text when it encounters it.

In [None]:
c("Count from 1 to 10", stop="5")

1, 2, 3, 4,

<details>

- id: `msg_01DuSUcJsaVBiLc7MpoDkFtH`
- content: `[{'citations': None, 'text': '1, 2, 3, 4, ', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `stop_sequence`
- stop_sequence: `5`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 15, 'output_tokens': 14, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

This also works with streaming, and you can pass more than one stop sequence:

In [None]:
for o in c("Count from 1 to 10", stop=["3", "yellow"], stream=True): print(o, end='')
print()
print(c.stop_reason, c.stop_sequence)

1, 2, 
stop_sequence 3


We've shown the token usage, but what we really care about is pricing. Let's extract the latest [pricing](https://www.anthropic.com/pricing#anthropic-api) from Anthropic into a `pricing` dict.

In [None]:
#| export
# Every tier name that `model_types` can produce needs an entry here, because costs are
# looked up as `pricing[model_types[model]]`; a missing tier raises `KeyError`.
pricing = {  # model type: $ / million tokens (input, output, cache write, cache read)
    'opus': (15, 75, 18.75, 1.5),
    'opus-4': (15, 75, 18.75, 1.5),
    'opus-3': (15, 75, 18.75, 1.5),
    'sonnet': (3, 15, 3.75, 0.3),
    'sonnet-3-7': (3, 15, 3.75, 0.3),
    'sonnet-3-5': (3, 15, 3.75, 0.3),
    'haiku': (0.25, 1.25, 0.3, 0.03),  # tier name used for the AWS/Google Claude 3 Haiku ids
    'haiku-3': (0.25, 1.25, 0.3, 0.03),
    # NOTE(review): confirm the 3.5 Haiku figures against the current published price list.
    'haiku-3-5': (1, 3, 1.25, 0.1),
}

In [None]:
#| exports
def get_pricing(m, u):
    "Return part of the `pricing[m]` tuple, chosen by the prompt token count of usage `u`."
    # Below 128,000 prompt tokens: the first three entries; otherwise: the entries from index 3 on.
    # NOTE(review): the `Usage` objects built in this file carry `input_tokens`, not
    # `prompt_token_count`, and `pricing` tuples have four entries, so the `[3:]` branch yields
    # only the cache-read price. This looks carried over from a different provider's library --
    # confirm whether the function is still needed and what it should return.
    return pricing[m][:3] if u.prompt_token_count < 128_000 else pricing[m][3:]

Similarly, let's get the pricing for the latest server tools:

In [None]:
#| export
# USD price per 1,000 uses of each server-side tool, keyed by the usage counter it is billed on.
server_tool_pricing = {
    'web_search_requests': 10, # $10 per 1,000
}

We'll patch `Usage` to enable it to compute the cost given pricing.

In [None]:
#| exports
@patch
def cost(self:Usage, costs:tuple) -> float:
    "Cost in USD of this usage, given `costs` as $ per million tokens (input, output, cache write, cache read), plus web search charges."
    # Cache counts may be absent or `None`; they then count as zero
    cache_w = _dgetattr(self, "cache_creation_input_tokens",0)
    cache_r = _dgetattr(self, "cache_read_input_tokens",0)
    weighted_toks = self.input_tokens * costs[0] +  self.output_tokens * costs[1] +  cache_w * costs[2] + cache_r * costs[3]
    tok_cost = weighted_toks / 1e6
    # Server tools are priced per 1,000 requests
    n_searches = _dgetattr(self, "server_tool_use",server_tool_usage()).web_search_requests
    server_tool_cost = n_searches * server_tool_pricing['web_search_requests'] / 1e3
    return tok_cost + server_tool_cost

In [None]:
#| exports
@patch(as_prop=True)
def cost(self: Client) -> float:
    "Total cost in USD of all usage accumulated by this client, priced by its model's tier."
    tier = model_types[self.model]
    return self.use.cost(pricing[tier])

In [None]:
#| exports
def get_costs(c):
    "Break down the cost of client `c`'s usage: returns (input cost, output cost, cache cost, cached token count, server tool cost)."
    costs = pricing[model_types[c.model]]
    use = c.use

    inp_cost = use.input_tokens * costs[0] / 1e6
    out_cost = use.output_tokens * costs[1] / 1e6

    # Cache counts and server tool use are optional on `Usage`; read them the same way
    # `Usage.cost` does so a `None` value counts as zero instead of raising.
    cache_w = _dgetattr(use, 'cache_creation_input_tokens', 0)
    cache_r = _dgetattr(use, 'cache_read_input_tokens', 0)
    cache_cost = (cache_w * costs[2] + cache_r * costs[3]) / 1e6

    server_tool_use = _dgetattr(use, 'server_tool_use', server_tool_usage())
    # Server tools are priced per 1,000 requests
    server_tool_cost = server_tool_use.web_search_requests * server_tool_pricing['web_search_requests'] / 1e3
    return inp_cost, out_cost, cache_cost, cache_w + cache_r, server_tool_cost

The markdown repr of the client itself will show the latest result, along with the usage so far.

In [None]:
#| exports
@patch
def _repr_markdown_(self:Client):
    if not hasattr(self,'result'): return 'No results yet'
    msg = contents(self.result)
    inp_cost, out_cost, cache_cost, cached_toks, server_tool_cost = get_costs(self)
    return f"""{msg}

| Metric | Count | Cost (USD) |
|--------|------:|-----:|
| Input tokens | {self.use.input_tokens:,} | {inp_cost:.6f} |
| Output tokens | {self.use.output_tokens:,} | {out_cost:.6f} |
| Cache tokens | {cached_toks:,} | {cache_cost:.6f} |
| Server tool use | {self.use.server_tool_use.web_search_requests:,} | {server_tool_cost:.6f} |
| **Total** | **{self.use.total:,}** | **${self.cost:.6f}** |"""

In [None]:
c

1, 2,

| Metric | Count | Cost (USD) |
|--------|------:|-----:|
| Input tokens | 94 | 0.000282 |
| Output tokens | 154 | 0.002310 |
| Cache tokens | 0 | 0.000000 |
| Server tool use | 0 | 0.000000 |
| **Total** | **248** | **$0.002592** |

Pass a list of alternating user/assistant messages to give Claude a "dialog".

In [None]:
c(["My name is Jeremy", "Hi Jeremy!", "Can you remind me what my name is?"])

Your name is Jeremy.

<details>

- id: `msg_0127tN29JdZJj3tW9GVtrgbr`
- content: `[{'citations': None, 'text': 'Your name is Jeremy.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 29, 'output_tokens': 8, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

## Tool use

Let's now look more at tool use (aka *function calling*).

For testing, we need a function that Claude can call; we'll write a simple function that adds numbers together, and will tell us when it's being called:

In [None]:
@dataclass
class MySum: val:int

def sums(
    a:int,  # First thing to sum
    b:int=1 # Second thing to sum
) -> int: # The sum of the inputs
    "Adds a + b."
    print(f"Finding the sum of {a} and {b}")
    return MySum(a + b)

In [None]:
a,b = 604542,6458932
pr = f"What is {a}+{b}?"
sp = "Always use tools when calculations are required."

Claudette can autogenerate a schema thanks to the `toolslm` library. We'll force the use of the tool using the function we created earlier.

In [None]:
tools=[get_schema(sums)]
choice = mk_tool_choice('sums')

We'll start a dialog with Claude now. We'll store the messages of our dialog in `msgs`. The first message will be our prompt `pr`, and we'll pass our `tools` schema.

In [None]:
msgs = mk_msgs(pr)
r = c(msgs, sp=sp, tools=tools, tool_choice=choice)
r

ToolUseBlock(id='toolu_01GeiA75mdKZUu4cy3tv277z', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')

<details>

- id: `msg_01L9NLcLdEJRCW9Yewm27mFy`
- content: `[{'id': 'toolu_01GeiA75mdKZUu4cy3tv277z', 'input': {'a': 604542, 'b': 6458932}, 'name': 'sums', 'type': 'tool_use'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `tool_use`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 440, 'output_tokens': 57, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

When Claude decides that it should use a tool, it passes back a `ToolUseBlock` with the name of the tool to call, and the params to use.

We don't want to allow it to call just any possible function (that would be a security disaster!) so we create a *namespace* -- that is, a dictionary of allowable function names to call.

In [None]:
ns = mk_ns(sums)
ns

{'sums': <function __main__.sums(a: int, b: int = 1) -> int>}

`ToolResult` is used for two special cases:

1) When tool calls are RPCs with claudette running on an application server and code execution happening elsewhere, wrapping with a `result_type` field is used as a type descriptor for the claudette client. 

2) Different result types are represented in the message history with type-specific formats, so `mk_funcres` branches on the `result_type` to build the appropriate Anthropic representation.

Currently images are the only supported tool result type - see https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/implement-tool-use#example-of-tool-result-with-images for the format implemented in `mk_funcres`.

In [None]:
#| export

class ToolResult(BasicRepr):
    "A tool's return value `data`, tagged with a `result_type` describing what kind of data it is."
    # `store_attr()` saves the `__init__` arguments as `self.result_type` and `self.data`
    def __init__(self, result_type: str, data): store_attr()
    # The string form is just the wrapped data, without the type tag
    def __str__(self): return str(self.data)
In [None]:
#| export

def _img_content(b64data):
    return [{"type": "image",
             "source":{"type": "base64", "media_type": "image/png", "data": b64data}},
            {"type": "text", "text": "Captured screenshot."}]

def mk_funcres(fc, ns):
    "Call the tool requested by tool use block `fc`, looked up in `ns`, and wrap its result as a `tool_result` block."
    out = call_func(fc.name, fc.input, ns=ns, raise_on_err=False)
    # Only a `ToolResult` tagged as a PNG image gets structured content; everything else is stringified
    if not isinstance(out, ToolResult): content = str(out)
    elif out.result_type == "image/png": content = _img_content(out.data)  # list of content blocks
    else: content = str(out.data)
    return {"type": "tool_result", "tool_use_id": fc.id, "content": content}

We can now use the function requested by Claude. We look it up in `ns`, and pass in the provided parameters.

In [None]:
fcs = [o for o in r.content if isinstance(o,ToolUseBlock)]
fcs

[ToolUseBlock(id='toolu_01GeiA75mdKZUu4cy3tv277z', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')]

In [None]:
res = [mk_funcres(fc, ns=ns) for fc in fcs]
res

Finding the sum of 604542 and 6458932


[{'type': 'tool_result',
  'tool_use_id': 'toolu_01GeiA75mdKZUu4cy3tv277z',
  'content': 'MySum(val=7063474)'}]

In [None]:
def contents(r):
    "Text of the main block of Claude response `r`, falling back to the string form of the block."
    blk = find_block(r)
    # If no block was found, fall back to the first content item (when there is one)
    if not blk and r.content: blk = r.content[0]
    # Prefer `.text`, then `.content`; whichever exists first is stripped and returned
    for attr in ('text', 'content'):
        if hasattr(blk, attr): return getattr(blk, attr).strip()
    return str(blk)

In [None]:
#| exports
def mk_toolres(
    r:abc.Mapping, # Tool use request response from Claude
    ns:Optional[abc.Mapping]=None # Namespace to search for tools
    ):
    "Messages to append after response `r`: the assistant message, plus a `tool_result` message if any tools were requested."
    blocks = getattr(r, 'content', [])
    msgs = [mk_msg(r.model_dump(), role='assistant')]
    if ns is None: ns = globals()
    # Run every requested tool, collecting one `tool_result` block per tool use block
    tool_results = []
    for blk in blocks:
        if isinstance(blk, ToolUseBlock): tool_results.append(mk_funcres(blk, ns))
    if not tool_results: return msgs
    msgs.append(mk_msg(tool_results))
    return msgs

In [None]:
foo = []
foo.append({})
foo.append({})
foo

[{}, {}]

In order to tell Claude the result of the tool call, we pass back the tool use assistant request and the `tool_result` response.

In [None]:
tr = mk_toolres(r, ns=ns)
tr

Finding the sum of 604542 and 6458932


[{'role': 'assistant',
  'content': [{'id': 'toolu_01GeiA75mdKZUu4cy3tv277z',
    'input': {'a': 604542, 'b': 6458932},
    'name': 'sums',
    'type': 'tool_use'}]},
 {'role': 'user',
  'content': [{'type': 'tool_result',
    'tool_use_id': 'toolu_01GeiA75mdKZUu4cy3tv277z',
    'content': 'MySum(val=7063474)'}]}]

In [None]:
msgs

[{'role': 'user', 'content': 'What is 604542+6458932?'}]

We add this to our dialog, and now Claude has all the information it needs to answer our question.

In [None]:
msgs += tr
contents(c(msgs, sp=sp, tools=tools))

'The sum of 604542 + 6458932 is 7,063,474.'

In [None]:
contents(msgs[-1])

'MySum(val=7063474)'

In [None]:
msgs

[{'role': 'user', 'content': 'What is 604542+6458932?'},
 {'role': 'assistant',
  'content': [{'id': 'toolu_01GeiA75mdKZUu4cy3tv277z',
    'input': {'a': 604542, 'b': 6458932},
    'name': 'sums',
    'type': 'tool_use'}]},
 {'role': 'user',
  'content': [{'type': 'tool_result',
    'tool_use_id': 'toolu_01GeiA75mdKZUu4cy3tv277z',
    'content': 'MySum(val=7063474)'}]}]

### Text editing

Anthropic also has a special tool type specific to text editing.

In [None]:
tools = [text_editor_conf['sonnet']]
tools

[{'type': 'text_editor_20250429', 'name': 'str_replace_based_edit_tool'}]

In [None]:
pr = 'Could you please explain my _quarto.yml file?'
msgs = [mk_msg(pr)]
r = c(msgs, sp=sp, tools=tools)
find_block(r, ToolUseBlock)

ToolUseBlock(id='toolu_01LgRJaJMnTkQZs1rcNbgsta', input={'command': 'view', 'path': '_quarto.yml'}, name='str_replace_based_edit_tool', type='tool_use')

We've gone ahead and created a reference implementation that you can use directly from our `text_editor` module, or use as a reference for creating your own.

In [None]:
ns = mk_ns(str_replace_based_edit_tool)
tr = mk_toolres(r, ns=ns)
msgs += tr
print(contents(c(msgs, sp=sp, tools=tools))[:128])

Great! Let me explain your `_quarto.yml` configuration file section by section:

## Project Configuration
```yaml
project:
  typ


## Structured data

In [None]:
a,b = 604542,6458932
pr = f"What is {a}+{b}?"
sp = "Always use your tools for calculations."

In [None]:
for tools in [sums, [get_schema(sums)]]:
    r = c(pr, tools=tools, tool_choice='sums')
    print(r)

Message(id='msg_01UdVJDcaiEcxXpTtgzVgsRi', content=[ToolUseBlock(id='toolu_01EZvdY4nL7g8LD7xtPF6ztF', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')], model='claude-sonnet-4-20250514', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=In: 435; Out: 53; Cache create: 0; Cache read: 0; Total Tokens: 488; Search: 0)
Message(id='msg_01Er899Zkp5SorPMNVsQwoy8', content=[ToolUseBlock(id='toolu_019Ni4Pxcm3r7YRY36544kcP', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')], model='claude-sonnet-4-20250514', role='assistant', stop_reason='tool_use', stop_sequence=None, type='message', usage=In: 435; Out: 53; Cache create: 0; Cache read: 0; Total Tokens: 488; Search: 0)


In [None]:
ns = mk_ns(sums)
tr = mk_toolres(r, ns=ns)

Finding the sum of 604542 and 6458932


In [None]:
#| exports
@patch
@delegates(Client.__call__)
def structured(self:Client,
               msgs:list, # List of messages in the dialog
               tools:Optional[list]=None, # List of tools to make available to Claude
               ns:Optional[abc.Mapping]=None, # Namespace to search for tools
               **kwargs):
    "Call Claude with `tools`, run each tool call it requests, and return the list of results"
    tools = listify(tools)
    # The tools are also passed as `tool_choice`
    response = self(msgs, tools=tools, tool_choice=tools, **kwargs)
    if ns is None: ns = mk_ns(*tools)
    results = []
    for blk in getattr(response, 'content', []):
        if isinstance(blk, ToolUseBlock): results.append(call_func(blk.name, blk.input, ns=ns))
    return results

Anthropic's API does not support response formats directly, so instead we provide a `structured` method to use tool calling to achieve the same result. The result of the tool is not passed back to Claude in this case, but instead is returned directly to the user. 

In [None]:
c.structured(pr, tools=[sums])

Finding the sum of 604542 and 6458932


[MySum(val=7063474)]

In [None]:
c

ToolUseBlock(id='toolu_01LhB3XbAjBB7Awm2fjyYdMN', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')

| Metric | Count | Cost (USD) |
|--------|------:|-----:|
| Input tokens | 4,955 | 0.014865 |
| Output tokens | 1,240 | 0.018600 |
| Cache tokens | 0 | 0.000000 |
| Server tool use | 0 | 0.000000 |
| **Total** | **6,195** | **$0.033465** |

## Custom Types with Tools Use

We need to add tool support for custom types too. Let's test out custom types using a minimal example.

In [None]:
class Book(BasicRepr):
    def __init__(self, title: str, pages: int): store_attr()
    def __repr__(self):
        return f"Book Title : {self.title}\nNumber of Pages : {self.pages}"

In [None]:
Book("War and Peace", 950)

Book Title : War and Peace
Number of Pages : 950

In [None]:
def find_page(book: Book, # The book to find the halfway point of
              percent: int, # Percent of a book to read to, e.g. halfway == 50, 
) -> int:
    "The page number corresponding to `percent` completion of a book"
    return round(book.pages * (percent / 100.0))

In [None]:
get_schema(find_page)

{'name': 'find_page',
 'description': 'The page number corresponding to `percent` completion of a book\n\nReturns:\n- type: integer',
 'input_schema': {'type': 'object',
  'properties': {'book': {'type': 'object',
    'description': 'The book to find the halfway point of',
    '$ref': '#/$defs/Book'},
   'percent': {'type': 'integer',
    'description': 'Percent of a book to read to, e.g. halfway == 50,'}},
  'required': ['book', 'percent'],
  '$defs': {'Book': {'type': 'object',
    'properties': {'title': {'type': 'string', 'description': ''},
     'pages': {'type': 'integer', 'description': ''}},
    'title': 'Book',
    'required': ['title', 'pages']}}}}

In [None]:
choice = mk_tool_choice('find_page')
choice

{'type': 'tool', 'name': 'find_page'}

Claudette will pack objects as dict, so we'll transform tool functions with user-defined types into tool functions that accept a dict in lieu of the user-defined type.

First let's convert a single argument:

In [None]:
#| export
def _is_builtin(tp: type):
    "Returns True for built in primitive types or containers"
    return (tp in (str, int, float, bool, complex) or tp is None
        or getattr(tp, '__origin__', None) is not None)  # Pass through all container types

def _convert(val: Dict, # dictionary argument being passed in
            tp: type): # type of the tool function input
    "Build a `tp` instance from dict `val`; non-dict values and pass-through types are returned unchanged"
    needs_build = val is not None and not _is_builtin(tp) and isinstance(val, dict)
    return tp(**val) if needs_build else val

`_is_builtin` decides whether to pass an argument through as-is. Let's check the argument conversion:

In [None]:
(_is_builtin(int), _is_builtin(Book), _is_builtin(List))

(True, False, True)

In [None]:
(_convert(555, int),
 _convert({"title": "War and Peace", "pages": 923}, Book),
 _convert([1, 2, 3, 4], List))

(555,
 Book Title : War and Peace
 Number of Pages : 923,
 [1, 2, 3, 4])

To apply `tool()` to a function is to return a new function where the user-defined types are replaced with dictionary inputs.

In [None]:
#| export
def tool(func):
    """Wrap `func` so that arguments for parameters with user-defined type hints may be passed as dicts.

    Each argument whose parameter has a type hint goes through `_convert` with that hint;
    arguments for unannotated parameters, and positional arguments beyond the named
    parameters, are passed through unchanged. A dict `func` is treated as a ready-made
    schema and returned as-is."""
    if isinstance(func, dict): return func # it's a schema, so don't change
    hints = get_type_hints(func)
    # The signature does not change between calls, so look the parameter names up once
    params = list(inspect.signature(func).parameters)
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Guard on `in hints` for positional args too (as for kwargs), so unannotated
        # parameters do not raise KeyError; iterate over `args` so none are dropped
        new_args = [_convert(arg, hints[params[i]]) if i < len(params) and params[i] in hints else arg
                    for i,arg in enumerate(args)]
        new_kwargs = {k: _convert(v, hints[k]) if k in hints else v for k,v in kwargs.items()}
        return func(*new_args, **new_kwargs)
    return wrapper

A function is transformed into a function with dict arguments substituted for user-defined types. Built-in types such as `percent` here are left untouched.

In [None]:
find_page(book=Book("War and Peace", 950), percent=50)

475

In [None]:
tool(find_page)({"title": "War and Peace", "pages": 950}, percent=50)

475

By passing tools wrapped by `tool()`, tool calls with user-defined types now complete without failing.

In [None]:
pr = "How many pages do I have to read to get halfway through my 950 page copy of War and Peace"
tools = tool(find_page)
tools

<function __main__.find_page(book: __main__.Book, percent: int) -> int>

In [None]:
r = c(pr, tools=[tools])
find_block(r, ToolUseBlock)

ToolUseBlock(id='toolu_01RKkSknteqc681fzhD9P55L', input={'book': {'title': 'War and Peace', 'pages': 950}, 'percent': 50}, name='find_page', type='tool_use')

In [None]:
tr = mk_toolres(r, ns=[tools])
tr

[{'role': 'assistant',
  'content': [{'citations': None,
    'text': "I'll help you find the halfway point of your copy of War and Peace.",
    'type': 'text'},
   {'id': 'toolu_01RKkSknteqc681fzhD9P55L',
    'input': {'book': {'title': 'War and Peace', 'pages': 950}, 'percent': 50},
    'name': 'find_page',
    'type': 'tool_use'}]},
 {'role': 'user',
  'content': [{'type': 'tool_result',
    'tool_use_id': 'toolu_01RKkSknteqc681fzhD9P55L',
    'content': '475'}]}]

In [None]:
msgs = [pr]+tr
contents(c(msgs, sp=sp, tools=[tools]))

"To get halfway through your 950-page copy of War and Peace, you need to read to page 475. That means you'll have read 475 pages when you reach the halfway point of the book."

## Chat

Rather than manually adding the responses to a dialog, we'll create a simple `Chat` class to do that for us, each time we make a request. We'll also store the system prompt and tools here, to avoid passing them every time.

In [None]:
#| export
class Chat:
    def __init__(self,
                 model:Optional[str]=None, # Model to use (leave empty if passing `cli`)
                 cli:Optional[Client]=None, # Client to use (leave empty if passing `model`)
                 sp='', # Optional system prompt
                 tools:Optional[list]=None, # List of tools to make available to Claude
                 temp=0, # Temperature
                 cont_pr:Optional[str]=None, # User prompt to continue an assistant response
                 cache: bool = False,  # Use Claude cache?
                 hist: list = None,  # Initialize history
                 ns:Optional[abc.Mapping]=None # Namespace to search for tools
                ):
        "Anthropic chat client."
        assert model or cli
        assert cont_pr != "", "cont_pr may not be an empty string"
        # Use the given client if there is one, otherwise create one for `model`
        self.c = cli if cli else Client(model, cache=cache)
        self.h = [] if hist is None else hist
        self.sp,self.temp,self.cont_pr,self.cache = sp,temp,cont_pr,cache
        # Each tool is wrapped with `tool()`; the wrapped tools double as the default namespace
        self.tools = [tool(t) for t in listify(tools)] if tools else tools
        self.ns = self.tools if ns is None else ns

    @property
    def use(self):
        "Usage recorded by the underlying client."
        return self.c.use

The class stores the `Client` that will provide the responses in `c`, and a history of messages in `h`.

In [None]:
sp = "Never mention what tools you use."
chat = Chat(model, sp=sp)
chat.c.use, chat.h

(In: 0; Out: 0; Cache create: 0; Cache read: 0; Total Tokens: 0; Search: 0, [])

In [None]:
chat.c.use.cost(pricing[model_types[chat.c.model]])

0.0

This is clunky. Let's add `cost` as a property for the `Chat` class. It will pass in the appropriate prices for the current model to the usage cost calculator.

In [None]:
#| exports
@patch(as_prop=True)
def cost(self: Chat) -> float:
    "Cost so far, as reported by the underlying client's `cost` property."
    return self.c.cost

In [None]:
chat.cost

0.0

In [None]:
#| exports
@patch
def _post_pr(self:Chat, pr, prev_role):
    "Append prompt `pr` to the history, substituting `cont_pr` when no prompt follows an assistant turn."
    needs_fallback = pr is None and prev_role == 'assistant'
    if needs_fallback and self.cont_pr is None:
        raise ValueError("Prompt must be given after completion, or use `self.cont_pr`.")
    if needs_fallback: pr = self.cont_pr # No user prompt, keep the chain
    if not pr: return
    self.h.append(mk_msg(pr, cache=self.cache))

In [None]:
#| exports
@patch
def _append_pr(self:Chat, pr=None):
    "Add prompt `pr` to the history, first sending any user message that is still pending."
    if self.h: prev_role = nested_idx(self.h, -1, 'role')
    else: prev_role = 'assistant' # First message should be 'user'
    user_pending = prev_role == 'user'
    if pr and user_pending: self() # already user request pending
    self._post_pr(pr, prev_role)

In [None]:
#| exports
@patch
def __call__(self:Chat,
             pr=None,  # Prompt / message
             temp=None, # Temperature
             maxtok=4096, # Maximum tokens
             maxthinktok=0, # Maximum thinking tokens
             stream=False, # Stream response?
             prefill='', # Optional prefill to pass to Claude as start of its response
             tool_choice:Optional[dict]=None, # Optionally force use of some tool
             **kw):
    "Add `pr` to the history, send the whole history to the client, and record the response in the history."
    temp = self.temp if temp is None else temp
    self._append_pr(pr)
    def _record(resp):
        # Passed to the client as `cb`: stores the messages built from the response and extends the history
        self.last = mk_toolres(resp, ns=self.ns)
        self.h += self.last
    call_opts = dict(stream=stream, prefill=prefill, sp=self.sp, temp=temp, maxtok=maxtok,
                     maxthinktok=maxthinktok, tools=self.tools, tool_choice=tool_choice, cb=_record)
    return self.c(self.h, **call_opts, **kw)

The `__call__` method just passes the request along to the `Client`, but rather than just passing in this one prompt, it appends it to the history and passes it all along. As a result, we now have state!

In [None]:
chat = Chat(model, sp=sp)

In [None]:
chat("I'm Jeremy")
chat("What's my name?")

Your name is Jeremy.

<details>

- id: `msg_01Cz5fbwnwURf3DiUW7STPgS`
- content: `[{'citations': None, 'text': 'Your name is Jeremy.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 42, 'output_tokens': 8, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
chat.use, chat.cost

(In: 59; Out: 25; Cache create: 0; Cache read: 0; Total Tokens: 84; Search: 0,
 0.000552)

Let's try out prefill too:

In [None]:
q = "Very concisely, what is the meaning of life?"
pref = 'According to Douglas Adams,'

In [None]:
chat(q, prefill=pref)

According to Douglas Adams,42. But seriously: to find purpose, connect with others, and create meaning through your choices and relationships.

<details>

- id: `msg_01PWBjh7wapKWZMbnoyvZbEb`
- content: `[{'citations': None, 'text': 'According to Douglas Adams,42. But seriously: to find purpose, connect with others, and create meaning through your choices and relationships.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 70, 'output_tokens': 25, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

By default, messages must alternate in user, assistant, user order. If this isn't followed (e.g. by calling `chat()` without a user message after a completed response), it will raise an error:

In [None]:
try: chat()
except ValueError as e: print("Error:", e)

Error: Prompt must be given after completion, or use `self.cont_pr`.


Setting `cont_pr` allows a "default prompt" to be specified when a prompt isn't specified. Usually used to prompt the model to continue.

In [None]:
chat.cont_pr = "Tell me a little more..."
chat()

The meaning of life is deeply personal and has been pondered for millennia. Some common perspectives:

**Philosophical**: Create your own meaning through authentic choices and taking responsibility for your existence (existentialism).

**Religious/Spiritual**: Serve a higher purpose, grow spiritually, or fulfill your role in a divine plan.

**Humanistic**: Reduce suffering, increase happiness, love others, and contribute to human flourishing.

**Biological**: Survive, reproduce, and pass on your genes - though humans transcend this basic drive.

**Practical**: Find what gives you a sense of purpose - whether that's relationships, creativity, helping others, learning, or building something lasting.

Most people find meaning through some combination: deep connections with others, work that feels worthwhile, personal growth, and leaving the world a bit better than they found it.

What resonates with you, Jeremy?

<details>

- id: `msg_014KKaRL4W2FwBkbmjQQtYpn`
- content: `[{'citations': None, 'text': "The meaning of life is deeply personal and has been pondered for millennia. Some common perspectives:\n\n**Philosophical**: Create your own meaning through authentic choices and taking responsibility for your existence (existentialism).\n\n**Religious/Spiritual**: Serve a higher purpose, grow spiritually, or fulfill your role in a divine plan.\n\n**Humanistic**: Reduce suffering, increase happiness, love others, and contribute to human flourishing.\n\n**Biological**: Survive, reproduce, and pass on your genes - though humans transcend this basic drive.\n\n**Practical**: Find what gives you a sense of purpose - whether that's relationships, creativity, helping others, learning, or building something lasting.\n\nMost people find meaning through some combination: deep connections with others, work that feels worthwhile, personal growth, and leaving the world a bit better than they found it.\n\nWhat resonates with you, Jeremy?", 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 104, 'output_tokens': 194, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

We can also use streaming:

In [None]:
chat = Chat(model, sp=sp)
for o in chat("I'm Jeremy", stream=True): print(o, end='')

Hi Jeremy! Nice to meet you. How are you doing today?

In [None]:
r = chat(q, prefill=pref, stream=True)
for o in r: print(o, end='')
r.value

According to Douglas Adams,it's 42. But seriously: to find purpose, connect with others, and create meaning through your choices and relationships.

According to Douglas Adams,it's 42. But seriously: to find purpose, connect with others, and create meaning through your choices and relationships.

<details>

- id: `msg_01TFJ44h1JScyXQWbAHkHGnd`
- content: `[{'citations': None, 'text': "According to Douglas Adams,it's 42. But seriously: to find purpose, connect with others, and create meaning through your choices and relationships.", 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 54, 'output_tokens': 28, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

You can provide a history of messages to initialise `Chat` with:

In [None]:
chat = Chat(model, sp=sp, hist=["Can you guess my name?", "Hmmm I really don't know. Is it 'Merlin G. Penfolds'?"])
chat('Wow how did you know?')

I have to be honest - I was just making a playful, completely random guess! I actually have no way of knowing your real name unless you tell me directly. I don't have access to any information about who you are beyond what you share in our conversation.

So if "Merlin G. Penfolds" actually is your name, that would be an absolutely incredible coincidence! But I suspect you might be playing along with my silly guess. Either way, it's nice to meet you! What would you like me to call you?

<details>

- id: `msg_016sRKuAyakwNnT9FUZfA9i3`
- content: `[{'citations': None, 'text': 'I have to be honest - I was just making a playful, completely random guess! I actually have no way of knowing your real name unless you tell me directly. I don\'t have access to any information about who you are beyond what you share in our conversation.\n\nSo if "Merlin G. Penfolds" actually is your name, that would be an absolutely incredible coincidence! But I suspect you might be playing along with my silly guess. Either way, it\'s nice to meet you! What would you like me to call you?', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 58, 'output_tokens': 117, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

### Chat tool use

We automagically get streamlined tool use as well:

In [None]:
pr = f"What is {a}+{b}?"
pr

'What is 604542+6458932?'

In [None]:
chat = Chat(model, sp=sp, tools=[sums])
r = chat(pr)
r

Finding the sum of 604542 and 6458932


ToolUseBlock(id='toolu_01EbJ4kX6ZimASi9C8m89ww3', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')

<details>

- id: `msg_01VQTgrgdgQwBvLkRaMnydJs`
- content: `[{'id': 'toolu_01EbJ4kX6ZimASi9C8m89ww3', 'input': {'a': 604542, 'b': 6458932}, 'name': 'sums', 'type': 'tool_use'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `tool_use`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 437, 'output_tokens': 72, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

Now we need to send this result to Claude—calling the object with no parameters tells it to return the tool result to Claude:

In [None]:
chat()

604542 + 6458932 = 7,063,474

<details>

- id: `msg_01FVvhDSrqvEDYRWEYEwTEhr`
- content: `[{'citations': None, 'text': '604542 + 6458932 = 7,063,474', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 530, 'output_tokens': 19, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

It should be correct, because it actually used our Python function to do the addition. Let's check:

In [None]:
a+b

7063474

Let's try the same thing with streaming:

In [None]:
chat = Chat(model, sp=sp, tools=[sums])
r = chat(pr, stream=True)
for o in r: print(o, end='')

Finding the sum of 604542 and 6458932


The full message, including tool call details, is in `value`:

In [None]:
r.value

ToolUseBlock(id='toolu_012mCCw17t3FTVFTtQXVLau7', input={'a': 604542, 'b': 6458932}, name='sums', type='tool_use')

<details>

- id: `msg_019saR1SeZrBxJZhZqKiWBEp`
- content: `[{'id': 'toolu_012mCCw17t3FTVFTtQXVLau7', 'input': {'a': 604542, 'b': 6458932}, 'name': 'sums', 'type': 'tool_use'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `tool_use`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 437, 'output_tokens': 72, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
r = chat(stream=True)
for o in r: print(o, end='')

604542 + 6458932 = 7,063,474

In [None]:
r.value

604542 + 6458932 = 7,063,474

<details>

- id: `msg_01FCdDxkwbtbjDEnAHY6vrAk`
- content: `[{'citations': None, 'text': '604542 + 6458932 = 7,063,474', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 530, 'output_tokens': 19, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

The history shows both the tool_use and tool_result messages:

In [None]:
chat.h

[{'role': 'user', 'content': 'What is 604542+6458932?'},
 {'role': 'assistant',
  'content': [{'id': 'toolu_012mCCw17t3FTVFTtQXVLau7',
    'input': {'a': 604542, 'b': 6458932},
    'name': 'sums',
    'type': 'tool_use'}]},
 {'role': 'user',
  'content': [{'type': 'tool_result',
    'tool_use_id': 'toolu_012mCCw17t3FTVFTtQXVLau7',
    'content': 'MySum(val=7063474)'}]},
 {'role': 'assistant',
  'content': [{'citations': None,
    'text': '604542 + 6458932 = 7,063,474',
    'type': 'text'}]}]

Let's test a function with user defined types.

In [None]:
chat = Chat(model, sp=sp, tools=[find_page])
r = chat("How many pages is three quarters of the way through my 80 page edition of Tao Te Ching?")
r

ToolUseBlock(id='toolu_01BcnfcEPoNwPTFTGogcy1LL', input={'book': {'title': 'Tao Te Ching', 'pages': 80}, 'percent': 75}, name='find_page', type='tool_use')

<details>

- id: `msg_01JzFUZkSgQsBHy31mUH8858`
- content: `[{'id': 'toolu_01BcnfcEPoNwPTFTGogcy1LL', 'input': {'book': {'title': 'Tao Te Ching', 'pages': 80}, 'percent': 75}, 'name': 'find_page', 'type': 'tool_use'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `tool_use`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 547, 'output_tokens': 86, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
chat()

Three quarters of the way through your 80-page edition of Tao Te Ching would be page 60.

<details>

- id: `msg_01Fj7UTjewyJ82rVEWnFYVUa`
- content: `[{'citations': None, 'text': 'Three quarters of the way through your 80-page edition of Tao Te Ching would be page 60.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 647, 'output_tokens': 29, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
#| exports
@patch
def _repr_markdown_(self:Chat):
    "Markdown showing the latest response, a collapsible message history, and the client's usage/cost table."
    if not hasattr(self.c, 'result'): return 'No results yet'
    last_msg = contents(self.c.result)
    
    def fmt_msg(m):
        "Displayable text for one history message."
        # Plain-string content is shown as-is: passing such a message through `contents`
        # rendered only the first character of the prompt in the history
        body = m['content']
        if isinstance(body, str): return body.strip()
        t = contents(m)
        if isinstance(t, dict): return t['content']
        return t
        
    history = '\n\n'.join(f"**{m['role']}**: {fmt_msg(m)}" 
                         for m in self.h)
    # The client's markdown ends with the usage table, separated by a blank line
    det = self.c._repr_markdown_().split('\n\n')[-1]
    if history: history = f"""
<details>
<summary>► History</summary>

{history}

</details>
"""

    return f"""{last_msg}
{history}
{det}"""

In [None]:
# TODO: fix history format

In [None]:
chat

Three quarters of the way through your 80-page edition of Tao Te Ching would be page 60.

<details>
<summary>► History</summary>

**user**: H

**assistant**: {'id': 'toolu_01BcnfcEPoNwPTFTGogcy1LL', 'input': {'book': {'title': 'Tao Te Ching', 'pages': 80}, 'percent': 75}, 'name': 'find_page', 'type': 'tool_use'}

**user**: 60

**assistant**: Three quarters of the way through your 80-page edition of Tao Te Ching would be page 60.

</details>

| Metric | Count | Cost (USD) |
|--------|------:|-----:|
| Input tokens | 1,194 | 0.003582 |
| Output tokens | 115 | 0.001725 |
| Cache tokens | 0 | 0.000000 |
| Server tool use | 0 | 0.000000 |
| **Total** | **1,309** | **$0.005307** |

In [None]:
chat = Chat(model, tools=[text_editor_conf['sonnet']], ns=mk_ns(str_replace_based_edit_tool))

When not providing tools directly as Python functions (like `sums`), you **must** create and pass a namespace dictionary (mapping the tool name string to the function object) using the `ns` parameter to methods like `mk_toolres` or `toolloop`. `toolslm` cannot automatically generate the namespace in this case. For schema-based tools (i.e., Python functions), `claudette` handles namespace creation automatically.

In [None]:
r = chat('Please explain very concisely what my _quarto.yml does. It is in the current path. Use your tools')
find_block(r, ToolUseBlock)

ToolUseBlock(id='toolu_01WGRx87aQu2ipe4WaM7LP12', input={'command': 'view', 'path': '_quarto.yml'}, name='str_replace_based_edit_tool', type='tool_use')

In [None]:
chat()

Your `_quarto.yml` configures a Quarto website with:

- **Website type** with custom preview on port 3000
- **HTML styling**: Cosmo theme, custom CSS, table of contents, code tools, and wide layout (1800px body)
- **Navigation**: Primary navbar with search, floating sidebar
- **Social features**: Twitter cards and Open Graph metadata
- **Resources**: Includes .txt files and references nbdev.yml/sidebar.yml for additional configuration
- **Output**: Keeps markdown files and supports CommonMark format

It's set up for a documentation or blog website with code-friendly features and social media integration.

<details>

- id: `msg_01NNsn9Vw9a7NP8KTw53DJRd`
- content: `[{'citations': None, 'text': "Your `_quarto.yml` configures a Quarto website with:\n\n- **Website type** with custom preview on port 3000\n- **HTML styling**: Cosmo theme, custom CSS, table of contents, code tools, and wide layout (1800px body)\n- **Navigation**: Primary navbar with search, floating sidebar\n- **Social features**: Twitter cards and Open Graph metadata\n- **Resources**: Includes .txt files and references nbdev.yml/sidebar.yml for additional configuration\n- **Output**: Keeps markdown files and supports CommonMark format\n\nIt's set up for a documentation or blog website with code-friendly features and social media integration.", 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 1443, 'output_tokens': 147, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

## Images

Claude can handle image data as well. As everyone knows, when testing image APIs you have to use a cute puppy.

In [None]:
# Image is Cute_dog.jpg from Wikimedia
fn = Path('samples/puppy.jpg')
Image(filename=fn, width=200)

<IPython.core.display.Image object>

In [None]:
img = fn.read_bytes()

Claude expects an image message to have the following structure

```js
{
    'role': 'user', 
    'content': [
        {'type':'text', 'text':'What is in the image?'},
        {
            'type':'image', 
            'source': {
                'type':'base64', 'media_type':'media_type', 'data': 'data'
            }
        }
    ]
}
```
`msglm` automatically detects if a message is an image, encodes it, and generates the data structure above.
All we need to do is create a list containing our image and a query and then pass it to `mk_msg`.

Let's try it out...

In [None]:
q = "In brief, what color flowers are in this image?"
msg = mk_msg([img, q])

In [None]:
c([msg])

The flowers in this image are purple.

<details>

- id: `msg_01KRZB363rveqCZQibBAsatS`
- content: `[{'citations': None, 'text': 'The flowers in this image are purple.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 110, 'output_tokens': 11, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

You don't need to call `mk_msg` on each individual message before passing them to the `Chat` class. Instead you can pass your messages in a list and the `Chat` class will automatically call `mk_msgs` in the background. 

```python
c(["How are you?", r])
```

For messages that contain multiple content types (like an image with a question), you'll need to enclose the message contents in a list as shown below:

```python
c(["How are you?", r, [img, q]])
```

In [None]:
c = Chat(model)
c([img, q])

The flowers in this image are purple.

<details>

- id: `msg_016zRcMZZ1D468Vc9qhedpRS`
- content: `[{'citations': None, 'text': 'The flowers in this image are purple.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 110, 'output_tokens': 11, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
def contents(r):
    "Get displayable text from Claude response `r`, falling back to its first block when no text block is found."
    found = find_block(r)
    blk = r.content[0] if (not found and r.content) else found
    # Prefer `text`, then `content`; both are stripped of surrounding whitespace
    for attr in ('text', 'content'):
        if hasattr(blk, attr): return getattr(blk, attr).strip()
    # Blocks carrying a `source` are shown as a placeholder naming their type
    if hasattr(blk, 'source'): return f'*Media Type - {blk.type}*'
    return str(blk)

In [None]:
contents(c.h[0])

'*Media Type - image*'

In [None]:
c

The flowers in this image are purple.

<details>
<summary>► History</summary>

**user**: *Media Type - image*

**assistant**: The flowers in this image are purple.

</details>

| Metric | Count | Cost (USD) |
|--------|------:|-----:|
| Input tokens | 110 | 0.000330 |
| Output tokens | 11 | 0.000165 |
| Cache tokens | 0 | 0.000000 |
| Server tool use | 0 | 0.000000 |
| **Total** | **121** | **$0.000495** |

:::{.callout-note}

Unfortunately, not all Claude models support images 😞. This [table](https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table) summarizes the capabilities of each Claude model and the different modalities they support.

:::

## Caching

Claude supports context caching by adding a `cache_control` header to the message content.

```js
{
    "role": "user",
    "content": [
        {
            "type": "text", 
            "text": "Please cache my message", 
            "cache_control": {"type": "ephemeral"}
        }
    ]
}
```

To cache a message, we simply set `cache=True` when calling `mk_msg`.

In [None]:
mk_msg(['hi', 'there'], cache=True)

```json
{ 'content': [ {'text': 'hi', 'type': 'text'},
               { 'cache_control': {'type': 'ephemeral'},
                 'text': 'there',
                 'type': 'text'}],
  'role': 'user'}
```

Claude also now supports smart cache look-ups, so it's very simple to keep an entire conversation in cache by constantly telling it to update the cache with the latest message. To do this, we just need to set `cache=True` when creating a `Chat`.

In [None]:
chat = Chat(model, sp=sp, cache=True)

Caching has a minimum token limit of 1024 tokens for Sonnet and Opus, and 2048 for Haiku. If your conversation is below this limit, it will not be cached.

In [None]:
chat("Hi, I'm Jeremy.")

Hi Jeremy! Nice to meet you. How are you doing today?

<details>

- id: `msg_01DGxcxV4KVV9N4ksjvebajw`
- content: `[{'citations': None, 'text': 'Hi Jeremy! Nice to meet you. How are you doing today?', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 20, 'output_tokens': 17, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
chat.use

In: 20; Out: 17; Cache create: 0; Cache read: 0; Total Tokens: 37; Search: 0

Note the usage: no cache is created, nor used. Now, let's send a long enough message to trigger caching.

In [None]:
chat("""Lorem ipsum dolor sit amet""" * 150)

I see you've sent a very long block of "Lorem ipsum dolor sit amet" repeated many times! Lorem ipsum is that classic placeholder text that's commonly used in design and publishing.

Was this intentional, or did something go wrong with copy-pasting? Either way, no worries - happens to the best of us! Is there something specific you'd like to chat about, Jeremy?

<details>

- id: `msg_019muc6q8xMKoo69HVXb1pD9`
- content: `[{'citations': None, 'text': 'I see you\'ve sent a very long block of "Lorem ipsum dolor sit amet" repeated many times! Lorem ipsum is that classic placeholder text that\'s commonly used in design and publishing.\n\nWas this intentional, or did something go wrong with copy-pasting? Either way, no worries - happens to the best of us! Is there something specific you\'d like to chat about, Jeremy?', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 1084, 'cache_read_input_tokens': 0, 'input_tokens': 4, 'output_tokens': 84, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
chat.use

In: 24; Out: 101; Cache create: 1084; Cache read: 0; Total Tokens: 1209; Search: 0

The context is now long enough for cache to be used. All the conversation history has now been written to the temporary cache. Any subsequent message will read from it rather than re-processing the entire conversation history.

In [None]:
chat("Oh thank you! Sorry, my lorem ipsum generator got out of control!")

Haha, no problem at all! Those lorem ipsum generators can definitely get a bit enthusiastic sometimes. It's like they're trying to fill every possible space with placeholder text!

Are you working on some kind of design or layout project? Or were you just experimenting with text generation? I'm curious what you were up to that needed lorem ipsum in the first place.

<details>

- id: `msg_01JRjrjQMh32VQRNAB5MvSxC`
- content: `[{'citations': None, 'text': "Haha, no problem at all! Those lorem ipsum generators can definitely get a bit enthusiastic sometimes. It's like they're trying to fill every possible space with placeholder text!\n\nAre you working on some kind of design or layout project? Or were you just experimenting with text generation? I'm curious what you were up to that needed lorem ipsum in the first place.", 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 102, 'cache_read_input_tokens': 1084, 'input_tokens': 4, 'output_tokens': 80, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

In [None]:
chat.use

In: 28; Out: 181; Cache create: 1186; Cache read: 1084; Total Tokens: 2479; Search: 0

## Extended Thinking

Claude >=3.7 Sonnet & Opus have enhanced reasoning capabilities for complex tasks. See [docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more info.

We can enable extended thinking by passing a `thinking` param with the following structure.

```js
thinking={ "type": "enabled", "budget_tokens": 16000 }
```

When extended thinking is enabled a thinking block is included in the response as shown below.

```js
{
  "content": [
    {
      "type": "thinking",
      "thinking": "To approach this, let's think about...",
      "signature": "Imtakcjsu38219c0.eyJoYXNoIjoiYWJjM0NTY3fQ...."
    },
    {
      "type": "text",
      "text": "Yes, there are infinitely many prime numbers such that..."
    }
  ]
}
```

*Note: When thinking is [enabled](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking) `prefill` must be empty and the `temp` must be 1.*

In [None]:
#| export
def think_md(txt, thk):
    "Markdown with `txt` followed by a collapsible 'Thinking' section containing `thk`."
    lines = ['', f'{txt}', '', '<details>', '<summary>Thinking</summary>', f'{thk}', '</details>', '']
    return '\n'.join(lines)

In [None]:
def contents(r, show_thk=True):
    "Get displayable text from Claude response `r`; with `show_thk`, a thinking block is appended in a collapsible section."
    blk = find_block(r)
    tk_blk = find_block(r, blk_type=ThinkingBlock) if show_thk else None
    # NOTE: this path reads `blk.text`, so it assumes a text block accompanies the thinking block
    if tk_blk: return think_md(blk.text.strip(), tk_blk.thinking.strip())
    if not blk and r.content: blk = r.content[0]
    # Prefer `text`, then `content`; both are stripped of surrounding whitespace
    for attr in ('text', 'content'):
        if hasattr(blk, attr): return getattr(blk, attr).strip()
    if hasattr(blk, 'source'): return f'*Media Type - {blk.type}*'
    return str(blk)

Let's call the model without extended thinking enabled. 

In [None]:
chat = Chat(model)

In [None]:
chat("Write a sentence about Python!")

Python is a versatile, high-level programming language known for its clean syntax and readability, making it popular for everything from web development and data science to artificial intelligence and automation.

<details>

- id: `msg_01114DSaRnGmBtCzygqtoUhN`
- content: `[{'citations': None, 'text': 'Python is a versatile, high-level programming language known for its clean syntax and readability, making it popular for everything from web development and data science to artificial intelligence and automation.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 13, 'output_tokens': 40, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

Now, let's call the model with extended thinking enabled.

In [None]:
chat("Write a sentence about Python!", maxthinktok=1024)


Python's extensive library ecosystem and cross-platform compatibility have made it one of the most widely-used programming languages for both beginners learning to code and experienced developers building complex applications.

<details>
<summary>Thinking</summary>
The human is asking me to write a sentence about Python again. They might want a different sentence this time, or they might be testing if I give the same response. I should provide a new sentence about Python that highlights different aspects than my previous response.
</details>


<details>

- id: `msg_01PKcPntFESWXzjK7GEjj9Zi`
- content: `[{'signature': 'ErcDCkYIBRgCKkBzg7pG5Kx4zDAzkJY4RNS1o81F8sgEmt3gOz0bcUhL0T5K6ZEcQJ/wxoRiwMzn9zEhZyxDzpJ6BDBbedmE64JlEgwQ8SBddgUbHQZKCHQaDAzA+qpkcTCPHcIfiiIw7xOn95vxyqbP3TZb6s7vssIptiHANeVGy9e+4z3XTaG0VKcjHowP+3BAkSWrYsXwKp4CblIPJ3w7Sz2fC3yU5M43f7O7mqZXuooFfTWLF8xiCZ9OKxuIyNjzO+MJbdNNxhcEmjLnV0nd8g7wzyzon516dFTxqIGOb6c2Ll3unOGeWmx3igx8ike5+4dAW2kUJffdT6zd+JC1Of+0oU8+mfNu91HvjLW7080r5d5smBQksoDjZSVCaSiUpz/JbLaZXZWGHCQBrxUZqhOlEK8rXcoTL94LedvTgCwCugZizZNuP2TfIxFf5qGMMAbfcKcSt2MPkjZOyUeDDzqG0U17HkxABPE884MC1ATbm7uEHifZZSPKCFzT3J/VG4AvqF/Pqp7YXrZSPRnOuqTf6nY2tEUNXZgdaFmTEGTCfk+u8ShxWg0vgbKvmwjJp3qkyWuMpRgB', 'thinking': 'The human is asking me to write a sentence about Python again. They might want a different sentence this time, or they might be testing if I give the same response. I should provide a new sentence about Python that highlights different aspects than my previous response.', 'type': 'thinking'}, {'citations': None, 'text': "Python's extensive library ecosystem and cross-platform compatibility have made it one of the most widely-used programming languages for both beginners learning to code and experienced developers building complex applications.", 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 0, 'input_tokens': 90, 'output_tokens': 99, 'server_tool_use': None, 'service_tier': 'standard'}`

</details>

## Server Tools and Web Search

The `str_replace` special tool type is a client-side tool, i.e., one where we provide the implementation. However, Anthropic also supports server-side tools. The one currently available is their web search tool, which is documented [here](https://docs.anthropic.com/en/docs/build-with-claude/tool-use/web-search-tool). When provided as a tool to Claude, Claude can decide to search the web in order to answer or solve the task at hand.

In [None]:
#| export
def search_conf(max_uses:int=None, allowed_domains:list=None, blocked_domains:list=None, user_location:dict=None):
    'Build a web search tool config, including only the options that were given a truthy value'
    opts = dict(max_uses=max_uses, allowed_domains=allowed_domains,
                blocked_domains=blocked_domains, user_location=user_location)
    given = {k: v for k, v in opts.items() if v}
    return {'type': 'web_search_20250305', 'name': 'web_search', **given}

As with client-side tools, you pass to the `tools` argument of the Anthropic API a non-schema dictionary containing the tool's name, type, and any additional metadata specific to that tool. The `search_conf` function above makes that process easier for the web search tool.

In [None]:
search_conf()

{'type': 'web_search_20250305', 'name': 'web_search'}

When the web search tool is used, the response content is a list of blocks: `TextBlock`s containing the model's response text, `ServerToolUseBlock`s, and server tool result blocks such as `WebSearchToolResultBlock`. Some of these `TextBlock`s will contain citations that reference the results of the web search tool. Here is what all this looks like:

```js
{
  "content": [
    {
      "type": "text",
      "text": "I'll check the current weather in...",
    },
    {
      "type": "server_tool_use",
      "name": "web_search",
      "input": {"query": "San Diego weather forecast today May 12 2025"},
      "id":"srvtoolu_014t7fS449voTHRCVzi5jQGC"
    },
    {
      "type": "web_search_tool_result",
      "tool_use_id": "srvtoolu_014t7fS449voTHRCVzi5jQGC",
      "content": [
        {
          "type": "web_search_result",
          "title": "Heat Advisory issued May 9...",
          "url": "https://kesq.com/weather/...",
          ...
        }
      ]
    },
    {
      "type": "text",
      "citations": [
        {
            "cited_text": 'The average temperature during this month...',
            "title": "Weather San Diego in May 2025:...",
            "url": "https://en.climate-data.org/...",
            "encrypted_index": "EpMBCioIAxgCIiQ4ODk4YTF..."
        }
      ],
      "text": "The average temperature in San Diego during May is..."
    },
    ...
  ]
}
```

 Let's update our `contents` function to handle these cases. For handling citations, we will use the excellent reference syntax in markdown to make clickable citation links.

In [None]:
#| export
def find_blocks(r, blk_type=TextBlock, type='text'):
    """Helper to find all blocks of type `blk_type` in response `r`.

    When `r` is a dict, its blocks are matched on their `'type'` key against
    `type`; otherwise blocks are matched with `isinstance(b, blk_type)`.
    Returns a (possibly empty) list; an `r` without `content` yields `[]`.
    """
    # Dict messages: compare the 'type' key. The `isinstance` guard skips non-dict
    # items, e.g. the characters of a message whose content is a plain string.
    if isinstance(r, dict): f = lambda b: isinstance(b, dict) and b.get('type') == type
    # Otherwise match blocks by class
    else: f = lambda b: isinstance(b, blk_type)
    return [b for b in getattr(r, "content", []) if f(b)]

In [None]:
#| export
def blks2cited_txt(txt_blks):
    "Concatenate the text of `txt_blks`, adding footnote markers and a trailing reference list for any citations."
    parts, cites = [], []
    for blk in txt_blks:
        if isinstance(blk, dict): blk = AttrDict(blk)
        txt = blk.text
        blk_cites = getattr(blk, 'citations', None)
        if blk_cites:
            # Footnotes are numbered consecutively across all blocks, starting at 1
            start = len(cites) + 1
            cites.extend(blk_cites)
            marks = [f"[^{n}]" for n in range(start, len(cites) + 1)]
            txt = f"{txt} " + " ".join(marks)
        parts.append(txt)
    body = "".join(parts)
    if not cites: return body

    def _ref(n, cit):
        # Double quotes in the cited text are backslash-escaped inside the quoted snippet
        quoted = cit.cited_text.replace('"', r'\"')
        return f'[^{n}]: {cit.url}\n\t"{quoted}"'

    refs = '\n\n'.join(_ref(n, cit) for n, cit in enumerate(cites, 1))
    return f"{body}\n\n{refs}" if body else refs

In [None]:
#| export
def contents(r, show_thk=True):
    "Get the displayable contents of Claude response `r`, with its thinking when `show_thk`."
    txt_blks = find_blocks(r, blk_type=TextBlock)
    # Cited text built from every text block, or `None` when the response has none
    content = blks2cited_txt(txt_blks) if txt_blks else None
    thk = find_block(r, blk_type=ThinkingBlock) if show_thk else None
    if thk: return think_md(content, thk.thinking.strip())
    if content: return content
    # No usable text: fall back to whatever `find_block` picks, else the first block of any kind
    blk = find_block(r)
    if not blk and getattr(r, "content", None): blk = r.content[0]
    if hasattr(blk, "text"): return blk.text.strip()
    if hasattr(blk, "content"): return blk.content.strip()
    if hasattr(blk, "source"): return f"*Media Type - {blk.type}*"
    return str(blk)

In [None]:
# Demo: a chat with one tool attached and caching enabled. `search_conf()` supplies the
# tool definition (the web search tool, judging by the `server_tool_use` block in the output).
chat = Chat(model, sp='Be concise in your responses.', tools=[search_conf()], cache=True)
pr = 'What is the weather in San Diego?'
r = chat(pr)
# Display the response; the rendered text below carries `[^n]` citation footnotes
r

Based on the search results, here's the current weather information for San Diego:

Today's weather in San Diego features cloudy skies early, followed by partial clearing, with a high of 71°F and southwest winds at 5 to 10 mph. [^1] Tonight will be cloudy with a low around 65°F. [^2]

The air quality is generally acceptable for most individuals, though sensitive groups may experience minor to moderate symptoms from long-term exposure. [^3]

For more detailed and up-to-date forecasts, you can check the National Weather Service or local San Diego weather sources for the most current conditions and extended forecasts.

[^1]: https://www.wunderground.com/hourly/us/ca/san-diego
	"zoom out ... Cloudy skies early, followed by partial clearing. High 71F. Winds SW at 5 to 10 mph. "

[^2]: https://www.wunderground.com/hourly/us/ca/san-diego
	"... Cloudy. Low around 65F."

[^3]: https://www.accuweather.com/en/us/san-diego/92101/weather-forecast/347628
	"The air quality is generally acceptable for most individuals. However, sensitive groups may experience minor to moderate symptoms from long-term expos..."

<details>

- id: `msg_01Pbeynbff5PBG8dkbHMMUz8`
- content: `[{'id': 'srvtoolu_01FZ6GitT4MtseTnQXyecch6', 'input': {'query': 'San Diego weather today'}, 'name': 'web_search', 'type': 'server_tool_use'}, {'content': [{'encrypted_content': 'EqcCCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDH+vJSlFVD+uWNtELRoMC8WqLtmu8ep/QP6YIjDXYWSzADJMvKvC5p1F7t+S7ead/IX8XkPSs7HF5lH1NG56kddNqm3CUa2tmC+9BpcqqgE7ITxhD1Ou8vCy1x+MAM1wwi5xedd6nbsCXybeCmBppwZeD3QO8wYiA8qow1W50A740cZNc9eioM7qVPwO5jTeW90xbj6tcUUqCXV3PUpEEdzb8o1X0azHBBNd1Bma4NCZat6tC9X1XNTSIghtIWtd/wf2iz4T0Yr3eTDMGmV5LgDhpJIYpQEt7wDgMEtHkHLhwEapTruEpFb3mGh3Du7gBGNXWee//g2aABgD', 'page_age': 'June 18, 2025', 'title': '10-Day Weather Forecast for San Diego, CA - The Weather Channel | weather.com', 'type': 'web_search_result', 'url': 'https://weather.com/weather/tenday/l/San+Diego+CA?canonicalCityId=3b2b39ed755b459b725bf2a29c71d678'}, {'encrypted_content': 'EtoLCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDIH23ZAFsKxY5LzTEBoMJyk5iA/nTe9cPFN0IjA+lJo9/GTUrp+rqc/PZ4K2jM0m1eFMlcDN2DPw7uN2uEWvB1Vb46LwurwVF1bioi4q3QrXgzDjIzigfLYxfhhK7ZlweDuXOPwx3j1DosoUxd3QMabtisnn/KNjRxopp0HjDGG4xz8rL0Gxpm3xSPdNbrp6oD5F470GtMy1OAxs+mgiK4FyHYdGEzHYNhMMXGK6cVZIwiER2OVFWOYm9KaPmWSwcZidQMtwQAVoSzGZ/lOpAGewphMyW/PehOMafjIUw221PtawCs/ot7s5ZIsXduCmcYwggLgMmOp2beBA1J4ryW8F/L6F6N3v1rcjh3hifLQsh3thFbMNtLlSUxIxQMtvbNGsONBLRy06gyMpRIgxjPaYaJ2TNXwZFWdveFpOSqS4FHW1QivoQnWzc7AYKzZ27VPOpZ9BWwPvXzjnE2lvhKYfXSIhoNxMcQVoSkFb9UUgSSzMZZ8pmoxoGcfa4C2+VSHuNDXbtEDXX1NOntZf4l4C0gsn5kE31mGS3pv/0FuOGVTA1ybIGe6/GgQN2FRw/k3y8IRxe1K/q6IBQV5/QmSndJAyTVGf3KAW41k8eELo5xAGLvjcERjyyiKJ+0FkL4b8+AXZSreeLHRAfUq4RNLADdJJy84t2CT6c2luWsu92DEEgCWdxdyMtGEunnR16WL5Ogbjzh7iq5E3Bh1kIloif8+sm7M7hrz+zG4iUgPtUj14cO8WPm2bugyZaljXUZRHq9gkZTXhSela22bUe0XQGqmnlk2RI//O2qLFxI4SWg+hvTfL3a6sBRyTb3VwsVKOPCp9HYa4fk/2UNTgmJSIN2YbqR0HLJycXP2rSkIkBw1likGpaya6i+LXmIsdln01HdNyAaQtp3xIipLnVM27q58G4NcaMpJ3NRWK8Qecp8UsB20UFgsYCl+zOs/LZePvn+IBur8pYMSt2o2T72lmA7UsFwGfzmj/sFJfcN4s2WrgCBjS9X/fY8+FQ+K3gBEbAgi4ObLmvdWW44Vc+lAYQm0iLL5MErN0mZ
IcWNPdraDFCul00l9/UHgzvyelxwSrDSpAxKC+Ps3Ulp79YCNkyeoiDn/Si81Hx5fPTqQyayWJkVPcvF16xU3h/69+9xFNN5uBMn+bWIONNxJTfZ4yZ8ZHjKqxW7g/8aO3veKy5zLj3ZmsFxE1CAcTyCd800IOwfxfapxKwwA8DrhEZF++1O894u3A7adFlMFg9IOtEc61slOikA5nAQQI1Act8UA0aMp0NggX7e/rZItR3yZk5fi6E+Xw4r4FW3cb8h5q4xDBzbGx14TFohDb9J1woMmO7w/u3LyTVjkxdeF/ZDnTTaroFmEqNyx0+HP2QUr3ojmutuAxT309KBpcvRR5g1l4tuUedz1+EoGkdURtkuuiF1cpoP37sQGMSWwCnGRHjQauDunIMOt1hPdLr8gcF1c0SXSkA4eLiA5WVR2n9FPzNJX+jbHh4iJr2hZcKDHBpRuX3Os3mn+vl3ZB5CTD09Pay65BqLgPWmg5iGzsx8gz62nybNClKEPolNx86YhD3PRy43UXH2z4VyvT7sVsuABezBHpEMDn3E7YinqSnuxCC3u6RKnME3hrwXXgYUnCmiZG6NyK6cl0cTR+V5WW6HKsXb6ZOolRic1pK8Cmyy/qgisolKpXve8QPoKBvQTVa5ffzwYrkFrbju2LjMwrHD+78IT0q0YwPiiUSLOaRoAtv2TN5u4lu8+cFB+uLiEi26oTK0du2txkjpHO8xSk/lGXXg8EmXrF13ZVvYF685Io4uAe0adoWxhSN6sXoVA6QowSGFYokrexZJqSiXF9pdTA6zQP5RkGR27XTLEDe7ieTC73NHJKgHxkIMoV82A224Ic5lj35Q6CC6xo5Nh2FaX5MxbHqOxx/RjnIBgD', 'page_age': 'June 18, 2025', 'title': 'San Diego, CA Weather Forecast | AccuWeather', 'type': 'web_search_result', 'url': 'https://www.accuweather.com/en/us/san-diego/92101/weather-forecast/347628'}, {'encrypted_content': 'EpcCCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDDUcPLkpzEu3pRIJBhoMxa3nhpUy/DX1dftLIjDbUHhKGSAmDV/SWQfasXVEoQzxQPIRlEbt6Y7S+QuqCybTk/P5wbrKQNjt+KugnAcqmgEOvvBtt/PC6BBEBEElwKARZXm4cgHdQ8xgXXbWWCaaHmU1Rb9xqlQqpcqX2LtsDiBBFx+GdYdpRYBqMv9TctMI1MIB1o6e5ft6DpoHnwI3Kwxcr3sFKwfqa+a4nBgZ3jafIYiAEYUce8CDuDKyNykrenNBACqCZWfz20GoAFfMkMTj3keAiIyvDTJ6IRFitJ9vDM9PeZbLrW5VGAM=', 'page_age': '5 days ago', 'title': 'Weather Forecast and Conditions for San Diego, CA - The Weather Channel | Weather.com', 'type': 'web_search_result', 'url': 'https://weather.com/weather/today/l/San+Diego+CA?canonicalCityId=3b2b39ed755b459b725bf2a29c71d678'}, {'encrypted_content': 
'Er0ECioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDLceH9LHlRqne3xSyxoMhJNMEjH+lbV7t4ZbIjD48SUrVwHTXPpPxJmdsPlh9nOlo+/yq+S0x6yGdLQYhW35GcH/qZfCzNbCpqIbrLQqwAMi8AcxQwwO7GkGn6b+HTh/bXRIO8tRNYKA4zWZHEXA4DV5dYG9K4DVc3F2zQPMuJW4Mkpi4gICNyOgigW7llUGDsUc5Pk1SZLCJPyxm/l4R3yB4qxBXq63aXUUnTeGWnFGdQaCESmMHjaqh1IhhuO/lMadBXtSaaL+5NuiriMqqSYMfxcyhax8cUhO5y1jRmJXftLKulue1m8qaztEKHY0G+jRxkGbf/+PshQyVrLMJwO2gCx7SytoYvt+qtoQiP/Ia/9JbyjxrKsxLrLL1CIFa6M8mxJ7Scfxp3cCFESzaT98vavH6kxAvLdWN0a7uTAVmJ4xv4qGCwYBPJh3JFa4+ufMNoqNJ8evxAoOI1Agz9wX6aCr5a14aUH+/8EK1OK9W5JV+tc0WJSr1lr9SZoIR2jFtrgfYA4RzoAkqLJCiNK/5o9ZP3LUgXpVSQ2BB5qf1JMNwIX9OUqX0vrw058Ac9djDra5ZHGKK4EEKH/haeMw/zt8wu6h4hquM/ryVk6y6vMA7hUTOjrUODP7eu6KT0swBMC7zii5TtgDRHN+iyQ1X/tQYxSc3DXCyCEAGXSLsp9F7F33kNJ58zfaqaKuGAM=', 'page_age': '1 week ago', 'title': 'San Diego weather forecast – NBC 7 San Diego', 'type': 'web_search_result', 'url': 'https://www.nbcsandiego.com/weather/'}, {'encrypted_content': 'EsoCCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDG5ItqsRt847mp4EgBoMRhY/aLxuSU8QtcdNIjAUstjIEcSpFMJT49KpkFDkwEqgtk4W4wYs1mFQwtazx/H4H1ik6Y9fEtEoPyixAgMqzQEgPJSYTk9ti5hLrbatfIlDgBiYJxnMF3w6yADP7EFicF0Jf2OM0CmrIC4PzByH9LnOfBtUangs7eG931O+zmM+PcQ3zJeVIz6wVneQUF5YGs9Jy7zXhydB2DdosL7cD4PtohXY7KtNPiGmQKBcJ5wEf6LgIOZ91KEPTt77BFdT3UCrgQPXBzcaKqVMrOS2u7ZX6ifN4lRI/oC9ZOMSasOH19qACnGmZy5SE5nf5kdcysCinyMaCBDJQnZqq8DNcxYDh0ErIRZ2sQq9PvqJGAM=', 'page_age': '3 days ago', 'title': 'San Diego, CA', 'type': 'web_search_result', 'url': 'https://www.weather.gov/sgx/'}, {'encrypted_content': 
'EoQHCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDEmG07vo2p+NY6fLKxoMPxNFlooXe0xGNYLAIjA/dzIglNWibDI3ZSGUqU3uj1lj87+x2HXMu2mD1cLGFXx+hTlWP9G+B0WqVk+vYwcqhwYr0Hj8xJ/y1LfZtB4vfUkVxfeNTK4PS1oApnZpXaz15EThACb4Jf+ZjT7QeWj0hYhvQJdraayVDsQaZLcg2UAdKeA8sPCiIodyLYb05y4IZl8jDYx8l4aQ4d1PUtFIgtm8iCoYGiqucMdMZqZy59Rw30O40IsVaDfh2gJh8B//rmUC9mRWhu7Yu1EJopuxduCW+ZayMFj0iKnPiKLp5Z3wael4IxEo5Ect/SSdTUO1PLhpkaAENxCyNgXoKMidSGZkrSxmAV6xfZL3rHPWlQAbrNnKM6CLnodbiO4l5omT+01IdrAJX2ghcwDz53I6WTarU4Orl/E1Qzen6uOk9ydBSp4Zc8Rn4QA/aCT2tsEwQ+1pyEASlba+rwkNOzd1DKixwpxJBydpXwxUoENqmc82G/V2fWiIarmztY5QizcrZlIn335Y+xd9PcBnbjxNbrM60f3DjT0q8Q0xaR89FlHBndOa7rQxkC+WfXHvXe6G3AneEseIyFP3jLGVx29LY2X8AvXZtTwGFL36ivtHWwxvewg0yH5wkoLO7SJQJ36D6CppdnZIHwUWxJAOU+QE382yubKt1RGMPv0+GJsXsrGUIE9oYZeNr6PemKEnz4S0umujoavJTAff6G41VjMcAUUN3zmreZ2UQZyfS1UgutzF9RDcUX7ZSwjHrsmAuv10Eo3XJKyW8/FAI68v5VIye3HqB5OCVS1vvZrfcEjRzG0vYnH2a3fhURRpVXZIGPclNZ8a04KeT5gyRMCB61GP6Sv/OXTsPJlPt8pIafXU11Zcu4SimPjgWRR5307iOI28dvLPNye80xzv/jt/qMDw4B1oLGe3xK7/tdUYSmh0jmqYYGn6VXdKRe/SolbSeRuF7qoJ3Z2Gy/4KgY0wF0duVO8UWHHs8+HuyVIDRjBf8lzRK4SkdNFWYn6+L5o8k3ChX0Me9MHlwMFHQgfk1BFQAGCTS1C6L3PIBlygOIoBBrZz/eiXbS/PNWsYIU2mRKymnn3sU3MMc+OlWhyXYw9N/g3f6WXnCWEnGAM=', 'page_age': None, 'title': 'San Diego, CA Hourly Weather Forecast | Weather Underground', 'type': 'web_search_result', 'url': 'https://www.wunderground.com/hourly/us/ca/san-diego'}, {'encrypted_content': 'Ev8CCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDN86qKKSUKj43EGulxoMw4Ts0wCZu7jwt9h6IjBB8kGPPDuiKodamMzyNcCWWjF7W30lX2DE19WWWYptCw0x4i+7KPIiWGZ6yyhLZbsqggKnqsf9AxKsr/gIHHQg1jVBCXpfQrsPT9dROO8Wxe7Y9EQP3+lO+nojzmuZLnGQ+w7UW+eIjvAShS3ARSJyWdN6ZjJWEzS52P/aw/nwocBwxD85McttN8wYH6ZvlnPlYS+SrgXKW56SZF53hV9ez6e6vJjsGpoLN4gaA/NARsCk73SeMSF59oTskN+oeql1SZ/mRmwtcLrLgi9/CoXywPQe/8AbEib7O30g0Bm4g4vC27Ns0r3DJs/pYNo9vj6T6vOEEN3tL8L05yCPA4IJ6TtOIH3J+qYjmdzh7xLUWnTmoONGQNs8AzaJ0bB1RaP+yEInypx00YJIMi1OAunxBGvyoaoYAw==', 'page_age': 'June 16, 2025', 'title': 'National Weather 
Service', 'type': 'web_search_result', 'url': 'https://forecast.weather.gov/zipcity.php?inputstring=San+Diego,CA/'}, {'encrypted_content': 'ErYCCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDMPaGJKo5gUcLcphghoMB0IFVaxhQyRrDi7mIjCnwfKZ1xqe+11CCN22f1shMDF/6lekueNPgphjtk3Jaie5w4hpYz5MLHh+7k1+K9MquQGupnVpMY53SVAGTPCLBQ/Xd6RZMAn1shTv+LzRJD65Foi+wWfHcG0Xo2PVocquorbKkgRARdqx+EpcxSvwlp5ivIO5srgIepZ2UGvZpFLRleD/klZOUYFKLbSBDw/AGdiz3h0XBcW2hueNUgYr/fQ4WhTGKbQIUZaYg5Lxza00I5yMhI6yUfV2C/uybE2vzlMDZOsw7WR2zyZGxjLxCq2wVfWPdyDhX+weA1V2OlQaulMqq6H5vv6soBgD', 'page_age': None, 'title': '10-day weather forecast for San Diego, CA, United States - The Weather Channel | weather.com', 'type': 'web_search_result', 'url': 'https://weather.com/en-GB/weather/tenday/l/San+Diego+CA+United+States?canonicalCityId=3b2b39ed755b459b725bf2a29c71d678'}, {'encrypted_content': 'Ev4LCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDBaVLg/Jgoat1fHuNhoMwMNxKdlRQRkZswH3IjCBcqdRO6TlecusfrHkTjZ3i7hMqqHupeObEhg5iMBdvp5B23KuPjHnREmi6U1VmJEqgQvs9lihYD5SI3yLUDzf7Ni1fOlsUw/vF9j2rrkxdDFN8C9lXq+3dJ44zHspzeem2GnybnIA4A+1+q0IQGG5Ko3SDIukHnqqTjq36zeBh9e15hMftcfEPEUIVxkr3Yq7yART4ge3utJwkrWYvNtMk8n43qZv0lSBAMbXFOKfwvQAbp5s+Uwo8Elm2+wTCY/L4zu8VeoYw23tatg2NacalueUN/6gOIHE82TT22El4K6R6lbhgi9Hie0BD7kIsCsEuwNKxpXRU4eeeYQeohhWvdahWAWuApFvx4vjOqJ0DrI9iecW9erB0Mq80fA1JWYMCpqRpY6gIO6WNSGUPMhlC2cXqT0b5xmcvXLjkW3rm3nHUDTPC6yfroQkS/iZ/P3QeZ1UHa8L4YTg9kn8Qd9moeN8FBJhoL3p5Bk+ltTvN93MAPEbaHTRT+XMs2IP42EzXidH4A6GF08xdkdFAkD9R3DaMQrTU4ev+LkV3waTtD7CDuIcn2fmL4Arq8Vj5N0uEB6fZsiMe7GZ4UpexUV8OsIjCgnsx3y0vnHun023oIqY0DYOx7NRiH53RtEUs8w5eWZ9n068GGpfJFP9gVanp/v87kbRiW2IPtB4Jhod4OXKh/yuGXGXz58xxDAdE1H6oKHDdpVS0EATiZuvDrs/dPNMgSgGy4yCwPJOss7bXrAXLbbuiAkyzxMNntxccsjACeLqsTbjUKr5RoTMVwl5afIdGhGp8Kj5qoBvXds98Z/Pv59IOyZ+UvnStlQUTNOUiwkfTi8KyS1e1YTHQ6wt3e3aW+i/tn7+oXM+5W25M7hnKwFdnUD9CWK9PqO4eziIu0258M0pWE3yk2LCemkVfjWsIAtiA3C4lf6vi6KC0ZuQKzGzW6cwqV/qDuuQZ2sq/hu2POi11MVH5BwY6gfjIFksJ44ZV1xWtA1BW7jg0hsf+boX0laGQHnI8HWJlld/+vh9
Z4PCJnqWF1av2TtF9g9tPcWhg421fniNHW4l4ZnhGpVnY2/h6uJkhpFgBxB5UI+bDSC9Md1yrTvl/maXgcfjtdkpRKrjh10XlvFMGV1WoLXdR/H8ROIUqnb/09CmOyFASo2vDLwU6GsSQ1uMVyUr+26w2SBWdAHguBXaVszmqQXGg+TBdvvBIHntcbxmPd73jHRRqWhM1e5TP26v4SnL7MNEB8Sl/KXfGwa6g7QGYbw7arbptTrHxsr1BkAzkZ8DYXINl3OOAyt3IhoBUt39XPxeg+Mt1wTd31vdz1DZ1VpCRuhQcCpZJs1VP54Aq8bUyC8a9DvX8emk/71lzI98Iylb69gqeEDSDGy0bNVAtcQ2t8Rklh/rqVEfjjPpida/VIeZnS30Lo5TOV3XalMZtMAG9LQy3hil9Qx9r9PKm+IJjC0Lw/E3n/J94pUaPomb3Iy5SvlKdRg9uTUHQ2HPAc4ZUkasgX4wDTA0epqZBIp7ONKQ792ACJljpC1aB74UZRQGvpex4GsQa9p96yfpVQyym+CMLUwNn0aK+Wr2f/KPAxOqU0JF7ATg9RkLnc4ebFlfzb7jbta7b1UdaE+iijtpEETF+L1hbZU2rrWtEGfmj8LpAEKXY8SHSknqPLjYQSx88/XcEUcNq2UMSU5gCpgoAWCTzgsHWDaTUiLuT1e7Ug4WFPXmb9xKaxsZRGcz72aC9bOoBx68EPNyahWcO1UIhfsCIy8+kWc2tFP4JyxVLMDRy0PPAjsTC/5NIJSLRj9Ff7eWiGH+ZRR3M6kIyzALVNn0D7TLNFyCN0HOvewKQl7zfotihlWkiiQ5zF1Xsr+o/kfn9u1ZCMQih7N3RGjGu6HGJETMNrPRkFLcljjw4qUfNBb16Y9vT5MnaIaLo2OwIrvZ6MmQOCxEYOeQ3RgD', 'page_age': None, 'title': 'San Diego, CA 10-Day Weather Forecast | Weather Underground', 'type': 'web_search_result', 'url': 'https://www.wunderground.com/forecast/us/ca/san-diego'}, {'encrypted_content': 'EpkCCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDNaDICw+2Tu4mISGVxoMYyWbYKBGGJWCZIUBIjD4T9bVDtjKNbiwkjKMOWHiJfQ4SISexhXNVRtRcwSXX9B1R94qAa/5fOvyGw0lMA0qnAFofUaHvh8QIvPrW9nJGVHR5jM7L+zESeZ49U/pfKMTf5Hm5nk/idB09WjI9WkgBtQEPibCTERGZEL355HoPyI4WXxwm1JxlUqVal8UP5mNCMjIaKqsQhg+XGPdVJB4YYZyPMKLAa/70Xq0e288WiRdJTqbydIlhd867wsfBthdOFE6rql0vT5xKltE5kBI5u0p1JMyLkksiagCk4EYAw==', 'page_age': None, 'title': 'San Diego, CA Weather Forecast | KGTV | kgtv.com', 'type': 'web_search_result', 'url': 'https://www.10news.com/weather'}], 'tool_use_id': 'srvtoolu_01FZ6GitT4MtseTnQXyecch6', 'type': 'web_search_tool_result'}, {'citations': None, 'text': "Based on the search results, here's the current weather information for San Diego:\n\n", 'type': 'text'}, {'citations': [{'cited_text': 'zoom out ... Cloudy skies early, followed by partial clearing. High 71F. 
Winds SW at 5 to 10 mph. ', 'encrypted_index': 'EpIBCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDNjBgqnSvPNqSn4drhoMC6+kZClw5BYjVd8NIjA6fDgN1noG02jYw61wabO8fydLsv5zHsZZxrhDh0OdJwqlSvtrIdmYfYw0wtCJwlIqFj7hALu67qUa7K7MrhauADNMAzYyv9sYBA==', 'title': 'San Diego, CA Hourly Weather Forecast | Weather Underground', 'type': 'web_search_result_location', 'url': 'https://www.wunderground.com/hourly/us/ca/san-diego'}], 'text': "Today's weather in San Diego features cloudy skies early, followed by partial clearing, with a high of 71°F and southwest winds at 5 to 10 mph.", 'type': 'text'}, {'citations': None, 'text': ' ', 'type': 'text'}, {'citations': [{'cited_text': '... Cloudy. Low around 65F.', 'encrypted_index': 'EpMBCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDBhzQq0yxF6o/+Q0choM2dHtRqvNkUaeA4xwIjBy/SKmt+RS/kumqnx89kNao5ZaujjG/o0a5wMEkHK+dR2VpJDWMa4qfc6MpGR6HrgqF/VJyhvfPY5k/gsvGgONcjq4JYVX39MyGAQ=', 'title': 'San Diego, CA Hourly Weather Forecast | Weather Underground', 'type': 'web_search_result_location', 'url': 'https://www.wunderground.com/hourly/us/ca/san-diego'}], 'text': 'Tonight will be cloudy with a low around 65°F.', 'type': 'text'}, {'citations': None, 'text': '\n\n', 'type': 'text'}, {'citations': [{'cited_text': 'The air quality is generally acceptable for most individuals. 
However, sensitive groups may experience minor to moderate symptoms from long-term expos...', 'encrypted_index': 'EpEBCioIBRgCIiQ4ODk4YTFkYy0yMTNkLTRhNmYtOTljYi03ZTBlNTUzZDc0NWISDCClHXCaSIjS0Tq46hoM53gA8ca4Otbqc6PXIjDwu0g3MfveKcLfHPb+ziBL65IdHHjOOUgo+EtosYOJa4EiDqOazuQ7hiKRFD2RRs0qFZbxQCLaVCwiO4NWncjZF6q61c9vORgE', 'title': 'San Diego, CA Weather Forecast | AccuWeather', 'type': 'web_search_result_location', 'url': 'https://www.accuweather.com/en/us/san-diego/92101/weather-forecast/347628'}], 'text': 'The air quality is generally acceptable for most individuals, though sensitive groups may experience minor to moderate symptoms from long-term exposure.', 'type': 'text'}, {'citations': None, 'text': '\n\nFor more detailed and up-to-date forecasts, you can check the National Weather Service or local San Diego weather sources for the most current conditions and extended forecasts.', 'type': 'text'}]`
- model: `claude-sonnet-4-20250514`
- role: `assistant`
- stop_reason: `end_turn`
- stop_sequence: `None`
- type: `message`
- usage: `{'cache_creation_input_tokens': 0, 'cache_read_input_tokens': 7220, 'input_tokens': 12, 'output_tokens': 255, 'server_tool_use': {'web_search_requests': 1}, 'service_tier': 'standard'}`

</details>

## Third party providers

NB: The 3rd party model list is currently out of date--PRs to fix that would be welcome!

### Amazon Bedrock

These are Amazon's current Claude models:

In [None]:
models_aws

['anthropic.claude-opus-4-1-20250805-v1:0',
 'anthropic.claude-sonnet-4-20250514-v1:0',
 'claude-3-5-haiku-20241022',
 'claude-3-7-sonnet-20250219',
 'anthropic.claude-3-opus-20240229-v1:0',
 'anthropic.claude-3-5-sonnet-20241022-v2:0']

Provided `boto3` is installed, we don't need any extra code to support Amazon Bedrock -- we just have to set up the appropriate client:

In [None]:
# Read the AWS credentials from the environment instead of hard-coding them in the notebook;
# `os.environ[...]` raises `KeyError` if either variable is unset.
ab = AnthropicBedrock(
    aws_access_key=os.environ['AWS_ACCESS_KEY'],
    aws_secret_key=os.environ['AWS_SECRET_KEY'],
)
# Wrap the Bedrock SDK client in a claudette `Client`, using the first model in `models_aws`
client = Client(models_aws[0], ab)

In [None]:
chat = Chat(cli=client)

In [None]:
chat("I'm Jeremy")

### Google Vertex

In [None]:
models_goog

['claude-opus-4-1@20250805',
 'anthropic.claude-3-sonnet-20240229-v1:0',
 'anthropic.claude-3-haiku-20240307-v1:0',
 'claude-3-opus@20240229',
 'claude-3-5-sonnet-v2@20241022',
 'claude-3-sonnet@20240229',
 'claude-3-haiku@20240307']

In [None]:
from anthropic import AnthropicVertex
import google.auth

In [None]:
# `google.auth.default()` returns a tuple; its second element is used here as the project ID
project_id = google.auth.default()[1]
region = "us-east5"
gv = AnthropicVertex(project_id=project_id, region=region)
# Wrap the Vertex SDK client in a claudette `Client`, using the last model in `models_goog`
client = Client(models_goog[-1], gv)

In [None]:
chat = Chat(cli=client)

In [None]:
chat("I'm Jeremy")

## Export -

In [None]:
#|hide
#|eval: false
from nbdev.doclinks import nbdev_export
# Run nbdev's export step, which writes the `#| export` cells out to the Python module
nbdev_export()