# core

> lisette core

In [None]:
#| default_exp core

In [1]:
#| export
import litellm, json, asyncio
from html import escape
from typing import Optional
from litellm import acompletion, completion, stream_chunk_builder, get_model_info, ModelResponse, ModelResponseStream
from litellm.utils import function_to_dict
from toolslm.funccall import mk_ns, call_func, call_func_async, get_schema
from fastcore.utils import *

## LiteLLM

LiteLLM provides a simple, unified wrapper around most of the big LLM providers.

In [2]:
ms = ["gemini/gemini-2.5-flash", "claude-sonnet-4-20250514", "openai/gpt-4.1"]

TODO: test mixed content/tool calls message (and mixed images too).

In [3]:
#| export
@patch
def _repr_markdown_(self: litellm.ModelResponse):
    "Markdown repr: first choice's text and tool calls, followed by a collapsible list of response metadata"
    choice = self.choices[0]
    msg = choice.message
    body = msg.content or ''
    if msg.tool_calls:
        body += "\n".join(f"\n\n🔧 {tc.function.name}({tc.function.arguments})\n" for tc in msg.tool_calls)
    # Neither text nor tool calls: fall back to the plain string form of the message
    body = body or str(msg)
    meta = [f"id: `{self.id}`", f"model: `{self.model}`", f"finish_reason: `{choice.finish_reason}`"]
    if getattr(self, 'usage', None): meta.append(f"usage: `{self.usage}`")
    bullets = '\n- '.join(meta)
    return f"{body}\n\n<details>\n\n- {bullets}\n\n</details>"

In [4]:
msg = [{'role':'user','content':'Hey there!', 'cache_control': {'type': 'ephemeral'}}]

In [5]:
for m in ms:
    display(f'=== {m} ===')
    display(completion(m,msg))

'=== gemini/gemini-2.5-flash ==='

Hey there! How can I help you today?

<details>

- id: `SrHDaJavLsfi7M8PjbOQsA8`
- model: `gemini-2.5-flash`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=942, prompt_tokens=4, total_tokens=946, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=932, rejected_prediction_tokens=None, text_tokens=10), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=None, text_tokens=4, image_tokens=None))`

</details>

'=== claude-sonnet-4-20250514 ==='

Hello! How are you doing today? Is there anything I can help you with?

<details>

- id: `chatcmpl-031d6949-7d6e-4a48-82a2-e58a6f2371b3`
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=20, prompt_tokens=10, total_tokens=30, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

'=== openai/gpt-4.1 ==='

Hello! How can I help you today? 😊

<details>

- id: `chatcmpl-CEqk9eBWtLFh176VVei9W2IHBpLxT`
- model: `gpt-4.1-2025-04-14`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=10, prompt_tokens=10, total_tokens=20, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

### Streaming

In [6]:
#| export
def stream_with_complete(gen, postproc=noop):
    "Re-yield each chunk of `gen`; the generator's return value is the response rebuilt from all chunks"
    seen = []
    for part in gen:
        seen.append(part)
        yield part
    # `postproc` receives the full chunk list before it is combined into one response
    postproc(seen)
    return stream_chunk_builder(seen)

In [7]:
from fastcore.xtras import SaveReturn

In [8]:
model = ms[1]

In [None]:
r = completion(messages=msg, model=model, stream=True)
r2 = SaveReturn(stream_with_complete(r))

In [None]:
for o in r2: print(o.choices[0].delta.content or '', end='')

In [None]:
r2.value

### Tools

In [9]:
#| export
def _lite_mk_func(f):
    if isinstance(f, dict): return f
    return {'type':'function', 'function':get_schema(f, pname='parameters')}

In [10]:
def simple_add(
    a: int,   # first operand
    b: int=0  # second operand
) -> int:
    "Add two numbers together"
    # NOTE: the docstring and the inline parameter comments above surface as the
    # descriptions in the schema produced by `_lite_mk_func(simple_add)`, so they are
    # part of what the model sees. The print makes each tool invocation visible.
    print(f"TOOL CALLED {a=} + {b=}")
    return a + b

In [18]:
toolsc = _lite_mk_func(simple_add)
toolsc

{'type': 'function',
 'function': {'name': 'simple_add',
  'description': 'Add two numbers together\n\nReturns:\n- type: integer',
  'parameters': {'type': 'object',
   'properties': {'a': {'type': 'integer', 'description': 'first operand'},
    'b': {'type': 'integer', 'description': 'second operand', 'default': 0}},
   'required': ['a']}}}

In [11]:
#| export
def mk_user(s, cache=False):
    "User-role message dict for `s`; adds an ephemeral `cache_control` entry when `cache` is truthy"
    extra = {'cache_control': {'type': 'ephemeral'}} if cache else {}
    return {"role": "user", "content": s, **extra}

In [19]:
tmsg = mk_user("What is 5478954793+547982745? How about 5479749754+9875438979? Always use tools for calculations, and describe what you'll do before using a tool. Where multiple tool calls are required, do them in a single response where possible.")
r = completion(model, [tmsg], tools=[toolsc])

In [20]:
r

I'll help you calculate both of those sums using the addition tool. Let me perform both calculations for you:

1. First, I'll calculate 5478954793 + 547982745
2. Then, I'll calculate 5479749754 + 9875438979

🔧 simple_add({"a": 5478954793, "b": 547982745})



🔧 simple_add({"a": 5479749754, "b": 9875438979})


<details>

- id: `chatcmpl-532ec85d-e533-45ed-b48f-f2b1b742589a`
- model: `claude-sonnet-4-20250514`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=197, prompt_tokens=475, total_tokens=672, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

In [21]:
#| export
def _lite_call_func(tc,ns,raise_on_err=True):
    "Run tool call `tc` against namespace `ns` and wrap the outcome as a `tool` role message"
    fn = tc.function
    # An empty arguments string (a call with no arguments) is treated as `{}` instead of failing in `json.loads`
    args = json.loads(fn.arguments or '{}')
    # Forward `raise_on_err` so `call_func` applies the caller's error policy rather than always its default
    res = call_func(fn.name, args, ns=ns, raise_on_err=raise_on_err)
    return {"tool_call_id": tc.id, "role": "tool", "name": fn.name, "content": str(res)}

In [22]:
tcs = [_lite_call_func(o, ns=globals()) for o in r.choices[0].message.tool_calls]
tcs

TOOL CALLED a=5478954793 + b=547982745
TOOL CALLED a=5479749754 + b=9875438979


[{'tool_call_id': 'toolu_01Ee4U35ZTARgiiRUodzcKv2',
  'role': 'tool',
  'name': 'simple_add',
  'content': '6026937538'},
 {'tool_call_id': 'toolu_01WQLZePHTNd4sSbYt2K5uEK',
  'role': 'tool',
  'name': 'simple_add',
  'content': '15355188733'}]

In [23]:
#| export
def delta_text(msg):
    "Printable text for one streaming chunk: its content, a 🔧 line for tool calls, or a reasoning marker; None otherwise"
    choice = msg.choices[0]
    if not choice: return choice
    if not hasattr(choice, 'delta'): return None
    delta = choice.delta
    if (text := delta.content): return text
    # Only tool-call deltas that carry both an id and a function name are announced
    started = [tc.function.name for tc in (delta.tool_calls or []) if tc.id and tc.function.name]
    if started: return '\n' + ''.join(f"🔧 {name}" for name in started)
    if not hasattr(delta, 'reasoning_content'): return None
    # A brain per reasoning delta; a blank line when the reasoning field is present but empty
    return '🧠' if delta.reasoning_content else '\n\n'

In [24]:
r = completion(messages=[tmsg], model=model, stream=True, tools=[toolsc])
r2 = SaveReturn(stream_with_complete(r))
for o in r2: print(delta_text(o) or '', end='')

I'll help you calculate both of those additions using the simple_add tool. Let me perform both calculations for you:
🔧 simple_add
🔧 simple_add

In [None]:
r2.value

In [None]:
msg = mk_user("Solve this complex math problem: What is the derivative of x^3 + 2x^2 - 5x + 1?")
r = completion(messages=[msg], model=model, stream=True, reasoning_effort="low")
r2 = SaveReturn(stream_with_complete(r))
for o in r2: print(delta_text(o) or '', end='')


In [None]:
r2.value

### Citations

In [35]:
search_tool = { "type": "web_search_20250305", "name": "web_search", "max_uses": 3}
smsg = mk_user("Search the web and tell me very briefly about otters")
r = completion(ms[1], [smsg], tools=[search_tool])
r

Otters are carnivorous mammals in the subfamily Lutrinae and members of the weasel family, found on every continent except Australia and Antarctica. There are 13 species in total, ranging from the small-clawed otter to the giant otter.

These aquatic mammals have several distinctive features: otters have the densest fur of any animal—as many as a million hairs per square inch in places, and long, slim bodies and relatively short limbs with powerful webbed feet used to swim. An otter's lung capacity is 2.5 times greater than that of similar-sized land mammals, and sea otters have been known to stay submerged for more than 5 minutes at a time.

Otters are expert swimmers and hunters that eat fish, crustaceans, and other critters. They're known for their playful behavior - river otters are especially playful, gamboling on land and splashing into rivers and streams. Sea otters have a unique feeding behavior: a sea otter will float on its back, place a rock on its chest, then smash the mollusk down on it until it breaks open.

Unfortunately, otters and their mustelid relatives were once hunted extensively for their fur, many to the point of near extinction, and despite regulations designed to protect them, many species remain at risk from pollution and habitat loss.

<details>

- id: `chatcmpl-11df26e2-7a6b-4cab-8f3e-b197c285900f`
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=532, prompt_tokens=13632, total_tokens=14164, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), server_tool_use=ServerToolUse(web_search_requests=1), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

When not using streaming, all citations are placed in a separate key in the response:

We make these citations visible to end users by adding them as footnotes.

In [14]:
#| export
def format_citations(cs):
    "Markdown list of the unique `[title](url)` sources in the citation groups `cs`, sorted as strings"
    bullets = set()
    for group in cs:
        for cit in group: bullets.add(f"- [{cit['title']}]({cit['url']})\n")
    return '**Citations:**\n' + ''.join(sorted(bullets))

In [None]:
print(format_citations(r.choices[0].message.provider_specific_fields['citations']))

In [15]:
#| export
def add_citations_to_content(r):
    "Update LiteLLM ModelResponse content by appending formatted citations if they exist"
    msg = r.choices[0].message
    cs = nested_idx(msg, 'provider_specific_fields', 'citations')
    if not cs: return
    # `content` may be None; treat it as empty instead of failing on `None += str`
    msg.content = (msg.content or '') + '\n\n' + format_citations(cs)

In [None]:
add_citations_to_content(r)
r

In [None]:
r = list(completion(ms[1], [smsg], tools=[search_tool], stream=True))

In [None]:
#| export
def cite_footnotes(stream_list):
    "Add markdown footnote citations to stream deltas"
    # Works in place: a delta that carries a citation gets its `content` replaced by a `[*](url "title")` link
    for chunk in stream_list:
        delta = nested_idx(chunk, 'choices', 0, 'delta')
        if not delta: continue
        cit = nested_idx(delta, 'provider_specific_fields', 'citation')
        if not cit: continue
        # Escape double quotes so the title cannot terminate the link's title string early
        safe_title = cit['title'].replace('"', '\\"')
        delta.content = f'[*]({cit["url"]} "{safe_title}") '

In [None]:
cite_footnotes(r)
stream_chunk_builder(r)

## Chat

LiteLLM is pretty bare-bones: it doesn't keep track of conversation history or run tool calls for you.

So let's make a claudette-style wrapper that handles streaming, tool calling, and tool loops for us.

In [41]:
#| export
# TODO: dont like this var name...
# TODO: make enum so type hints are nice
# Maps the first letter of each reasoning-effort level to its full name:
# {'l': 'low', 'm': 'medium', 'h': 'high'}. Looked up with `effort.get(think)`,
# so an unknown or missing `think` value yields None.
effort = AttrDict({o[0]:o for o in ('low','medium','high')})

In [40]:
#| export
class Chat:
    "Stateful LiteLLM chat client: tracks history, runs requested tools from `ns`, and feeds the results back to the model"
    def __init__(self, model:str, sp='', temp=0, tools:list=None, hist:list=None, ns:Optional[dict]=None, cache=False):
        "LiteLLM chat client."
        self.model = model
        hist,tools = listify(hist),listify(tools)
        # Tools are looked up by name in `ns`: default to one built from `tools`, else this module's globals
        if ns is None and tools: ns = mk_ns(tools)
        elif ns is None: ns = globals()
        self.tool_schemas = [_lite_mk_func(t) for t in tools] if tools else None
        store_attr()
    
    def _prepare_msgs(self, msg=None, prefill=None):
        "Prepare the messages list for the API call"
        msgs = [{"role": "system", "content": self.sp}] if self.sp else []
        # `msg` may be a str (wrapped as a user message), a single message dict, a list of messages, or None
        self.hist += [mk_user(msg, cache=self.cache)] if isinstance(msg, str) \
            else [msg] if isinstance(msg, dict) \
            else [] if msg is None \
            else msg
        if prefill and get_model_info(self.model)["supports_assistant_prefill"]: 
            self.hist.append({"role":"assistant","content":prefill})
        # History can hold both plain dicts and response message objects; the latter are dumped to dicts
        return msgs + [m if isinstance(m, dict) else m.model_dump() for m in self.hist]

    def _call(self, msg=None, prefill=None, temp=None, think=None, stream=False, max_tool_rounds=1, tool_round=0, final_prompt=None, tool_choice=None, **kwargs):
        "Internal method that always yields responses"
        msgs = self._prepare_msgs(msg, prefill)
        # `tool_choice` must reach the API: it is how the last round (set to 'none' below) stops further tool calls
        res = completion(model=self.model, messages=msgs, stream=stream, 
                         tools=self.tool_schemas, reasoning_effort = effort.get(think), tool_choice=tool_choice,
                         # temperature is not supported when reasoning
                         temperature=None if think else (temp if temp is not None else self.temp), **kwargs)
        # When streaming, chunks are yielded as they arrive and `res` becomes the response rebuilt from them
        if stream: res = yield from stream_with_complete(res)
        else: add_citations_to_content(res)
        m = res.choices[0].message
        self.hist.append(m)
        yield res

        if tcs := m.tool_calls:
            tool_results = [_lite_call_func(tc, ns=self.ns) for tc in tcs]
            # Final permitted round: optionally add `final_prompt` and forbid tool use in the follow-up request
            if tool_round>=max_tool_rounds-1:
                tool_results += ([{"role": "user", "content": final_prompt}] if final_prompt else [])
                tool_choice='none'
            # Follow-up request carrying the tool results; note that `think` is not forwarded (None is passed)
            yield from self._call(
                tool_results, prefill, temp, None, stream, max_tool_rounds, tool_round+1,
                final_prompt, tool_choice=tool_choice, **kwargs)
    
    def __call__(self, msg=None, prefill=None, temp=None, think=None, stream=False, max_tool_rounds=1,
                 final_prompt=None, return_all=False, **kwargs):
        "Main call method - handles streaming vs non-streaming"
        result_gen = self._call(msg, prefill, temp, think, stream, max_tool_rounds, 0, final_prompt, **kwargs)     
        if stream: return result_gen              # streaming
        elif return_all: return list(result_gen)  # toolloop behavior
        else: return last(result_gen)             # normal chat behavior

## Add prefill

Litellm supports `prefill` for models that have this feature. Note, it does not add your prefill to the response, so you'll have to do that yourself in post-processing.

In [None]:
chat = Chat(ms[1])
chat("Hey my name is Rens", prefill="Howdy Re")

### Test history tracking

In [None]:
chat = Chat(m)
res = chat("Hey my name is Rens")
res

In [None]:
chat("Whats my name")

As you can see, the chat now keeps track of the conversation history!

## Test tool use

OK, now let's test tool use.

In [None]:
for m in ms:
    display(f'=== {m} ===')
    chat = Chat(m, tools=[simple_add])
    res = chat("What's 5 + 3?")
    display(res)

In [None]:
chat = Chat(ms[1], tools=[search_tool])
res = chat("Search the web and tell me very briefly about otters", stream=True)
for o in res:
    if isinstance(o, ModelResponse): sleep(0.01); display(o)
    else: print(delta_text(o) or '',end='')

## Test multi tool calling

In [None]:
chat = Chat(model, tools=[simple_add])
res = chat("What's ((5 + 3)+7)+11? Work step by step", return_all=True, max_tool_rounds=5)
for r in res: display(r)

In [42]:
@patch(as_prop=True)
def cost(self: Chat):
    "Total cost of all responses in conversation history"
    # `Chat` keeps its history in `self.hist` (there is no `self.h` attribute).
    # NOTE(review): `Chat._call` appends `res.choices[0].message`, not the response itself, so history
    # entries may never have `choices` and this may always sum to 0 — confirm where cost should be read from.
    return sum(getattr(r, '_hidden_params', {}).get('response_cost')  or 0
               for r in self.hist if hasattr(r, 'choices'))

Some models support parallel tool calling, i.e. sending multiple tool call requests in a single conversation step.

In [None]:
def multiply(a: int, b: int) -> int:
    "Multiply two numbers"
    # Announce the call first so the notebook output shows when the tool was invoked
    print(f"MULTIPLY: {a} * {b}")
    product = a * b
    return product

chat = Chat(ms[-1], tools=[simple_add, multiply])
res = chat("Calculate (5 + 3) * (7 + 2)", max_tool_rounds=5, return_all=True)
for r in res: display(r)

See it did the additions in one go!

Next, hit the `max_tool_rounds` limit and use `final_prompt` to ask for a summary of the work done so far.

In [None]:
def divide(a: int, b: int) -> float:
    "Divide two numbers"
    # Announce the call first so the notebook output shows when the tool was invoked
    display(f"DIVIDE: {a} / {b}")
    quotient = a / b
    return quotient

chat = Chat(m, tools=[simple_add, multiply, divide])
res = chat("Calculate ((10 + 5) * 3) / (2 + 1) step by step", 
           max_tool_rounds=2, return_all=True,
           final_prompt="Please summarize what you've calculated so far")
print(f"Got {len(res)} responses")
for r in res: display(r)

## Streaming

Let's write a helper class to format our streaming output.

In [43]:
#| export
def _clean_str(text):
    "Clean content to prevent formatted content from breaking the tool result formatting."
    return escape(str(text)).replace('`', '').replace('\n', ' ').replace('|', ' ')

In [44]:
#| export
def _trunc_str(s, mx=2000, replace="…"):
    "Truncate `s` to `mx` chars max, adding `replace` if truncated"
    s = str(s).strip()
    return s[:mx]+replace if len(s)>mx else s

In [74]:
class ResponseFormatter:
    "Iterate (sync or async) over what `Chat`/`AsyncChat` yields and turn each item into a markdown text piece"
    def __init__(self, res):
        # A single complete response is wrapped so it can be iterated like a stream
        self.res = [res] if isinstance(res, ModelResponse) else res
        self.thinking = False          # True while reasoning deltas are being received
        self._iter = None              # created lazily on the first `next`/`anext`
        self.stream = None             # set from the first item: is the source made of stream chunks?
        self.o = None                  # most recent raw item pulled from the source
        self.n = 0                     # running footnote number
        self.pending_citations = ''    # footnote markers collected but not yet emitted
        self.footnotes = "\n\n"        # footnote definitions, emitted once the source is exhausted
        
    def __iter__(self): 
        if isinstance_str(self.res,'async_generator'): raise TypeError("Use 'async for' with async generators")
        return self
    
    def __aiter__(self): 
        if not isinstance_str(self.res,'async_generator'): raise TypeError("Use 'for' with sync generators")
        return self

    def _tail(self):
        "Final text once the source is exhausted (unflushed citation markers + footnotes), or None if there are no footnotes"
        if not self.footnotes.strip(): return None
        # Markers still pending here would otherwise be lost, leaving footnotes nothing points to
        tail = self.pending_citations + self.footnotes
        self.pending_citations,self.footnotes = '',''
        return tail

    def __next__(self): 
        if self._iter is None: self._iter = iter(self.res)
        try: self.o = next(self._iter) 
        except StopIteration: 
            # Emit the tail exactly once; the next call re-raises because `_tail` then returns None
            if (tail := self._tail()) is None: raise
            return tail
        if self.stream is None: self.stream = isinstance(self.o,ModelResponseStream)
        return self.format_chunk(self.o)

    async def __anext__(self): 
        if self._iter is None: self._iter = aiter(self.res)
        try: self.o = await anext(self._iter)
        except StopAsyncIteration: 
            if (tail := self._tail()) is None: raise
            return tail
        if self.stream is None: self.stream = isinstance(self.o,ModelResponseStream)
        return self.format_chunk(self.o)

    def format_chunk(self, o):
        "Markdown text for one item: a stream chunk, a complete response, or a tool-result dict"
        res = ''
        is_delta = isinstance(o, ModelResponseStream)
        # Text comes from deltas when streaming (the rebuilt response yielded afterwards would repeat it) and
        # from the message otherwise. Tool-result dicts have no `choices`, so they must skip this part.
        if is_delta or (not self.stream and isinstance(o, ModelResponse)):
            d = o.choices[0].delta if is_delta else o.choices[0].message
            if d.content: res+=d.content
            if d.tool_calls and self.pending_citations:
                res+=self.pending_citations
                self.pending_citations=''
            elif (c:=nested_idx(d,'provider_specific_fields','citation')):
                self.n+=1
                self.pending_citations+=f' [^{self.n}]'
                self.footnotes += f'[^{self.n}]: {c["url"]}\n\t\"{_clean_str(c["cited_text"])}\"\n\n'
            else: 
                if nested_idx(d,'reasoning_content'): 
                    self.thinking=True
                    res+='🧠'
                elif self.thinking: 
                    # First non-reasoning delta after reasoning: separate the markers from what follows
                    self.thinking=False
                    res+='\n\n'
        m = o.choices[0].message if isinstance(o, ModelResponse) else o
        if c := getattr(m, 'tool_calls', None):
            # Opens a details block for the first tool call only; a tool-result dict closes it below
            fn = first(c).function
            res+=f"\n<details class='tool-usage-details'>\n\n `{fn.name}({fn.arguments})`\n"
        if isinstance(m, dict) and 'tool_call_id' in m: 
            res+=f"  - `{_trunc_str(_clean_str(m.get('content')))}`\n\n</details>\n\n"
        return res

In [63]:
from IPython.display import display, Markdown

In [75]:
search_tool = { "type": "web_search_20250305", "name": "web_search", "max_uses": 3}
chat_stream_tools = AsyncChat(model="claude-sonnet-4-20250514", tools=[search_tool])
stream_gen = await chat_stream_tools("Search the web and tell me very briefly about otters. format with # Brief otter facts and then # A fun otter story", stream=True)
formatted_chunks = ResponseFormatter(stream_gen)
await amd_display(formatted_chunks)

# Brief otter facts

Otters are semiaquatic mammals that belong to the weasel family (Mustelidae) and are noted for their playful behaviour [^1]. The 13 species range in adult size from 0.6 to 1.8 m (2.0 to 5.9 ft) in length and 1 to 45 kg (2.2 to 99.2 lb) in weight. The Asian small-clawed otter is the smallest otter species and the giant otter and sea otter are the largest [^2].

The otter has a lithe and slender body with short legs, a strong neck, and a long flattened tail that helps propel the animal gracefully through water. Swimming ability is further enhanced in most species by four webbed feet [^3]. Otters have the densest fur of any animal—as many as a million hairs per square inch in places [^4].

Otters are playful animals and appear to engage in various behaviors for sheer enjoyment, such as making waterslides and sliding on them into the water. They may also find and play with small stones [^5]. An otter's lung capacity is 2.5 times greater than that of similar-sized land mammals. River otters, however, can hold their breath for up to 8 minutes [^6] [^7].

An otter's den is called a holt, or couch. Male otters are called dogs or boars; females are called bitches or sows; and their offspring are called pups or cubs. The collective nouns for otters are bevy, family, lodge, romp (being descriptive of their often playful nature), or, when in water, raft [^8].

# A fun otter story

In 2024, researchers in India got an unexpected surprise while conducting a tiger survey. A team of researchers was setting up trail cameras at Nandhaur Wildlife Sanctuary in March 2024 to help estimate the "tiger density" when they "saw an otter" [^9] [^10]. 

The brief glimpse intrigued them and led them to set up more trail cameras for a better look at the "smooth-coated" animal. The resulting photos were a first-of-their-kind record for the site [^11]. The 10-day-long survey worked: The trail cameras photographed the sanctuary's first confirmed sighting of smooth-coated otters. Two photos, taken May 16, show a gathering of four smooth-coated otters [^12].

What makes this story particularly charming is that "Little is known about otter distribution in Uttarakhand," the surrounding state, researchers said. The wildlife sanctuary had "suitable habitat for otters," but no one had seen them in the area [^13]. The researchers' dedication to following up on their brief otter sighting led to documenting these elusive creatures for the first time in that location!

[^1]: https://www.britannica.com/animal/otter
	"otter, (subfamily Lutrinae), any of 13 species of semiaquatic mammals that belong to the weasel family (Mustelidae) and are noted for their playful be..."

[^2]: https://en.wikipedia.org/wiki/Otter
	"The 13 species range in adult size from 0.6 to 1.8 m (2.0 to 5.9 ft) in length and 1 to 45 kg (2.2 to 99.2 lb) in weight. The Asian small-clawed otter..."

[^3]: https://www.britannica.com/animal/otter
	"The otter has a lithe and slender body with short legs, a strong neck, and a long flattened tail that helps propel the animal gracefully through water..."

[^4]: https://www.nationalgeographic.com/animals/mammals/facts/otters-1
	"Otters have the densest fur of any animal—as many as a million hairs per square inch in places. "

[^5]: https://en.wikipedia.org/wiki/Otter
	"Otters are playful animals and appear to engage in various behaviors for sheer enjoyment, such as making waterslides and sliding on them into the wate..."

[^6]: https://www.doi.gov/blog/12-facts-about-otters-sea-otter-awareness-week
	"An otter’s lung capacity is 2.5 times greater than that of similar-sized land mammals. "

[^7]: https://www.doi.gov/blog/12-facts-about-otters-sea-otter-awareness-week
	"River otters, however, can hold their breath for up to 8 minutes. "

[^8]: https://en.wikipedia.org/wiki/Otter
	"An otter&#x27;s den is called a holt, or couch. Male otters are called dogs or boars; females are called bitches or sows; and their offspring are called pu..."

[^9]: https://phys.org/news/2025-09-chance-sighting-tiger-survey-kind.html
	"A team of researchers was setting up trail cameras at Nandhaur Wildlife Sanctuary in March 2024 to help estimate the &amp;quot;tiger density&amp;quot; when th..."

[^10]: https://phys.org/news/2025-09-chance-sighting-tiger-survey-kind.html
	"A team of researchers was setting up trail cameras at Nandhaur Wildlife Sanctuary in March 2024 to help estimate the &quot;tiger density&quot; when they &quot;saw an..."

[^11]: https://phys.org/news/2025-09-chance-sighting-tiger-survey-kind.html
	"The brief glimpse intrigued them and led them to set up more trail cameras for a better look at the &quot;smooth-coated&quot; animal. The resulting photos were ..."

[^12]: https://phys.org/news/2025-09-chance-sighting-tiger-survey-kind.html
	"The 10-day-long survey worked: The trail cameras photographed the sanctuary&#x27;s first confirmed sighting of smooth-coated otters. Two photos, taken May ..."

[^13]: https://phys.org/news/2025-09-chance-sighting-tiger-survey-kind.html
	"&quot;Little is known about otter distribution in Uttarakhand,&quot; the surrounding state, researchers said. The wildlife sanctuary had &quot;suitable habitat for o..."



In [66]:
async for c in formatted_chunks:
    print(c)
    print('---')


---

---

---

---

---

---

---

---

---

---

---

---

---

---

---

---
#
---
 Brief otter facts


---

---

---
There are 13-14 known species of otters
---
 [^1] [^2]
---
, ranging from 
---

---
the Asian small-clawed otter (smallest) to the giant otter and sea otter (largest)
---
 [^3]
---
. 
---

---

---
Otters are carnivorous mammals in the subfamily Lutrinae, part of the Mustelidae family which includes weasels, badgers, an
---
d wolverines
---
 [^4] [^5]
---
.


---

---
Otters have long, slim bodies with relatively short limbs and powerful webbed feet for swimming, plus
---
 seal-like abilities for holding breath underwater
---
 [^6]
---
. 
---

---

---
An otter's lung capacity is 2.5 times greater than similar
---
-sized land mammals, with river
---
 otters able to hold their breath for up to 
---
8 minutes
---
 [^7] [^8]
---
.


---

---

---
Otters are playful animals that engage in behaviors for sheer enjoyment, such as making watersl
---
ides and sliding on them 

In [58]:
from rich import print as rprint
for s in sg:
    rprint(s.choices[0].delta)

AttributeError: 'Choices' object has no attribute 'delta'

In [None]:
frs = ResponseFormatter(Chat(model="claude-sonnet-4-20250514")("Count to 15", stream=True))
# Print the loop variable itself (`o`); `chunk` is not defined by this loop
for o in frs: print(o,end='')

In [None]:
frs.o

In [48]:
#| export
async def amd_display(chunks):
    "Consume the async iterable `chunks`, re-rendering the accumulated markdown in place after every piece"
    shown = ''
    async for piece in chunks:
        shown = shown + piece
        # `clear=True` replaces the previous output, so the cell shows one growing document
        display(Markdown(shown), clear=True)

In [49]:
#| export
async def md_display(chunks):
    "Consume the (sync) iterable `chunks`, re-rendering the accumulated markdown in place after every piece"
    # Declared `async` although the iteration itself is synchronous, so callers still have to await it
    shown = ''
    for piece in chunks:
        shown = shown + piece
        display(Markdown(shown), clear=True)

# Async

In [50]:
#| export
async def _alite_call_func(tc, ns, raise_on_err=True):
    "Await tool call `tc` against namespace `ns` and wrap the outcome as a `tool` role message"
    fn = tc.function
    # An empty arguments string (a call with no arguments) is treated as `{}` instead of failing in `json.loads`
    args = json.loads(fn.arguments or '{}')
    # Forward `raise_on_err` so `call_func_async` applies the caller's error policy rather than always its default
    res = await call_func_async(fn.name, args, ns=ns, raise_on_err=raise_on_err)
    return {"tool_call_id": tc.id, "role": "tool", "name": fn.name, "content": str(res)}

As you cannot receive the return value of an async generator we have to write a little wrapper to capture this result:

In [51]:
#| export
@asave_iter
async def astream_result(self, agen, postproc=noop):
    "Relay every chunk of the async generator `agen`, then store the response rebuilt from them in `self.value`"
    collected = []
    async for part in agen:
        collected.append(part)
        yield part
    # `postproc` receives the full chunk list before it is combined into one response
    postproc(collected)
    self.value = stream_chunk_builder(collected)

In [52]:
#| export
class AsyncChat(Chat):
    "Async variant of `Chat`: same history handling, but awaits the completion and the tool calls"
    async def _call(self, msg=None, prefill=None, temp=None, think=None, stream=False, max_tool_rounds=1, tool_round=0, final_prompt=None, tool_choice=None, **kwargs):
        "Internal method that always yields responses"
        msgs = self._prepare_msgs(msg, prefill)
        # `tool_choice` must reach the API: it is how the last round (set to 'none' below) stops further tool calls
        res = await acompletion(model=self.model, messages=msgs, stream=stream,
                         tools=self.tool_schemas, reasoning_effort=effort.get(think), tool_choice=tool_choice,
                         # temperature is not supported when reasoning
                         temperature=None if think else (temp if temp is not None else self.temp), 
                         **kwargs)
        if stream:
            # Chunks are relayed as they arrive; `.value` then holds the response rebuilt from them
            res = astream_result(res)
            async for chunk in res: yield chunk
            res = res.value
        else: add_citations_to_content(res)
        
        # Record the reply before yielding it (as `Chat._call` does), so history stays complete
        # even if the consumer stops iterating right after receiving the response
        m = res.choices[0].message
        self.hist.append(m)
        yield res

        if tcs := m.tool_calls:
            tool_results = []
            for tc in tcs:
                result = await _alite_call_func(tc, ns=self.ns)
                tool_results.append(result)
                # Each tool result message is also yielded to the consumer
                yield result
            
            # Final permitted round: optionally add `final_prompt` and forbid tool use in the follow-up request
            if tool_round>=max_tool_rounds-1:
                tool_results += ([{"role": "user", "content": final_prompt}] if final_prompt else [])
                tool_choice='none'
            
            # Follow-up request carrying the tool results; note that `think` is not forwarded (None is passed)
            async for result in self._call(
                tool_results, prefill, temp, None, stream, max_tool_rounds, tool_round+1,
                final_prompt, tool_choice=tool_choice, **kwargs):
                    yield result
    
    async def __call__(self, msg=None, prefill=None, temp=None, think=None, stream=False, max_tool_rounds=1, final_prompt=None, return_all=False, **kwargs):
        "Main call method - handles streaming vs non-streaming"
        result_gen = self._call(msg, prefill, temp, think, stream, max_tool_rounds, 0, final_prompt, **kwargs)
        # Streaming and `return_all` hand back the async generator itself for the caller to iterate
        if stream or return_all: return result_gen
        async for res in result_gen: pass
        return res # normal chat behavior only return last msg

## Demonstration

### Async chat

In [None]:
chat = AsyncChat(model="claude-sonnet-4-20250514")
await chat("What is 2+2?")

### Async chat w tools

In [None]:
async def async_add(a: int, b: int) -> int:
    "Add two numbers asynchronously"
    # Short pause on the event loop before the sum is returned
    pause = 0.1
    await asyncio.sleep(pause)
    return a + b

In [None]:
chat_with_tools = AsyncChat(model="claude-sonnet-4-20250514", tools=[async_add])
await chat_with_tools("What is 5 + 7? Use the tool to calculate it.")

### Streaming Async Chat

In [None]:
formatted_chunks = ResponseFormatter(await AsyncChat(model="claude-sonnet-4-20250514")("Count to 15", stream=True))
async for chunk in formatted_chunks: print(chunk,end='')
formatted_chunks.o

### Streaming Async Chat w tools

In [None]:
chat_stream_tools = AsyncChat(model="claude-sonnet-4-20250514", tools=[async_add])
stream_gen = await chat_stream_tools("What's 15 + 23? Use the tool and then explain the result.", stream=True)

formatted_chunks = ResponseFormatter(stream_gen)
await amd_display(formatted_chunks)

### Streaming Async Thinking

In [None]:
chat = AsyncChat(model="claude-sonnet-4-20250514")
stream_gen = await chat("What's the most efficient way to sort a list of 1000 random integers?", think='l',stream=True)
formatted_chunks = ResponseFormatter(stream_gen)
await amd_display(formatted_chunks)

## Async Non-Streaming Search w Citations

In [None]:
search_tool = { "type": "web_search_20250305", "name": "web_search", "max_uses": 3}
chat_stream_tools = AsyncChat(model="claude-sonnet-4-20250514", tools=[search_tool])
res = await chat_stream_tools("Search the web and tell me very briefly about otters", stream=False)
res

## Async Streaming Search w Citations

In [None]:
search_tool = { "type": "web_search_20250305", "name": "web_search", "max_uses": 3}
chat_stream_tools = AsyncChat(model="claude-sonnet-4-20250514", tools=[search_tool])
stream_gen = await chat_stream_tools("Search the web and tell me very briefly about otters", stream=True)
formatted_chunks = ResponseFormatter(stream_gen)
await amd_display(formatted_chunks)

## Export

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()

In [None]:
from IPython.display import Image

In [None]:
fn = Path('samples/puppy.jpg')
Image(filename=fn, width=200)

In [None]:
def _mk_img(data:bytes)->tuple:
    "Return `(base64_text, mime_type)` for raw image bytes; the type is detected from the bytes themselves"
    kind = imghdr.what(None, h=data)
    mime = mimetypes.types_map["." + kind]
    b64 = base64.b64encode(data).decode("utf-8")
    return b64, mime

In [None]:
import base64
import mimetypes
from fastcore import imghdr

In [None]:
imgbytes = fn.read_bytes()
img,mtype = _mk_img(imgbytes)
imgd = { "image_url": {"url": f'data:{mtype};base64,{img}', "format":mtype} }

In [None]:
response = completion( model=model, 
    messages=[
        { "role": "user",
        "content": [{ "type": "text", "text": "What’s in this image?" },
        { "type": "image_url", **imgd }] }
    ])

In [None]:
response