# Async

> Implements an `AsyncChat` version that mirrors the regular `lisette.Chat` as closely as possible.

In [None]:
#| default_exp asink

In [None]:
#| export
import json,asyncio
from litellm import acompletion, ModelResponse, ModelResponseStream, stream_chunk_builder
from toolslm.funccall import call_func_async
from fastcore.utils import *
from lisette.core import *

## Implementation

In [None]:
#| export
async def _alite_call_func(tc, ns, raise_on_err=True):
    "Await the tool requested by tool call `tc` and wrap its output as a `tool` role message."
    # `tc.function.arguments` is a JSON string; `ns` is handed to `call_func_async` unchanged.
    try:
        fargs = json.loads(tc.function.arguments)
        res = await call_func_async(tc.function.name, fargs, ns=ns)
    except Exception:
        # Default behaviour (unchanged): let parsing/tool errors propagate to the caller.
        if raise_on_err: raise
        # Best-effort mode: report the failure as the tool's output instead of aborting.
        import traceback
        res = traceback.format_exc()
    return {"tool_call_id": tc.id, "role": "tool", "name": tc.function.name, "content": str(res)}

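Because a consumer iterating over an async generator cannot receive a return value from it, we write a small wrapper that re-yields each chunk and, once the stream is exhausted, stores the assembled result on its `value` attribute: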

In [None]:
#| export
@asave_iter
async def astream_result(self, agen, postproc=noop):
    "Re-yield each chunk of `agen`; once exhausted, run `postproc` on the chunks and store `stream_chunk_builder`'s result in `self.value`."
    seen = []
    async for part in agen:
        # Keep a copy of every chunk so the full response can be assembled afterwards.
        seen.append(part)
        yield part
    # Only reached after the source stream has finished.
    postproc(seen)
    self.value = stream_chunk_builder(seen)

In [None]:
#| export
class AsyncChat(Chat):
    "Async variant of `Chat`: completions are awaited via `acompletion` and tool calls via `_alite_call_func`."
    async def _call(self, msg=None, prefill=None, temp=None, think=None, stream=False, max_tool_rounds=1, tool_round=0, final_prompt=None, tool_choice=None, **kwargs):
        "Internal method that always yields responses"
        msgs = self._prepare_msgs(msg, prefill)
        # `tool_choice` must reach the API, otherwise the 'none' set on the last tool round is ignored.
        res = await acompletion(model=self.model, messages=msgs, stream=stream,
                         tools=self.tool_schemas, reasoning_effort=effort.get(think), tool_choice=tool_choice,
                         # temperature is not supported when reasoning
                         temperature=None if think else (temp if temp is not None else self.temp),
                         **kwargs)
        if stream:
            # Pass chunks through as they arrive, then swap in the assembled response.
            res = astream_result(res, postproc=cite_footnotes)
            async for chunk in res: yield chunk
            res = res.value

        yield res
        # Record the assistant message in the history before handling any tool calls.
        self.hist.append(m:=res.choices[0].message)

        if tcs := m.tool_calls:
            tool_results = []
            for tc in tcs:
                result = await _alite_call_func(tc, ns=self.ns)
                tool_results.append(result)
                yield result

            if tool_round>=max_tool_rounds-1:
                # Last permitted round: optionally add `final_prompt` as a user message and
                # request no further tool calls in the follow-up completion.
                tool_results += ([{"role": "user", "content": final_prompt}] if final_prompt else [])
                tool_choice='none'

            # Keyword arguments keep every setting aligned with the signature; passing them
            # positionally previously shifted `stream`, `max_tool_rounds` and `tool_round+1`
            # into `prefill`, `temp` and `think`, resetting the round counter and streaming.
            async for result in self._call(
                msg=tool_results, prefill=prefill, temp=temp, think=think, stream=stream,
                max_tool_rounds=max_tool_rounds, tool_round=tool_round+1,
                final_prompt=final_prompt, tool_choice=tool_choice, **kwargs):
                    yield result

    async def __call__(self, msg=None, prefill=None, temp=None, think=None, stream=False, max_tool_rounds=1, final_prompt=None, return_all=False, **kwargs):
        "Main call method - handles streaming vs non-streaming"
        result_gen = self._call(msg=msg, prefill=prefill, temp=temp, think=think, stream=stream,
                                max_tool_rounds=max_tool_rounds, tool_round=0, final_prompt=final_prompt, **kwargs)
        # Streaming / return_all callers iterate the async generator themselves.
        if stream or return_all: return result_gen
        async for res in result_gen: pass
        return res # normal chat behavior only return last msg

## Demonstration

### Async chat

In [None]:
# Without `stream` or `return_all`, awaiting the chat returns only the last item it produced.
chat = AsyncChat(model="claude-sonnet-4-20250514")
await chat("What is 2+2?")

2 + 2 = 4

<details>

- id: `chatcmpl-30629f13-f308-4835-a014-db345c686773`
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=13, prompt_tokens=14, total_tokens=27, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

### Async chat w tools

In [None]:
async def async_add(a: int, b: int) -> int:
    "Add two numbers asynchronously"
    print('>>> async add is being called!')
    # Brief pause standing in for real asynchronous work.
    await asyncio.sleep(0.1)
    total = a + b
    return total

In [None]:
# `async_add` is a coroutine function; the chat awaits it when the model requests the tool.
chat_with_tools = AsyncChat(model="claude-sonnet-4-20250514", tools=[async_add])
await chat_with_tools("What is 5 + 7? Use the tool to calculate it.")

>>> async add is being called!


The result of 5 + 7 is 12.

<details>

- id: `chatcmpl-dea3c9f6-d28d-4e9f-b514-326d45125bd6`
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=17, prompt_tokens=528, total_tokens=545, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

### Streaming Async Chat

In [None]:
# With `stream=True` the awaited call returns an async generator instead of a response.
chat = AsyncChat(model="claude-sonnet-4-20250514")
stream_gen = await chat("Count to 50", stream=True)

# Stream chunks are printed as they arrive; the assembled `ModelResponse` follows them.
async for chunk in stream_gen:
    if   isinstance(chunk, ModelResponseStream): print(delta_text(chunk) or '',end='')
    elif isinstance(chunk, ModelResponse):       display(chunk)

### Streaming Async Chat w tools

In [None]:
# Streaming with tools: the generator yields stream chunks, assembled responses, and tool-result dicts.
chat_stream_tools = AsyncChat(model="claude-sonnet-4-20250514", tools=[async_add])
stream_gen = await chat_stream_tools("What's 15 + 23? Use the tool and then explain the result.", stream=True)

async for chunk in stream_gen:
    if isinstance(chunk, ModelResponse): display(chunk)
    elif isinstance(chunk,dict): continue  # tool result
    else: print(delta_text(chunk) or '', end='')

### Streaming Async Thinking

In [None]:
# `think` is looked up in `effort` to choose the `reasoning_effort` sent with the request;
# temperature is left unset whenever `think` is given.
chat = AsyncChat(model="claude-sonnet-4-20250514")
res = await chat("What's the most efficient way to sort a list of 1000 random integers?", think='l',stream=True)

async for chunk in res:
    if isinstance(chunk, ModelResponse): display(chunk)
    elif isinstance(chunk,dict): continue  # tool result
    else: print(delta_text(chunk) or '', end='')