# core

> lisette core

In [None]:
#| default_exp core

In [None]:
#| export
import litellm, json
from litellm import completion, stream_chunk_builder
from litellm.utils import function_to_dict
from toolslm.funccall import mk_ns, call_func
from typing import Optional
from fastcore.all import *

## LiteLLM

LiteLLM provides an easy wrapper for most big LLM providers.

In [None]:
# Models exercised throughout this notebook
ms = [
    "gemini/gemini-2.5-flash-preview-04-17",
    "claude-sonnet-4-20250514",
    "openai/gpt-4o-mini",
]

In [None]:
# Smoke test: send the same greeting to every model and print the raw response object
for m in ms:
    print(f'=== {m} ===')
    res = completion(model=m, messages=[{'role':'user','content':'Hey there!'}])
    print(res)

=== gemini/gemini-2.5-flash-preview-04-17 ===
ModelResponse(id='PfJjaMjXBJfOvdIPiNSzoQU', created=1751380539, model='gemini-2.5-flash-preview-04-17', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='Hi there! How can I help?', role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None))], usage=Usage(completion_tokens=282, prompt_tokens=4, total_tokens=286, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=274, rejected_prediction_tokens=None, text_tokens=8), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=None, text_tokens=4, image_tokens=None)), vertex_ai_grounding_metadata=[], vertex_ai_url_context_metadata=[], vertex_ai_safety_results=[], vertex_ai_citation_metadata=[])
=== claude-sonnet-4-20250514 ===
ModelResponse(id='chatcmpl-261449a9-cfaa-4e94-b998-380d8252cec5', created

Let's add a wrapper so responses render as markdown.

In [None]:
#| export
@patch
def _repr_markdown_(self: litellm.ModelResponse):
    "Markdown view of a response: the reply (or its tool calls) followed by a collapsible metadata list"
    choice = self.choices[0]
    msg = choice.message

    # Prefer the text reply; otherwise list the requested tool calls; otherwise dump the raw message
    if msg.content: body = msg.content
    elif msg.tool_calls: body = "\n".join(f"🔧 {tc.function.name}({tc.function.arguments})\n" for tc in msg.tool_calls)
    else: body = str(msg)

    # Metadata bullets shown inside the <details> section
    meta = [
        f"id: `{self.id}`",
        f"model: `{self.model}`",
        f"finish_reason: `{choice.finish_reason}`",
    ]
    if usage := getattr(self, 'usage', None): meta.append(f"usage: `{usage}`")
    det_str = '\n- '.join(meta)

    return f"{body}\n\n<details>\n\n- {det_str}\n\n</details>"

In [None]:
# Same greeting as before, but rendered through the markdown repr
for m in ms:
    print(f'=== {m} ===')
    res = completion(model=m, messages=[{'role':'user','content':'Hey there!'}])
    display(res)

=== gemini/gemini-2.5-flash-preview-04-17 ===


Hey there! What can I do for you today?

<details>

- id: `UvJjaPy-MPbYvdIP27yM6Qo`
- model: `gemini-2.5-flash-preview-04-17`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=319, prompt_tokens=4, total_tokens=323, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=308, rejected_prediction_tokens=None, text_tokens=11), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=None, text_tokens=4, image_tokens=None))`

</details>

=== claude-sonnet-4-20250514 ===


Hello! Nice to meet you. How are you doing today?

<details>

- id: `chatcmpl-f71641c2-2e18-46f1-8d52-7e098bf7ba33`
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=16, prompt_tokens=10, total_tokens=26, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

=== openai/gpt-4o-mini ===


Hello! How can I assist you today?

<details>

- id: `chatcmpl-BoWNbDOexmZTJtOBPGUegSfjBTB4V`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=9, prompt_tokens=10, total_tokens=19, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

## Chat

LiteLLM is pretty bare-bones. It doesn't keep track of conversation history or anything.

So let's make a claudette-style wrapper so we can do streaming, tool calling, and tool loops without problems.

In [None]:
#| export
def stream_with_complete(gen):
    "Pass every streamed chunk through unchanged, then return the response rebuilt from all of them"
    seen = []
    for part in gen:
        seen.append(part)
        yield part
    # Delivered as the generator's return value, i.e. what `yield from` evaluates to
    return stream_chunk_builder(seen)

In [None]:
#| export
class Chat:
    """Stateful chat client on top of `litellm.completion`.

    Keeps the conversation history in `self.h`, can expose Python functions
    as tools, and can run several tool-call rounds for one user message,
    with or without streaming.
    """
    def __init__(self, model: str, sp='', temp=0, tools: list = None, 
                 hist: list = None, ns: Optional[dict] = None):
        """LiteLLM chat client.

        model: model name passed to `litellm.completion`.
        sp:    system prompt; when empty, no system message is sent.
        temp:  sampling temperature.
        tools: functions offered to the model as tools.
        hist:  initial history; the list is stored as-is (not copied) and grows on every call.
        ns:    namespace used to look up tool functions by name. Defaults to one built
               from `tools`, or to this module's globals when no tools are given.
        """
        self.model = model
        if hist is None: hist = []
        if tools is None: tools = []

        # Set up namespace following claudette pattern
        if ns is None and tools: ns = mk_ns(tools)
        elif ns is None: ns = globals()

        # Cache tool schemas (None rather than [] when there are no tools)
        self.tool_schemas = [{'type':'function', 'function':function_to_dict(t)} for t in tools] if tools else None
        self.h, self.sp, self.temp, self.tools, self.ns = hist, sp, temp, tools, ns

    def _prepare_messages(self, msg=None):
        """Append `msg` to the history and return the full message list for the API call.

        `msg` may be a string (becomes a user message), a message dict, a list of
        messages, or None (nothing is added). Anything else raises ValueError.
        The system prompt, if any, is prepended to the result but not stored in the history.
        """
        messages = [{"role": "system", "content": self.sp}] if self.sp else []

        if isinstance(msg, str): self.h.append({"role": "user", "content": msg})
        elif isinstance(msg, dict): self.h.append(msg)
        elif isinstance(msg, list): self.h.extend(msg)
        elif msg is None: pass
        else: raise ValueError(f"Can't parse {msg=}")

        # History holds both plain dicts and response message objects; send dicts only
        for m in self.h: messages.append(m if isinstance(m, dict) else m.model_dump())
        return messages

    def _call(self, msg=None, stream=False, max_tool_rounds=1, tool_round=0, 
              cont_func=noop, final_prompt=None, **kwargs):
        """Internal generator behind `__call__`; always yields responses.

        Yields every complete response and, when `stream` is true, every chunk before it.
        If the model requests tools, they are executed and the method recurses with the
        tool results until `max_tool_rounds` is reached; the last round is made with
        `tool_choice='none'` (plus `final_prompt`, if given) so the model has to answer.
        `cont_func(prev_msg, llm_response, tool_results)` can stop the loop by returning
        something falsy. Extra `kwargs` are forwarded to `litellm.completion`.
        """
        messages = self._prepare_messages(msg)

        # Make the API call
        res = litellm.completion(model=self.model, messages=messages, stream=stream, 
                               tools=self.tool_schemas, temperature=self.temp, **kwargs)

        # When streaming, pass the chunks through and pick up the rebuilt complete response
        if stream: res = yield from stream_with_complete(res)

        m = res.choices[0].message
        self.h.append(m)
        yield res

        tcs = m.tool_calls
        if not tcs: return

        # This response already came from the forced final round (tool_choice='none').
        # If the model asked for tools anyway, stop here rather than recursing without bound.
        if tool_round >= max(max_tool_rounds, 1): return

        tool_results = [_lite_call_func(tc, ns=self.ns) for tc in tcs]

        # Check continuation function: preceding message, llm_response, tool_results.
        # h[-1] is the reply just appended, so h[-2] is the message sent before it.
        # NOTE(review): the default `noop` presumably returns its first argument, so the
        # loop continues whenever that preceding message is truthy - confirm against fastcore.
        user_msg = self.h[-2] if len(self.h) >= 2 else None
        if not cont_func(user_msg, m, tool_results): return

        # Continue with more rounds or final round
        if tool_round < max_tool_rounds - 1:
            yield from self._call(tool_results, stream, max_tool_rounds, tool_round+1, cont_func, final_prompt, **kwargs)
        else:
            # Final round - inject final_prompt if provided and set tool_choice='none'
            final_msg = tool_results + ([{"role": "user", "content": final_prompt}] if final_prompt else [])
            # Merge instead of passing tool_choice next to **kwargs: a caller-supplied
            # tool_choice would otherwise raise "got multiple values for keyword argument"
            final_kwargs = {**kwargs, 'tool_choice': 'none'}
            yield from self._call(final_msg, stream, max_tool_rounds, tool_round+1, cont_func, final_prompt, **final_kwargs)

    def __call__(self, msg=None, stream=False, max_tool_rounds=1, cont_func=noop, final_prompt=None, return_all=False, **kwargs):
        """Main call method - handles streaming vs non-streaming.

        Returns a generator of chunks and complete responses when `stream` is true,
        a list of all responses when `return_all` is true, otherwise only the last response.
        """
        result_gen = self._call(msg, stream, max_tool_rounds, 0, cont_func, final_prompt, **kwargs)
        if stream: return result_gen              # streaming
        elif return_all: return list(result_gen)  # toolloop behavior
        else: return last(result_gen)             # normal chat behavior

### Test history tracking

In [None]:
# First turn: introduce a name the model should remember later
chat = Chat(model=m)
res = chat(msg="Hey my name is Rens")
res

Hi Rens! How can I assist you today?

<details>

- id: `chatcmpl-BoWPUgqbgQcmPT7eorIAn3ivT6LvJ`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=11, prompt_tokens=13, total_tokens=24, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

In [None]:
# Second turn on the same Chat object: answering requires the history from the first turn
chat("Whats my name")

Your name is Rens! How can I help you today?

<details>

- id: `chatcmpl-BoWPVFzKbkud61mWw3gNvx1fVMZjI`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=13, prompt_tokens=35, total_tokens=48, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

See now we keep track of history!

### Testing streaming

In [None]:
from time import sleep

chat2 = Chat(model=m)
stream_gen = chat2(msg="Count to 5", stream=True)
print("Streaming:")
for chunk in stream_gen:
    sleep(0.1)  # for effect
    # Anything that is not a stream chunk is the rebuilt complete response
    if not isinstance(chunk, litellm.ModelResponseStream):
        print("\n\nWhole response:")
        display(chunk)
        continue
    c = chunk.choices[0].delta.content
    if c: print(c, end='')

Streaming:
1, 2, 3, 4, 5.

Whole response:


1, 2, 3, 4, 5.

<details>

- id: `chatcmpl-BoWRXne5cL6RHZDBgKYGqKKmB4KZS`
- model: `gpt-4o-mini`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=14, prompt_tokens=11, total_tokens=25, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

# Test tool use

OK, now let's test tool use.

In [None]:
#| export
def _lite_call_func(tc,ns,raise_on_err=True):
    """Run the function requested by tool call `tc` and wrap the result as a `tool` message.

    tc: tool call exposing `.id`, `.function.name` and `.function.arguments` (a JSON string).
    ns: namespace in which `call_func` looks up the function.
    raise_on_err: forwarded to `call_func`.
    Returns a dict with `tool_call_id`, `role`, `name` and the stringified result as `content`.
    """
    # An empty arguments string (a call without arguments) is treated as "{}";
    # json.loads("") would otherwise raise
    fargs = json.loads(tc.function.arguments or '{}')
    res = call_func(tc.function.name, fargs, ns=ns, raise_on_err=raise_on_err)
    return {"tool_call_id": tc.id, "role": "tool", "name": tc.function.name, "content": str(res)}

In [None]:
def simple_add(a: int, b: int=0) -> int:
    "Add two numbers together"
    # NOTE(review): the docstring above presumably ends up in the tool schema built by
    # `function_to_dict`, so it is kept verbatim - confirm before rewording it
    print(f"TOOL CALLED {a=} + {b=}")  # trace so the demo output shows each invocation
    total = a + b
    return total

In [None]:
# Non-streaming tool use: every model gets a fresh Chat with `simple_add` as its only tool
for m in ms:
    print(f'=== {m} ===')
    chat = Chat(model=m, tools=[simple_add])
    res = chat(msg="What's 5 + 3?")
    display(res)

=== gemini/gemini-2.5-flash-preview-04-17 ===
TOOL CALLED a=5 + b=3


5 + 3 is 8.

<details>

- id: `pPNjaKOqF52MvdIPqdrw-Qw`
- model: `gemini-2.5-flash-preview-04-17`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=8, prompt_tokens=95, total_tokens=103, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=None, text_tokens=95, image_tokens=None))`

</details>

=== claude-sonnet-4-20250514 ===
TOOL CALLED a=5 + b=3


5 + 3 = 8

<details>

- id: `chatcmpl-dcb22cea-1f66-4efd-b571-2bf24d0af443`
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=12, prompt_tokens=492, total_tokens=504, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

=== openai/gpt-4o-mini ===
TOOL CALLED a=5 + b=3


5 + 3 equals 8.

<details>

- id: `chatcmpl-BoWT4qZTU63gc6XM6Buriho1A1uxL`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=8, prompt_tokens=81, total_tokens=89, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

In [None]:
# Same tool test with streaming: print everything the generator yields, unformatted
# TODO: prettify printing of toolcalls?
for m in ms:
    print(f'=== {m} ===')
    chat = Chat(model=m, tools=[simple_add])
    res = chat(msg="What's 5 + 3?", stream=True)
    for o in res: print(o)

=== gemini/gemini-2.5-flash-preview-04-17 ===
ModelResponseStream(id='yvNjaOL5K4iBxs0PysmniAE', created=1751380939, model='gemini-2.5-flash-preview-04-17', object='chat.completion.chunk', system_fingerprint=None, choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(provider_specific_fields=None, content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionDeltaToolCall(id='call_3a0a0e58-7343-4c74-8540-7d3a516e3b1c', function=Function(arguments='{"a": 5, "b": 3}', name='simple_add'), type='function', index=0)], audio=None), logprobs=None)], provider_specific_fields=None, stream_options=None)
ModelResponseStream(id='yvNjaOL5K4iBxs0PysmniAE', created=1751380939, model='gemini-2.5-flash-preview-04-17', object='chat.completion.chunk', system_fingerprint=None, choices=[StreamingChoices(finish_reason='stop', index=0, delta=Delta(provider_specific_fields=None, content=None, role=None, function_call=None, tool_calls=None, audio=None), logprobs=None)], provider_spe

ModelResponseStream(id='chatcmpl-563fdc41-187b-438c-b9f4-c2356ba7f16c', created=1751380945, model='claude-sonnet-4-20250514', object='chat.completion.chunk', system_fingerprint=None, choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(provider_specific_fields=None, content='5', role='assistant', function_call=None, tool_calls=None, audio=None), logprobs=None)], provider_specific_fields=None, stream_options=None, citations=None)
ModelResponseStream(id='chatcmpl-563fdc41-187b-438c-b9f4-c2356ba7f16c', created=1751380945, model='claude-sonnet-4-20250514', object='chat.completion.chunk', system_fingerprint=None, choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(provider_specific_fields=None, content=' + 3', role=None, function_call=None, tool_calls=None, audio=None), logprobs=None)], provider_specific_fields=None, stream_options=None, citations=None)
ModelResponseStream(id='chatcmpl-563fdc41-187b-438c-b9f4-c2356ba7f16c', created=1751380945, model='claude-sonn

ModelResponseStream(id='chatcmpl-BoWTkBkvpEk0xP69v6TCQNFmE12mN', created=1751380948, model='gpt-4o-mini', object='chat.completion.chunk', system_fingerprint='fp_34a54ae93c', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(provider_specific_fields=None, refusal=None, content='5', role='assistant', function_call=None, tool_calls=None, audio=None), logprobs=None)], provider_specific_fields=None, stream_options={'include_usage': True}, citations=None)
ModelResponseStream(id='chatcmpl-BoWTkBkvpEk0xP69v6TCQNFmE12mN', created=1751380948, model='gpt-4o-mini', object='chat.completion.chunk', system_fingerprint='fp_34a54ae93c', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(provider_specific_fields=None, refusal=None, content=' +', role=None, function_call=None, tool_calls=None, audio=None), logprobs=None)], provider_specific_fields=None, stream_options={'include_usage': True}, citations=None)
ModelResponseStream(id='chatcmpl-BoWTkBkvpEk0xP69v6TCQNFmE12mN',

## Test multi tool calling

In [None]:
# Test parallel tool calling: one prompt that needs two independent additions
for m in ms:
    print(f'=== {m} ===')  # a stray `e` after this call used to make the cell a SyntaxError
    chat = Chat(m, tools=[simple_add])
    res = chat("What's 5 + 3? And what is 13 + 9. Use tool tools for each", stream=False)
    print(res)

=== claude-sonnet-4-20250514 ===
TOOL CALLED a=5 + b=3
TOOL CALLED a=13 + b=9
ModelResponse(id='chatcmpl-20ce68c7-010c-4579-bb09-e12a728ce570', created=1751374936, model='claude-sonnet-4-20250514', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='The results are:\n- 5 + 3 = 8\n- 13 + 9 = 22', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': None, 'thinking_blocks': None}))], usage=Usage(completion_tokens=31, prompt_tokens=612, total_tokens=643, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0))
=== openai/gpt-4o-mini ===
TOOL CALLED a=5 + b=3
TOOL CALLED a=13 + b=9
ModelResponse(id='chatcmpl-BoUuozQcAEhp4a0WG0TFRO3pfJef6', created=1751374938, model='gpt-4o-mini-2024-07-18', object='chat.completion', system_f

In [None]:
# Test (multi-)toolloop: up to five tool rounds, keeping every intermediate response
for m in ms:
    print(f'=== {m} ===')
    chat = Chat(model=m, tools=[simple_add])
    res = chat(
        msg="What's ((5 + 3)+7)+11? Work step by step",
        stream=False,
        return_all=True,
        max_tool_rounds=5,
    )
    for r in res: display(r)

=== claude-sonnet-4-20250514 ===
TOOL CALLED a=5 + b=3
TOOL CALLED a=8 + b=7
TOOL CALLED a=15 + b=11


I'll solve this step by step using the addition function.

First, let me calculate 5 + 3:

<details>

- id: `chatcmpl-6a229bb0-c27c-47ca-be83-132753beca64`
- model: `claude-sonnet-4-20250514`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=96, prompt_tokens=399, total_tokens=495, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

Now I'll add 7 to that result (8 + 7):

<details>

- id: `chatcmpl-b9ca4908-1f3c-41f7-938e-7a3283490be0`
- model: `claude-sonnet-4-20250514`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=88, prompt_tokens=508, total_tokens=596, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

Finally, I'll add 11 to that result (15 + 11):

<details>

- id: `chatcmpl-5673c14d-9776-477c-a0f6-bb25714a6780`
- model: `claude-sonnet-4-20250514`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=89, prompt_tokens=609, total_tokens=698, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

So working step by step:
- 5 + 3 = 8
- 8 + 7 = 15  
- 15 + 11 = 26

Therefore, ((5 + 3) + 7) + 11 = **26**

<details>

- id: `chatcmpl-aee46720-01b7-440d-8245-96e13ce02785`
- model: `claude-sonnet-4-20250514`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=68, prompt_tokens=711, total_tokens=779, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0)`

</details>

=== openai/gpt-4o-mini ===
TOOL CALLED a=5 + b=3
TOOL CALLED a=7 + b=11
TOOL CALLED a=8 + b=18


🔧 simple_add({"a": 5, "b": 3})

🔧 simple_add({"a": 7, "b": 11})


<details>

- id: `chatcmpl-BoVyzdn8Embb2JFdlGARlyJAjNTvT`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=52, prompt_tokens=61, total_tokens=113, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

🔧 simple_add({"a":8,"b":18})


<details>

- id: `chatcmpl-BoVz0xc4DrvUQVeXj18GSlqdD7LiP`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=18, prompt_tokens=129, total_tokens=147, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

Let's break it down step by step:

1. First, we calculate \(5 + 3\):
   \[
   5 + 3 = 8
   \]

2. Next, we add \(7 + 11\):
   \[
   7 + 11 = 18
   \]

3. Finally, we add the results from the previous steps:
   \[
   8 + 18 = 26
   \]

So, \(((5 + 3) + 7) + 11 = 26\).

<details>

- id: `chatcmpl-BoVz174p7RakMdVuyAu2cG93IbOO3`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=117, prompt_tokens=156, total_tokens=273, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

In [None]:
# Display the list of models under test
ms

['gemini/gemini-2.5-flash-preview-04-17',
 'claude-sonnet-4-20250514',
 'openai/gpt-4o-mini']

Some models support parallel tool calling, i.e. sending multiple tool call requests in one conversation step.

In [None]:
def multiply(a: int, b: int) -> int:
    "Multiply two numbers"
    print(f"MULTIPLY: {a} * {b}")  # trace so the demo output shows each invocation
    product = a * b
    return product

# Two tools and up to five rounds; `return_all=True` returns every response, not just the last
chat = Chat(model='openai/gpt-4o-mini', tools=[simple_add, multiply])
res = chat(msg="Calculate (5 + 3) * (7 + 2)", max_tool_rounds=5, return_all=True)
for r in res: display(r)

TOOL CALLED a=5 + b=3
TOOL CALLED a=7 + b=2
MULTIPLY: 8 * 9


🔧 simple_add({"a": 5, "b": 3})

🔧 simple_add({"a": 7, "b": 2})


<details>

- id: `chatcmpl-BoWUuJ6x9FVpiW0haODAVGEdvzZbO`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=52, prompt_tokens=81, total_tokens=133, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

🔧 multiply({"a":8,"b":9})


<details>

- id: `chatcmpl-BoWUvqPaatqHkwOAjrGH7CekpzwAj`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=17, prompt_tokens=149, total_tokens=166, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

The result of the calculation \((5 + 3) * (7 + 2)\) is \(72\).

<details>

- id: `chatcmpl-BoWUwkuaNwiKe9sP3Gd3o6CEoyNeZ`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=26, prompt_tokens=174, total_tokens=200, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

See it did the additions in one go!

## Toolloop

Oh wait above we also demonstrated a toolloop! With litellm we might actually be able to put toolloop straight into the main `__call__` of Chat.

We have the new `return_all=False` parameter. It's only relevant when you're not streaming. Because if you're streaming we always send back everything. But if you're not, then `return_all` determines if we only send back the last llm response or all of them. 

If you set `max_tool_rounds` > 1 and `return_all=True`, then you basically have a toolloop.

Let's show a toolloop hitting the max rounds limit:

In [None]:
# Test 2: Hit max_tool_rounds limit with final_prompt
def divide(a: int, b: int) -> float:
    "Divide two numbers"
    print(f"DIVIDE: {a} / {b}")  # trace so the demo output shows each invocation
    quotient = a / b
    return quotient

# Only two tool rounds are allowed, so the loop ends early with the summary prompt
chat = Chat(model=m, tools=[simple_add, multiply, divide])
res = chat(
    msg="Calculate ((10 + 5) * 3) / (2 + 1) step by step",
    max_tool_rounds=2,
    final_prompt="Please summarize what you've calculated so far",
    return_all=True,
)
print(f"Got {len(res)} responses")
for r in res: display(r)

TOOL CALLED a=10 + b=5
TOOL CALLED a=2 + b=1
MULTIPLY: 15 * 3
Got 3 responses


🔧 simple_add({"a": 10, "b": 5})

🔧 simple_add({"a": 2, "b": 1})


<details>

- id: `chatcmpl-BoWXPSlEvAVVfSCFzroiYQnv6KLbq`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=52, prompt_tokens=109, total_tokens=161, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

🔧 multiply({"a":15,"b":3})


<details>

- id: `chatcmpl-BoWXQIpSeKGEEQoEElI7aswVgLpKt`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=17, prompt_tokens=177, total_tokens=194, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

So far, I have calculated the following steps:

1. **Addition**: \(10 + 5 = 15\)
2. **Addition**: \(2 + 1 = 3\)
3. **Multiplication**: \(15 \times 3 = 45\)

Next, we need to divide \(45\) by \(3\) to complete the calculation.

<details>

- id: `chatcmpl-BoWXRpJdfBGrYZup0Rp8h6AbxFMvb`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=80, prompt_tokens=214, total_tokens=294, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

In [None]:
# Test 3: Custom cont_func to stop early
def stop_on_error(user_msg, llm_resp, tool_results):
    "Continuation check: return False (stop) as soon as a tool result mentions 'error', else True"
    # Case-insensitive search; `any` stops at the first hit
    found = any('error' in str(r['content']).lower() for r in tool_results)
    if found: print("STOPPING: Found error in tool result")
    return not found

def error_tool(x: int) -> str:
    "A tool that sometimes errors"
    # The failure is reported as a string, not raised, so it comes back as an ordinary result
    return "Error: number too big!" if x > 10 else f"Success: {x}"

# The tool reports an error for 15, so `stop_on_error` should end the loop after one response
chat = Chat(model=m, tools=[error_tool])
res = chat(msg="Try error_tool with 15", max_tool_rounds=3, cont_func=stop_on_error, return_all=True)
for r in res: display(r)

STOPPING: Found error in tool result


🔧 error_tool({"x":15})


<details>

- id: `chatcmpl-BoWY1u1xejxD7KwDXJeqHe4NZx5Z8`
- model: `gpt-4o-mini-2024-07-18`
- finish_reason: `tool_calls`
- usage: `Usage(completion_tokens=14, prompt_tokens=49, total_tokens=63, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

Let's also show streaming with toolloops:

In [None]:
# Test 4: Streaming with tool loops
chat = Chat(model=m, tools=[simple_add, multiply])
stream_gen = chat(msg="Calculate (4 + 6) * 2", max_tool_rounds=3, stream=True)

print("Streaming responses:")
for chunk in stream_gen:
    # Complete responses are rendered; stream chunks only contribute their text delta
    if not isinstance(chunk, litellm.ModelResponseStream):
        display(chunk)
        continue
    c = chunk.choices[0].delta.content
    if c: print(c, end='')

Streaming responses:


🔧 simple_add({"a": 4, "b": 6})

🔧 multiply({"a": 10, "b": 2})


<details>

- id: `chatcmpl-BoWZwFDuE2LwQIM5VhAZltJVOCmWe`
- model: `gpt-4o-mini`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=51, prompt_tokens=77, total_tokens=128, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

TOOL CALLED a=4 + b=6
MULTIPLY: 10 * 2
The result of the calculation \((4 + 6) * 2\) is \(20\).

The result of the calculation \((4 + 6) * 2\) is \(20\).

<details>

- id: `chatcmpl-BoWZxGlsGaPinEeru0i9py6dfjZiq`
- model: `gpt-4o-mini`
- finish_reason: `stop`
- usage: `Usage(completion_tokens=23, prompt_tokens=144, total_tokens=167, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None))`

</details>

## Export

In [None]:
#| hide
# Write the `#| export` cells of this notebook out to the library module
import nbdev
nbdev.nbdev_export()