# Streaming APIs

This notebook shows examples of how to use the streaming APIs for both OpenAI and Ollama.

When instantiating the client, set `async_client=True` to use the async client, which supports streaming.


In [1]:
from not_again_ai.llm.chat_completion import chat_completion_stream
from not_again_ai.llm.chat_completion.providers.ollama_api import ollama_client as create_ollama_client
from not_again_ai.llm.chat_completion.providers.openai_api import openai_client as create_openai_client
from not_again_ai.llm.chat_completion.types import ChatCompletionRequest, SystemMessage, UserMessage

# The client factories are imported under `create_*` aliases so that the names
# `openai_client` / `ollama_client` refer only to the client instances used by
# the cells below, instead of rebinding the imported factory functions.
# `async_client=True` requests the async client variant used for streaming.
openai_client = create_openai_client(async_client=True)
ollama_client = create_ollama_client(async_client=True)

In [2]:
# OpenAI Streaming Example with tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "format": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "The temperature unit to use. Infer this from the users location.",
                    },
                },
                "required": ["location", "format"],
            },
        },
    },
]
messages = [
    SystemMessage(content="Call the get_current_weather function once for each city that the user mentions."),
    UserMessage(content="What's the current weather like in Boston, MA and New York, NY today?"),
]
request = ChatCompletionRequest(
    model="gpt-4o-mini-2024-07-18",
    messages=messages,
    tools=tools,
    max_completion_tokens=400,
    temperature=0,
)

async for chunk in chat_completion_stream(request, "openai", openai_client):
    print(chunk.model_dump(mode="json", exclude_none=True), flush=True)

{'choices': [{'delta': {'content': '', 'role': 'assistant'}, 'index': 0}], 'errors': '', 'response_duration': 1.5805}
{'choices': [{'delta': {'content': '', 'role': 'assistant', 'tool_calls': [{'id': 'call_W5ScLS0Atqtt4awudPS8pA6G', 'function': {'name': 'get_current_weather', 'arguments': ''}, 'type': 'function'}]}, 'index': 0}], 'errors': '', 'response_duration': 1.5835}
{'choices': [{'delta': {'content': '', 'role': 'assistant', 'tool_calls': [{'function': {'name': '', 'arguments': '{"lo'}, 'type': 'function'}]}, 'index': 0}], 'errors': '', 'response_duration': 1.5846}
{'choices': [{'delta': {'content': '', 'role': 'assistant', 'tool_calls': [{'function': {'name': '', 'arguments': 'catio'}, 'type': 'function'}]}, 'index': 0}], 'errors': '', 'response_duration': 1.5858}
{'choices': [{'delta': {'content': '', 'role': 'assistant', 'tool_calls': [{'function': {'name': '', 'arguments': 'n": "B'}, 'type': 'function'}]}, 'index': 0}], 'errors': '', 'response_duration': 1.5868}
{'choices': [

In [3]:
# Ollama Streaming Example with tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "format": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "The temperature unit to use. Infer this from the users location.",
                    },
                },
                "required": ["location", "format"],
            },
        },
    },
]
messages = [
    SystemMessage(content="Call the get_current_weather function once for each city that the user mentions."),
    UserMessage(content="What's the current weather like in Boston, MA and New York, NY today?"),
]
request = ChatCompletionRequest(
    model="qwen2.5:14b",
    messages=messages,
    tools=tools,
    max_completion_tokens=400,
    temperature=0,
)
async for chunk in chat_completion_stream(request, "ollama", ollama_client):
    print(chunk.model_dump(mode="json", exclude_none=True))

{'choices': [{'delta': {'content': '', 'role': 'assistant', 'tool_calls': [{'id': '', 'function': {'name': 'get_current_weather', 'arguments': {'location': 'Boston, MA'}}, 'type': 'function'}]}, 'index': 0}], 'errors': '', 'response_duration': 0.4406}
{'choices': [{'delta': {'content': '', 'role': 'assistant', 'tool_calls': [{'id': '', 'function': {'name': 'get_current_weather', 'arguments': {'location': 'New York, NY'}}, 'type': 'function'}]}, 'index': 0}], 'errors': '', 'response_duration': 0.7869}
{'choices': [{'delta': {'content': '', 'role': 'assistant'}, 'index': 0, 'finish_reason': 'stop'}], 'errors': '', 'completion_tokens': 47, 'prompt_tokens': 204, 'response_duration': 0.8152}
