In [12]:
# Azure OpenAI Assistants API with functions

import os
from dotenv import load_dotenv
from openai import AzureOpenAI

# Read settings from the .env file into the process environment.
# Variables used by this notebook:
#   AZURE_OPENAI_API_KEY
#   AZURE_OPENAI_ENDPOINT
#   AZURE_OPENAI_API_VERSION
#   SEARCH_KEY
load_dotenv()

# Build the Azure OpenAI client from the environment values loaded above
client = AzureOpenAI(
    api_version=os.environ.get('AZURE_OPENAI_API_VERSION'),
    azure_endpoint=os.environ.get('AZURE_OPENAI_ENDPOINT'),
    api_key=os.environ.get('AZURE_OPENAI_API_KEY'),
)

# id of the assistant that was created in the portal; used when creating runs
assistant_id = "asst_Z2YGBjhORYJGyPv6AQ4HugzP"

## Create a thread

A thread is not linked to the assistant at creation time.

In [13]:
# Create a thread
# No arguments are passed, so nothing ties the thread to an assistant at this point
thread = client.beta.threads.create()

# Threads have an id as well
# The id is passed along later when adding messages and creating runs on this thread
print("Thread id: ", thread.id)

Thread id:  thread_zPVdxn0yxRysszn5g9kkCSVo


## Add a message to the thread

The assistant can answer questions by searching the blog, blog.baeke.info. It should do that with the search_blog function as defined in the assistant.

The JSON definition of the function in the assistant is:

```json
{
  "name": "search_blog",
  "description": "Search blog.baeke.info",
  "parameters": {
    "type": "object",
    "properties": {
      "query": {
        "type": "string",
        "description": "Query used by the search"
      }
    },
    "required": [
      "query"
    ]
  }
}
```

The function takes one parameter: the query.

After adding the user message and running the thread, we show information about the run via a json dump.

In [14]:
import time
from IPython.display import clear_output

# function returns the run when status is no longer queued or in_progress
def wait_for_run(run, thread_id, poll_interval=0.5, timeout=None):
    """Poll a run until its status is no longer 'queued' or 'in_progress'.

    Args:
        run: The run object to wait for. Only its ``status`` and ``id``
            attributes are used.
        thread_id: Id of the thread the run belongs to.
        poll_interval: Seconds to sleep before each poll (default 0.5).
        timeout: Maximum number of seconds to wait, or None (default) to wait
            indefinitely.

    Returns:
        The most recently retrieved run. Its status is anything other than
        'queued' or 'in_progress' (for example 'requires_action' or
        'completed'). If the run passed in is already in such a state, it is
        returned unchanged without calling the API.

    Raises:
        TimeoutError: If ``timeout`` is set and the run is still pending once
            that many seconds have elapsed.
    """
    pending_statuses = ('queued', 'in_progress')
    deadline = None if timeout is None else time.monotonic() + timeout

    while run.status in pending_statuses:
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Run {run.id} still has status '{run.status}' after {timeout} seconds"
            )

        # Sleep before polling rather than after, so that we return as soon as
        # a non-pending status has been observed.
        time.sleep(poll_interval)
        run = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run.id
        )

    return run


# add the user's question to the thread
message = client.beta.threads.messages.create(
    content="What is the meaning of life?",
    role="user",
    thread_id=thread.id,
)

# start a run on the thread with the assistant id defined in the first cell
run = client.beta.threads.runs.create(assistant_id=assistant_id, thread_id=thread.id)

# block until the run is no longer queued or in_progress
run = wait_for_run(run, thread.id)

# dump the run as JSON
# - the status is expected to be requires_action
# - required_action should list the tool calls to make
print(run.model_dump_json(indent=2))

{
  "id": "run_afYakj2F9k8bXta0qZUE0oLI",
  "assistant_id": "asst_Z2YGBjhORYJGyPv6AQ4HugzP",
  "cancelled_at": null,
  "completed_at": null,
  "created_at": 1707430061,
  "expires_at": 1707430661,
  "failed_at": null,
  "file_ids": [],
  "instructions": "You answer questions about the blog of Geert Baeke. The blog can be found at https://blog.baeke.info.\n\nEvery technical question you get should be answered by a function. If the results of the function are not relevant, inform the user.\n",
  "last_error": null,
  "metadata": {},
  "model": "gpt-4-preview",
  "object": "thread.run",
  "required_action": {
    "submit_tool_outputs": {
      "tool_calls": [
        {
          "id": "call_HDEGQgUUTA1LFEcRxbj7DJTh",
          "function": {
            "arguments": "{\"query\":\"OpenAI Assistant API\"}",
            "name": "search_blog"
          },
          "type": "function"
        }
      ]
    },
    "type": "submit_tool_outputs"
  },
  "started_at": 1707430062,
  "status": "requir

## Helper function to search the blog

The search_blog function is a helper function that uses the requests library to search the blog. It returns multiple results via a similarity search in Azure AI Search.

We could query Azure AI Search directly here but the API that is used was already created and running as an Azure Container App.

In [16]:
# search the blog

import requests
import json

def search_blog(
    api_key,
    query,
    timeout=30,
    url="https://myblog.gentlebay-4474176e.westeurope.azurecontainerapps.io/generate_response",
):
    """Search the blog through the search API and return its response.

    Args:
        api_key: Key sent in the ``api-key`` request header.
        query: Search query, sent as ``{"query": query}`` in the JSON body.
        timeout: Seconds to wait for the API before ``requests`` raises a
            timeout error (default 30). Without a timeout the call could
            block the notebook indefinitely.
        url: Endpoint to post the query to. Defaults to the blog search API.

    Returns:
        The decoded JSON body when the API answers with HTTP 200, otherwise a
        ``(status_code, response_text)`` tuple describing the failure.
    """
    headers = {"api-key": api_key}
    payload = {"query": query}

    # json= serializes the payload and sets the Content-Type header to
    # application/json, which data=json.dumps(...) does not do.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)

    if response.status_code == 200:
        return response.json()

    return response.status_code, response.text

## Checking if we need to use a tool

Below we check if we need to use a tool. We assume we need to here. We are not taking into account a scenario where we do not need to use a tool. In reality, we would need to allow for that scenario.

If the assistant indicates we need to use a tool, it will tell us the function name and the arguments to use, based on the function definition in the assistant. We will then call the search_blog function with the arguments and pass the tool call results back to the assistant.

After passing the tool call results, we run the thread again and show the messages via a json dump.

In [17]:
import json

# we only check for required_action here
# required action means we need to call a tool
if run.required_action:
    # get tool calls and print them
    # check the output to see what tools_calls contains
    tool_calls = run.required_action.submit_tool_outputs.tool_calls
    print("Tool calls:", tool_calls)

    # we might need to call multiple tools
    # the assistant API supports parallel tool calls
    # we account for this here although we only have one tool call
    tool_outputs = []
    for tool_call in tool_calls:
        func_name = tool_call.function.name
        # the arguments arrive as a JSON string, e.g. '{"query":"..."}'
        arguments = json.loads(tool_call.function.arguments)

        # call the function with the arguments provided by the assistant
        if func_name == "search_blog":
            result = search_blog(os.getenv('SEARCH_KEY'), **arguments)
            print("Search result:", result)
        else:
            # Any other function name is not implemented in this notebook.
            # Report that explicitly; otherwise `result` would be undefined
            # (NameError) or still hold the previous tool call's result.
            result = {"error": f"Function '{func_name}' is not implemented"}
            print("Unsupported tool call:", func_name)

        # append the results to the tool_outputs list
        # you need to specify the tool_call_id so the assistant knows which tool call the output belongs to
        tool_outputs.append({
            "tool_call_id": tool_call.id,
            "output": json.dumps(result)
        })

    # now that we have the tool call outputs, pass them to the assistant
    run = client.beta.threads.runs.submit_tool_outputs(
        thread_id=thread.id,
        run_id=run.id,
        tool_outputs=tool_outputs
    )

    print("Tool outputs submitted")

    # now we wait for the run again
    run = wait_for_run(run, thread.id)
else:
    print("No tool calls identified\n")

# show information about the run
# (the run returned by wait_for_run after submitting tool outputs,
#  or the unchanged run when no tool call was required)
print("Run information:")
print("----------------")
print(run.model_dump_json(indent=2), "\n")

# now print all messages in the thread
# the list is fetched again from the API for this thread id and dumped as JSON
print("Messages in the thread:")
print("-----------------------")
messages = client.beta.threads.messages.list(thread_id=thread.id)
print(messages.model_dump_json(indent=2))

Tool calls: [RequiredActionFunctionToolCall(id='call_HDEGQgUUTA1LFEcRxbj7DJTh', function=Function(arguments='{"query":"OpenAI Assistant API"}', name='search_blog'), type='function')]
Search result: {'response': [{'title': 'Trying the OpenAI Assistants API', 'content': 'If you have ever tried to build an AI assistant, you know that is not a simple task. In almost all cases, your assistant needs access to external knowledge such as documents or APIs. You might even want to provide your assistant a code sandbox to solve user queries with code. When your assistant is accessed via a chat application, you also have to implement chat history.\nAlthough there are several frameworks like LangChain and Semantic Kernel that can help, OpenAI recently released the Assistants API. It is their own API, tied to their models. The primitives of an assistant are Assistants, Threads and Runs. Let’s start by creating an assistant.\nNote: this post contains code snippets in Python. You can find the full exa