# APIM ❤️ AI Foundry

## Test your Azure AI Foundry models, enabled through Azure API Management!

Use this Jupyter notebook with Python code snippets to verify proper functionality of your Azure AI Foundry models when accessed through AI Gateway features in Azure API Management (APIM).

<a id='0'></a>
### ⚙️ Initialize client tool for your APIM service

👉 An existing Azure AI Foundry API must already be configured in APIM.

In [None]:
import sys, json, requests
sys.path.insert(1, '../shared')  # add the shared directory to the Python path
import utils
from apimtools import APIMClientTool

# Model deployment name and inference API version used to build the request URLs below.
model_name = "gpt-4.1"
inference_api_version = "2025-03-01-preview"

try:
    # The argument is the resource group name where the API Management resource is located;
    # optionally add another parameter with the apim_resource_name.
    apimClientTool = APIMClientTool("lab-ai-gateway")
    apimClientTool.initialize()
    apimClientTool.discover_api('/openai')  # replace with /models for inference API

    # Derive the Foundry project endpoint from the gateway URL by swapping the
    # resource-name prefix and the DNS suffix.
    apim_resource_gateway_url = str(apimClientTool.apim_resource_gateway_url)
    foundry_project_endpoint = (
        apim_resource_gateway_url
        .replace('apim-', 'foundry-')
        .replace('.azure-api.net', '.services.ai.azure.com')
        + "/api/projects/default"
    )

    azure_endpoint = str(apimClientTool.azure_endpoint)
    chat_completions_url = (
        f"{azure_endpoint}/openai/deployments/{model_name}"
        f"/chat/completions?api-version={inference_api_version}"
    )

    # Ensure that you have created a subscription in APIM.
    # NOTE(review): index 1 skips the first subscription in the list — presumably a
    # built-in one; confirm against APIMClientTool.
    api_key = apimClientTool.apim_subscriptions[1].get("key")

    utils.print_ok("Testing tool initialized successfully!")
except Exception as exc:
    # Any failure is only reported; the variables assigned after the failing
    # statement stay undefined for the cells that follow.
    utils.print_error(f"Error initializing APIM Client Tool: {exc}")



<a id='sdk'></a>
### 🧪 Test the API using the Azure OpenAI Python SDK



In [None]:
import time
from openai import AzureOpenAI


# Point the Azure OpenAI SDK at the APIM-exposed endpoint, authenticating with
# the APIM subscription key.
client = AzureOpenAI(
    azure_endpoint=azure_endpoint,
    api_key=api_key,
    api_version=inference_api_version,
)

# Send a single chat completion request and print the first choice's text.
response = client.chat.completions.create(
    model=model_name,
    messages=[
        {"role": "system", "content": "You are a sarcastic, unhelpful assistant."},
        {"role": "user", "content": "Can you tell me the time, please?"},
    ],
)
print("💬 ", response.choices[0].message.content)

<a id='requests'></a>
### 🧪 Test the API using a direct HTTP call


In [None]:
# Request body for the chat completions endpoint.
messages = {"messages": [
    {"role": "system", "content": "You are a sarcastic, unhelpful assistant."},
    {"role": "user", "content": "Can you tell me the time, please?"}
]}
chat_completions_url = f"{azure_endpoint}/openai/deployments/{model_name}/chat/completions?api-version={inference_api_version}"

# POST the payload, passing the APIM subscription key in the 'api-key' header.
response = requests.post(chat_completions_url, headers={'api-key': api_key}, json=messages)
utils.print_response_code(response)
utils.print_info(f"headers {response.headers}")
# This header is useful to determine the region of the backend that served the request.
# Single quotes are used inside the f-string: reusing the enclosing double quotes
# is a SyntaxError on Python versions before 3.12.
utils.print_info(f"x-ms-region: {response.headers.get('x-ms-region')}")

if response.status_code == 200:
    data = json.loads(response.text)
    print("💬 ", data.get("choices")[0].get("message").get("content"))
else:
    utils.print_error(response.text)

<a id='rate-limit'></a>
### 🧪 Send multiple requests to surpass the established token rate limit


In [None]:
import requests

# Same chat payload for every request in the burst.
messages = {
    "messages": [
        {"role": "system", "content": "You are a sarcastic, unhelpful assistant."},
        {"role": "user", "content": "Can you tell me the time, please?"},
    ]
}
request_headers = {'api-key': api_key}

# One (total_tokens, status_code) tuple is recorded per request.
api_runs = []
for attempt in range(20):
    response = requests.post(chat_completions_url, headers=request_headers, json=messages)
    utils.print_response_code(response)
    if response.status_code != 200:
        # Failed requests are recorded with zero tokens.
        print(response.text)
        total_tokens = 0
    else:
        data = json.loads(response.text)
        total_tokens = data.get("usage").get("total_tokens")
        print("💬 ", data.get("choices")[0].get("message").get("content"))
    api_runs.append((total_tokens, response.status_code))

<a id='plot'></a>
### 🔍 Analyze Token Rate limiting results


In [None]:
# plot the results
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# Wide canvas so that every run gets a readable bar.
mpl.rcParams['figure.figsize'] = [15, 7]

# One row per request: tokens consumed, HTTP status code and 1-based run number.
df = pd.DataFrame(api_runs, columns=['Tokens', 'Status Code'])
df['Run'] = range(1, len(df) + 1)

# Bar colour by status class: 5xx red, 4xx yellow, anything else light blue.
colors = [
    'red' if str(code).startswith('5')
    else 'yellow' if str(code).startswith('4')
    else 'lightblue'
    for code in df['Status Code']
]
ax = df.plot(kind='bar', x='Run', y='Tokens', color=colors, legend=False)
plt.title('Rate Limiting results')
plt.xlabel('Runs')
plt.ylabel('Tokens')
# The annotations and the line below are drawn at 0-based x positions, so the
# ticks are pinned to those same positions and labelled with the run numbers.
plt.xticks(range(len(df)), df['Run'].astype(str), rotation=0)

# Annotate every non-200 run with the status code it actually returned.
for i, val in enumerate(df['Status Code']):
    if int(val) != 200:
        ax.text(i, 20, f'[{int(val)}]', ha='center', va='bottom')

# Token count above each bar; runs that recorded zero tokens get no label.
for i, val in enumerate(df['Tokens']):
    if int(val) != 0:
        ax.text(i, val + 5, val, ha='center', va='bottom')

# Running total of tokens, drawn as a line over the bars.
accumulated_tokens = df['Tokens'].cumsum()
ax.plot(df['Run'] - 1, accumulated_tokens, color='green', label='Accumulated Tokens')
for i, val in enumerate(accumulated_tokens):
    ax.text(i, val + 6, str(int(val)), ha='center', va='bottom')
plt.show()

<a id='agents'></a>
### 🧪 Execute an [Azure AI Foundry Agent using MCP Tools](https://learn.microsoft.com/en-us/azure/ai-foundry/agents/how-to/tools/model-context-protocol)


In [None]:
from azure.ai.agents.models import ListSortOrder, MessageTextContent, McpTool, RequiredMcpToolCall, SubmitToolApprovalAction, ToolApproval
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
import time

# Client for the Foundry project, authenticated through DefaultAzureCredential.
project_client = AIProjectClient(
    endpoint=foundry_project_endpoint,
    credential=DefaultAzureCredential(),
)
agents_client = project_client.agents

# MCP tool definition: the weather MCP server URL is built on the APIM gateway URL.
mcp_tool = McpTool(
    server_label="weather",
    server_url=apim_resource_gateway_url + "/weather-mcp/sse",
)

prompt = "What's the weather in San Francisco, Seattle and Lisbon?"

# Agent creation
agent = agents_client.create_agent(
    model=model_name,
    name="agent-mcp",
    instructions="You are a weather agent.",
    tools=mcp_tool.definitions,
)
print(f"🎉 Created agent, agent ID: {agent.id}")
print(f"✨ MCP Server: {mcp_tool.server_label} at {mcp_tool.server_url}")

# Thread creation
thread = agents_client.threads.create()
print(f"🧵 Created thread, thread ID: {thread.id}")

# Message creation: the prompt is posted as the user's message.
message = agents_client.messages.create(thread_id=thread.id, role="user", content=prompt)
print(f"💬 Created message, message ID: {message.id}")

# Disable human approval before the run is created.
mcp_tool.set_approval_mode("never")

# Run: start it, then poll every two seconds while it is in one of the pending states.
pending_states = ("queued", "in_progress", "requires_action")
run = agents_client.runs.create(
    thread_id=thread.id,
    agent_id=agent.id,
    tool_resources=mcp_tool.resources,
)
while run.status in pending_states:
    time.sleep(2)
    run = agents_client.runs.get(thread_id=thread.id, run_id=run.id)
    print(f"⏳ Run status: {run.status}")
if run.status == "failed":
    print(f"❌ Run error: {run.last_error}")

# Get Run steps
run_steps = agents_client.run_steps.list(thread_id=thread.id, run_id=run.id)
print()

for run_step in run_steps:
    print(f"🔄 Run step: {run_step.id}, status: {run_step.status}, type: {run_step.type}")
    if run_step.type != "tool_calls":
        continue
    # Dump each tool call of this step as indented JSON.
    print("🛠️ Tool call details:")
    for tool_call in run_step.step_details.tool_calls:
        print(json.dumps(tool_call.as_dict(), indent=5))

# Get the messages in the thread, in ascending order.
print("\n📜 Messages in the thread:")
messages = agents_client.messages.list(thread_id=thread.id, order=ListSortOrder.ASCENDING)

for thread_message in messages:
    # Only the last content part is inspected, and only text parts are printed.
    final_part = thread_message.content[-1]
    if isinstance(final_part, MessageTextContent):
        print(f"🗨️ {thread_message.role}: {final_part.text.value}")

# Clean up resources
# agents_client.delete_agent(agent.id)
