# APIM ❤️ AI Foundry

## Test Apps Authorizations

Use this Jupyter notebook containing Python code snippets to validate the functionality of the APIM Applications feature with Azure AI Foundry.

<a id='0'></a>
### ⚙️ Initialize client tool for your APIM service

👉 An existing Azure OpenAI API is expected to be already configured on APIM.

In [None]:
# Set-up cell: reads the signed-in Azure account, initializes the APIM client
# tool and prepares the settings shared by the cells below.
import json
import sys

import requests

sys.path.insert(1, '../shared')  # add the shared directory to the Python path
import utils
from apimtools import APIMClientTool

model_name = "gpt-4o-mini"
inference_api_version = "2024-10-21"

# Positions, within apimClientTool.apim_subscriptions, of the subscriptions
# whose keys are used by the API-key based test cell.
# NOTE(review): 5-8 match the original author's APIM instance; adjust as needed.
subscription_key_positions = slice(5, 9)

try:
    output = utils.run("az account show", "Retrieved az account", "Failed to get the current az account")

    if output.success and output.json_data:
        account = output.json_data
        current_user = account['user']['name']
        tenant_id = account['tenantId']
        subscription_id = account['id']

        utils.print_info(f"Current user: {current_user}")
        utils.print_info(f"Tenant ID: {tenant_id}")
        utils.print_info(f"Subscription ID: {subscription_id}")

    apimClientTool = APIMClientTool(
        "lab-..." ## specify the resource group name where the API Management resource is located, or optionally add another parameter with the apim_resource_name
    )
    apimClientTool.initialize()
    apimClientTool.discover_api('/openai')

    apim_api_endpoint = str(apimClientTool.azure_endpoint)
    chat_completions_url = f"{apim_api_endpoint}/openai/deployments/{model_name}/chat/completions?api-version={inference_api_version}"

    # Slicing never raises, so a service with fewer subscriptions than expected
    # is reported with a clear message instead of a bare "list index out of range".
    # NOTE(review): assumes apim_subscriptions is a list of dicts — confirm in apimtools.
    api_keys = [
        subscription.get("key")
        for subscription in apimClientTool.apim_subscriptions[subscription_key_positions]
    ]
    if not api_keys:
        raise RuntimeError(
            "no APIM subscriptions found at positions 5-8; "
            "adjust subscription_key_positions to match your APIM service"
        )
    utils.print_ok("Testing tool initialized successfully!")
except Exception as e:
    # Best-effort notebook cell: report the problem and let the user fix the configuration.
    utils.print_error(f"Error initializing APIM Client Tool: {e}")

# Values to fill in before running the token cell.
client_id = "" # retrieve the client ID from the APIM Applications overview page
client_secret = "" # generate a new client secret in the APIM Applications overview page
product_app_id = "" # retrieve the product app ID from the APIM Product overview page

models = ["gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3-mini", "o4-mini"]


<a id='token'></a>
### ⚙️ Generate an OAuth client token to consume the Product API


In [None]:
# Request an access token with the OAuth 2.0 client-credentials grant and, on
# success, print the decoded JWT header and payload for inspection.
import base64
import json

import requests

# NOTE(review): this overrides the product_app_id set in the initialization
# cell — replace it with the product app ID of your own APIM product.
product_app_id = "ec1687d3-3600-46b0-9e3d-1467eb13aa37"

# Reset first so a failed request cannot leave a token from an earlier run of
# this cell in scope for the cells below.
token = None

body = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret,
    "scope": f"api://{product_app_id}/.default",
}

response = requests.post(
    f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token",
    data=body,
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    timeout=30,  # do not hang the notebook if the token endpoint is unreachable
)

if response.status_code == 200:
    token = response.json().get("access_token")
    header, payload, _signature = token.split('.')

    def pad(b):
        """Return *b* with the '=' padding that base64 decoding requires."""
        return b + '=' * (-len(b) % 4)

    # The header and payload segments are base64url-encoded JSON documents.
    for segment in (header, payload):
        decoded = base64.urlsafe_b64decode(pad(segment)).decode('utf-8')
        print(json.dumps(json.loads(decoded), indent=4))
else:
    print(f"Failed to retrieve token: {response.status_code} - {response.text}")

<a id='sdk'></a>
### 🧪 Test the API using the Azure OpenAI Python SDK



In [None]:
# Call the chat completions API through APIM with the Azure OpenAI SDK,
# authenticating with the OAuth token instead of an API key.
from openai import AzureOpenAI

# The SDK appends "/openai/..." to azure_endpoint itself, so the bare gateway
# endpoint yields the same route as chat_completions_url. (The previous value
# referenced inference_api_path, which is not defined in this notebook.)
client = AzureOpenAI(
    azure_endpoint=apim_api_endpoint,
    azure_ad_token=token, # We are using a token and NO API key.
    api_version=inference_api_version,
)

response = client.chat.completions.create(model=model_name, messages=[
        {"role": "system", "content": "You are a sarcastic, unhelpful assistant."},
        {"role": "user", "content": "Can you tell me the time, please?"}
])

print("üí¨ ",response.choices[0].message.content)

<a id='requests'></a>
### 🧪 Send multiple random prompts


In [None]:
# Send the same sample prompt ten times with a randomly chosen subscription key
# and record (total_tokens, status_code, response_time) for every request.
import requests, random, time

with open("sample-prompts.json", "r", encoding="utf-8") as prompts_file:
    sample_prompts = json.load(prompts_file)

api_runs = []
for _attempt in range(10):
    # The first sample prompt is reused on every run; switch to
    # random.choice(sample_prompts) to vary the question.
    prompt = sample_prompts[0]
    question = prompt.get("question")

    request_body = {"messages": [
        {"role": "system", "content": "You are a sarcastic, unhelpful assistant."},
        {"role": "user", "content": question},
    ]}

    print("üí¨ ", question)

    # Time only the HTTP round trip.
    started_at = time.time()
    response = requests.post(
        chat_completions_url,
        headers={'api-key': random.choice(api_keys)},
        json=request_body,
    )
    response_time = time.time() - started_at

    utils.print_response_code(response)
    if response.status_code != 200:
        # Failed calls are recorded with zero tokens.
        print(response.text)
        total_tokens = 0
    else:
        data = json.loads(response.text)
        total_tokens = data.get("usage").get("total_tokens")
        answer = data.get("choices")[0].get("message").get("content")
        print(f"‚åö {response_time:.2f} seconds. üó®Ô∏è ", answer)

    api_runs.append((total_tokens, response.status_code, response_time))
    print("------------------------------------------------------------------------------------------")

<a id='plot'></a>
### 🔍 Analyze Token Rate limiting results


In [None]:
# Plot the tokens consumed by each run recorded in api_runs, coloured by
# response status, together with the running total of tokens.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

mpl.rcParams['figure.figsize'] = [15, 7]
df = pd.DataFrame(api_runs, columns=['Tokens', 'Status Code', 'Response Time'])
df['Run'] = range(1, len(df) + 1)

# Red for 5xx responses, yellow for 4xx responses, light blue otherwise.
colors = ['red' if str(code).startswith('5') else 'yellow' if str(code).startswith('4') else 'lightblue' for code in df['Status Code']]
ax = df.plot(kind='bar', x='Run', y='Tokens', color=colors, legend=False)
plt.title('Rate Limiting results')
plt.xlabel('Runs')
plt.ylabel('Tokens')
plt.xticks(df['Run'], rotation=0)

# Annotate unsuccessful runs with the status code that was actually returned
# (previously every non-200 response was labelled "[429]").
for i, val in enumerate(df['Status Code']):
    ax.text(i, 20, '' if int(val) == 200 else f'[{int(val)}]', ha='center', va='bottom')

# Print the token count above each bar that consumed tokens.
for i, val in enumerate(df['Tokens']):
    ax.text(i, val + 5, '' if int(val) == 0 else val, ha='center', va='bottom')

# Overlay the running total; bar positions are 0-based, hence Run - 1.
accumulated_tokens = df['Tokens'].cumsum()
ax.plot(df['Run']-1, accumulated_tokens, color='green', label='Accumulated Tokens')
for i, val in enumerate(accumulated_tokens):
    ax.text(i, val + 6, str(int(val)), ha='center', va='bottom')
plt.show()

<a id='plot-caching'></a>
### 🔍 Analyze Semantic Caching performance

The first request should take a longer time as it makes it all the way to the Azure OpenAI backend. The subsequent requests should be much quicker as they draw from the semantic cache. 

In [None]:
# Chart the response time of every run recorded in api_runs as a bar chart,
# with a dashed red line marking the mean response time.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

mpl.rcParams['figure.figsize'] = [15, 5]

df = pd.DataFrame(api_runs, columns=['Tokens', 'Status Code', 'Response Time'])
df['Run'] = range(1, len(df) + 1)  # 1-based run number used on the x axis

ax = df.plot.bar(x='Run', y='Response Time', legend=False)
ax.set_title('Semantic Caching Performance')
ax.set_xlabel('Runs')
ax.set_ylabel('Response Time (s)')
plt.xticks(rotation=0)  # keep the tick labels horizontal

average = df['Response Time'].mean()
ax.axhline(y=average, color='r', linestyle='--', label=f'Average: {average:.2f}')
ax.legend()

plt.show()

<a id='http'></a>
### 🧪 Test the API using a direct HTTP call


In [None]:
# Call the chat completions endpoint directly over HTTP, authenticating with
# the OAuth bearer token, then show the status, response headers and the reply.
from pprint import pprint

request_body = {
    "messages": [
        {"role": "system", "content": "You are a sarcastic, unhelpful assistant."},
        {"role": "user", "content": "Can you tell me the time, please?"},
    ]
}
auth_headers = {'Authorization': 'Bearer ' + token}

response = requests.post(chat_completions_url, headers=auth_headers, json=request_body)

utils.print_response_code(response)
utils.print_info("Headers:")
pprint(dict(response.headers))

if response.status_code != 200:
    utils.print_error(response.text)
else:
    data = json.loads(response.text)
    first_choice = data.get("choices")[0]
    print("üí¨ ", first_choice.get("message").get("content"))