# Azure API-Management's AI Gateway for Azure OpenAI

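This notebook generates Azure OpenAI traffic through the API Management gateway under several subscription keys, so that usage can then be analysed per subscription.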

### Setting up environment

In [1]:
# Importing required packages
from openai import AzureOpenAI
import requests
import random
import time
import os

In [2]:
# Reading gateway endpoint and Azure OpenAI settings from the environment
# (os.getenv returns None for any variable that is not set)
APIM_USAGE_URL = os.getenv("APIM_USAGE_URL")
AOAI_API_VERSION = os.getenv("APIM_USAGE_API_VERSION")
AOAI_DEPLOYMENT = os.getenv("APIM_USAGE_AOAI_DEPLOY")

# Subscription keys used to generate usage, in the order they are exercised
SUBSCRIPTION_KEYS = [
    os.getenv(variable_name)
    for variable_name in ("APIM_USAGE_KEY_CONTOSO", "APIM_USAGE_KEY_NORTHWIND")
]

In [3]:
# Defining custom variables

# Prompts sent with every chat completion request
SYSTEM_PROMPT = "You are a standup comedian."
USER_PROMPT = "Tell me a joke about red panda."

# Sampling temperature forwarded with each completion request
TEMPERATURE = 0.7

# Load-generation settings: upper bound of calls per subscription key and
# the pause (in seconds, passed to time.sleep) between consecutive calls
NUMBER_OF_RUNS = 15
SLEEP_TIME = 1

### API-M usage generation with REST request

In [4]:
# Helper function for REST API call
def get_rest_completion(subscription_key, system_prompt, user_prompt, timeout=60):
    """Send one chat completion request through the APIM gateway via REST.

    Args:
        subscription_key: APIM subscription key, sent in the "api-key" header.
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.
        timeout: Seconds to wait for the gateway before giving up, so that a
            stalled call cannot block the notebook indefinitely.

    Returns:
        The requests.Response object. The HTTP status is not checked here;
        callers inspect response.status_code themselves.

    Raises:
        requests.exceptions.Timeout: If the gateway does not answer in time.
    """
    # Normalise the base URL so it works with or without a trailing slash
    base_url = APIM_USAGE_URL.rstrip("/")

    response = requests.post(
        url = f"{base_url}/openai/deployments/{AOAI_DEPLOYMENT}/chat/completions",
        headers = {
            "Content-Type": "application/json",
            "api-key": subscription_key
        },
        params = {"api-version": AOAI_API_VERSION},
        json = {
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "temperature": TEMPERATURE
        },
        timeout = timeout
    )
    return response

In [5]:
# Generating usage with REST API
for key in SUBSCRIPTION_KEYS:
    # Vary the number of calls per subscription key so that the resulting
    # usage differs between subscriptions
    randomness = random.randint(0, 5)
    runs_for_key = NUMBER_OF_RUNS - randomness

    for i in range(runs_for_key):
        start_time = time.time()
        response = get_rest_completion(subscription_key=key, system_prompt=SYSTEM_PROMPT, user_prompt=USER_PROMPT)
        end_time = time.time()
        print(f"Run # {i} completed in {end_time - start_time:.2f} seconds with response code {response.status_code}")

        # Pause only between runs: compare against the actual number of runs
        # for this key, not NUMBER_OF_RUNS, so no sleep follows the last call
        if i < runs_for_key - 1:
            print(f"Pausing for {SLEEP_TIME} seconds...")
            time.sleep(SLEEP_TIME)
    print("-----------------------------")

Run # 0 completed in 1.07 seconds with response code 200
Pausing for 1 seconds...
Run # 1 completed in 0.73 seconds with response code 200
Pausing for 1 seconds...
Run # 2 completed in 0.67 seconds with response code 200
Pausing for 1 seconds...
Run # 3 completed in 0.70 seconds with response code 200
Pausing for 1 seconds...
Run # 4 completed in 0.91 seconds with response code 200
Pausing for 1 seconds...
Run # 5 completed in 0.83 seconds with response code 200
Pausing for 1 seconds...
Run # 6 completed in 0.78 seconds with response code 200
Pausing for 1 seconds...
Run # 7 completed in 0.76 seconds with response code 200
Pausing for 1 seconds...
Run # 8 completed in 0.67 seconds with response code 200
Pausing for 1 seconds...
Run # 9 completed in 0.68 seconds with response code 200
Pausing for 1 seconds...
Run # 10 completed in 0.71 seconds with response code 200
Pausing for 1 seconds...
Run # 11 completed in 1.89 seconds with response code 200
Pausing for 1 seconds...
--------------

### API-M usage generation with OpenAI SDK v1

In [6]:
# Initiating array of Azure OpenAI clients
# One client per subscription key, all pointed at the same gateway endpoint
aoai_clients = [
    AzureOpenAI(
        azure_endpoint = APIM_USAGE_URL,
        api_key = subscription_key,
        api_version = AOAI_API_VERSION
    )
    for subscription_key in SUBSCRIPTION_KEYS
]

In [7]:
# Helper function for SDK call
def get_sdk_completion(aoai_client, system_prompt, user_prompt):
    """Request one chat completion through the given SDK client.

    Args:
        aoai_client: Client object exposing chat.completions.create().
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.

    Returns:
        Whatever chat.completions.create() returns for the request.
    """
    return aoai_client.chat.completions.create(
        model = AOAI_DEPLOYMENT,
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature = TEMPERATURE,
    )

In [8]:
# Generating usage with SDK
for client in aoai_clients:
    # Vary the number of calls per client so that the resulting usage
    # differs between subscriptions
    randomness = random.randint(0, 5)
    runs_for_client = NUMBER_OF_RUNS - randomness

    for i in range(runs_for_client):
        start_time = time.time()
        response = get_sdk_completion(aoai_client=client, system_prompt=SYSTEM_PROMPT, user_prompt=USER_PROMPT)
        end_time = time.time()
        print(f"Run # {i} completed in {end_time - start_time:.2f} seconds")

        # Pause only between runs: compare against the actual number of runs
        # for this client, not NUMBER_OF_RUNS, so no sleep follows the last call
        if i < runs_for_client - 1:
            print(f"Pausing for {SLEEP_TIME} seconds...")
            time.sleep(SLEEP_TIME)
    print("-----------------------------")

Run # 0 completed in 0.72 seconds
Pausing for 1 seconds...
Run # 1 completed in 0.49 seconds
Pausing for 1 seconds...
Run # 2 completed in 0.60 seconds
Pausing for 1 seconds...
Run # 3 completed in 0.49 seconds
Pausing for 1 seconds...
Run # 4 completed in 0.90 seconds
Pausing for 1 seconds...
Run # 5 completed in 0.52 seconds
Pausing for 1 seconds...
Run # 6 completed in 0.61 seconds
Pausing for 1 seconds...
Run # 7 completed in 0.64 seconds
Pausing for 1 seconds...
Run # 8 completed in 0.50 seconds
Pausing for 1 seconds...
Run # 9 completed in 0.56 seconds
Pausing for 1 seconds...
Run # 10 completed in 0.51 seconds
Pausing for 1 seconds...
Run # 11 completed in 0.52 seconds
Pausing for 1 seconds...
-----------------------------
Run # 0 completed in 0.68 seconds
Pausing for 1 seconds...
Run # 1 completed in 0.50 seconds
Pausing for 1 seconds...
Run # 2 completed in 0.49 seconds
Pausing for 1 seconds...
Run # 3 completed in 0.57 seconds
Pausing for 1 seconds...
Run # 4 completed in 0.5