# Azure API-Management's AI Gateway for Azure OpenAI

### Setting up environment

In [1]:
# Importing required packages
from openai import AzureOpenAI
import requests
import os

In [2]:
# Extracting environment variables
APIM_TPM_URL = os.getenv("APIM_TPM_URL")
APIM_TPM_SUB_KEY = os.getenv("APIM_TPM_SUB_KEY")
AOAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AOAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_API_DEPLOY")

### Testing API-M endpoint with REST request

In [3]:
# Checking URL construction
url = f"{APIM_TPM_URL}openai/deployments/{AOAI_DEPLOYMENT}/chat/completions?api-version={AOAI_API_VERSION}"
print(url)

https://laziz-apim-sw.azure-api.net/aoai-tpm-limit/openai/deployments/laziz_swc_gpt4o/chat/completions?api-version=2024-05-01-preview


In [44]:
response = requests.post(
    url = f"{APIM_TPM_URL}openai/deployments/{AOAI_DEPLOYMENT}/chat/completions",
    headers = {
        "Content-Type": "application/json",
        "api-key": APIM_TPM_SUB_KEY
    },
    # params={'api-version': AOAI_API_VERSION},
    params = {'api-version': '2024-02-01'},
    json = {
        "messages": [
            {
               "role": "system",
                "content": "You are a standup comedian."
            },
            {
                "role": "user",
                "content": "Tell me a joke about red panda."
            }
        ],
    "temperature": 0.7
    }
)

if response.status_code == 200:
    print(f"Consumed tokens: {response.headers['consumed-tokens']}")
    print(f"Remaining tokens: {response.headers['remaining-tokens']}")
print(response.content)

b'{ "statusCode": 429, "message": "Token limit is exceeded. Try again in 33 seconds." }'


### Testing API-M endpoint with OpenAI SDK v1

In [9]:
# Initiating Azure OpenAI client
client = AzureOpenAI(
    azure_endpoint = APIM_TPM_URL,
    api_key = APIM_TPM_SUB_KEY,
    api_version = AOAI_API_VERSION
)

In [10]:
# Helper function
def get_completion(system_prompt, prompt):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]

    response = client.chat.completions.create(
        model = AOAI_DEPLOYMENT, # model = "Azure OpenAI deployment name".
        messages = messages
    )
    return response

In [45]:
# Testing context limit of GPT-4-Turbo v2024-04-09
system_prompt = "You are a standup comedian."
user_prompt = "Tell me a joke about red panda."
response = get_completion(system_prompt, user_prompt)

result = response.choices[0].message.content
print(result)

Sure, here's one for you:

Why don't red pandas ever use computers?

Because they can't find the "paws" button!
