From 5f9660e8049f573d01efd475670d5cde505e3dae Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:18:13 -0700 Subject: [PATCH 1/4] Add api-key header --- sdk/ai/azure-ai-inference/README.md | 9 ++-- .../azure/ai/inference/_patch.py | 43 ++++++++++++++++++- .../azure/ai/inference/aio/_patch.py | 39 +++++++++++++++++ ...ompletions_streaming_azure_openai_async.py | 9 ++-- .../sample_chat_completions_azure_openai.py | 9 ++-- ...e_chat_completions_streaming_with_tools.py | 5 +-- .../samples/sample_embeddings_azure_openai.py | 9 ++-- .../tests/model_inference_test_base.py | 4 +- 8 files changed, 102 insertions(+), 25 deletions(-) diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index faeab5703529..da26f0580413 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -41,7 +41,7 @@ Studio. * An [OpenAI Model from the catalog](https://oai.azure.com/resource/models) deployed through Azure OpenAI Studio. * The endpoint URL of your model, in the form `https://.openai.azure.com/openai/deployments/`, where `your-resource-name` is your globally unique AOAI resource name, and `your-deployment-name` is your AI Model deployment name. * Depending on your authentication preference, you either need an API key to authenticate against the service, or Entra ID credentials. The API key is a 32-character string. - * An api-version. Latest preview or GA version listed in the `Data plane - inference` row in [the API Specs table](https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs). At the time of writing, latest GA version was "2024-06-01". + * An api-version. Latest preview or GA version listed in the `Data plane - inference` row in [the API Specs table](https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions). At the time of writing, latest GA version was "2024-06-01". 
### Install the package @@ -83,9 +83,8 @@ client = ChatCompletionsClient( # For Azure OpenAI endpoint client = ChatCompletionsClient( endpoint=endpoint, # Of the form https://.openai.azure.com/openai/deployments/ - credential=AzureKeyCredential(""), # Pass in an empty value. - headers={"api-key": key}, - api_version="2024-06-01", # AOAI api-version. Update as needed. + credential=AzureKeyCredential(key), + api_version="2024-06-01", # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) ``` @@ -138,7 +137,7 @@ client = ChatCompletionsClient( endpoint=endpoint, credential=DefaultAzureCredential(exclude_interactive_browser_credential=False), credential_scopes=["https://cognitiveservices.azure.com/.default"], - api_version="2024-06-01", # AOAI api-version. Update as needed. + api_version="2024-06-01", # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) ``` diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 362fa75e2a91..21ff8108ba36 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -14,7 +14,8 @@ 4. Add support for get_model_info, while caching the result (all clients) 5. Add support for chat completion streaming (ChatCompletionsClient client only) 6. Add support for friendly print of result objects (__str__ method) (all clients) -7. Add support for load() method in ImageUrl class (see /models/_patch.py). +7. Add support for load() method in ImageUrl class (see /models/_patch.py) +8. Add support for sending two auth headers for api-key auth (all clients) """ import json @@ -245,8 +246,22 @@ def __init__( self._model = model self._model_extras = model_extras + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer " + # 2. 
"api-key: " + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. + if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + super().__init__(endpoint, credential, **kwargs) + @overload def complete( self, @@ -724,6 +739,19 @@ def __init__( self._model = model self._model_extras = model_extras + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer " + # 2. "api-key: " + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. + if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + super().__init__(endpoint, credential, **kwargs) @overload @@ -1007,6 +1035,19 @@ def __init__( self._model = model self._model_extras = model_extras + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer " + # 2. "api-key: " + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. 
+ if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + super().__init__(endpoint, credential, **kwargs) @overload diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index ac31fdb88108..aed6393935b9 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -229,6 +229,19 @@ def __init__( self._model = model self._model_extras = model_extras + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer " + # 2. "api-key: " + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. + if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + super().__init__(endpoint, credential, **kwargs) @overload @@ -707,6 +720,19 @@ def __init__( self._model = model self._model_extras = model_extras + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer " + # 2. "api-key: " + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. 
+ if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + super().__init__(endpoint, credential, **kwargs) @overload @@ -990,6 +1016,19 @@ def __init__( self._model = model self._model_extras = model_extras + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer " + # 2. "api-key: " + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. + if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + super().__init__(endpoint, credential, **kwargs) @overload diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py index c21a5a323966..2aa9305baa0b 100644 --- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py +++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py @@ -14,7 +14,7 @@ Entra ID authentication. 2. Update `api_version` (the AOAI REST API version) as needed. See the "Data plane - inference" row in the table here for latest AOAI api-version: - https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs + https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions 3. 
Set one or two environment variables, depending on your authentication method: * AZURE_OPENAI_CHAT_ENDPOINT - Your AOAI endpoint URL, with partial path, in the form https://.openai.azure.com/openai/deployments/ @@ -55,9 +55,8 @@ async def sample_chat_completions_streaming_azure_openai_async(): client = ChatCompletionsClient( endpoint=endpoint, - credential=AzureKeyCredential(""), # Pass in an empty value. - headers={"api-key": key}, - api_version="2024-06-01", # AOAI api-version. Update as needed. + credential=AzureKeyCredential(key), + api_version="2024-06-01", # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) else: # Entra ID authentication @@ -67,7 +66,7 @@ async def sample_chat_completions_streaming_azure_openai_async(): endpoint=endpoint, credential=DefaultAzureCredential(exclude_interactive_browser_credential=False), credential_scopes=["https://cognitiveservices.azure.com/.default"], - api_version="2024-06-01", # AOAI api-version. Update as needed. + api_version="2024-06-01", # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) response = await client.complete( diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py index 950884fb6acf..f025eea212cb 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py @@ -14,7 +14,7 @@ Entra ID authentication. 2. Update `api_version` (the AOAI REST API version) as needed. See the "Data plane - inference" row in the table here for latest AOAI api-version: - https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs + https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions 3. 
Set one or two environment variables, depending on your authentication method: * AZURE_OPENAI_CHAT_ENDPOINT - Your AOAI endpoint URL, with partial path, in the form https://.openai.azure.com/openai/deployments/ @@ -54,9 +54,8 @@ def sample_chat_completions_azure_openai(): client = ChatCompletionsClient( endpoint=endpoint, - credential=AzureKeyCredential(""), # Pass in an empty value. - headers={"api-key": key}, - api_version="2024-06-01", # AOAI api-version. Update as needed. + credential=AzureKeyCredential(key), + api_version="2024-06-01", # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) else: # Entra ID authentication @@ -66,7 +65,7 @@ def sample_chat_completions_azure_openai(): endpoint=endpoint, credential=DefaultAzureCredential(exclude_interactive_browser_credential=False), credential_scopes=["https://cognitiveservices.azure.com/.default"], - api_version="2024-06-01", # AOAI api-version. Update as needed. + api_version="2024-06-01", # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) response = client.complete( diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py index da24328ece8b..dfa62afa2127 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py @@ -112,9 +112,8 @@ def get_flight_info(origin_city: str, destination_city: str): # Create a chat completion client for Azure OpenAI endpoint client = ChatCompletionsClient( endpoint=endpoint, - credential=AzureKeyCredential(""), # Pass in an empty value - headers={"api-key": key}, - api_version="2024-06-01", # AOAI api-version. Update as needed. + credential=AzureKeyCredential(key), + api_version="2024-06-01", # Azure OpenAI api-version. 
See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) else: # Create a chat completions client for Serverless API endpoint or Managed Compute endpoint diff --git a/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py b/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py index 998b3d6dc011..0b19e668d53d 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py +++ b/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py @@ -14,7 +14,7 @@ Entra ID authentication. 2. Update `api_version` (the AOAI REST API version) as needed. See the "Data plane - inference" row in the table here for latest AOAI api-version: - https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs + https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions 3. Set one or two environment variables, depending on your authentication method: * AZURE_OPENAI_EMBEDDINGS_ENDPOINT - Your AOAI endpoint URL, with partial path, in the form https://.openai.azure.com/openai/deployments/ @@ -53,9 +53,8 @@ def sample_embeddings_azure_openai(): client = EmbeddingsClient( endpoint=endpoint, - credential=AzureKeyCredential(""), # Pass in an empty value. - headers={"api-key": key}, - api_version="2024-06-01", # AOAI api-version. Update as needed. + credential=AzureKeyCredential(key), + api_version="2024-06-01", # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) else: # Entra ID authentication @@ -65,7 +64,7 @@ def sample_embeddings_azure_openai(): endpoint=endpoint, credential=DefaultAzureCredential(exclude_interactive_browser_credential=False), credential_scopes=["https://cognitiveservices.azure.com/.default"], - api_version="2024-06-01", # AOAI api-version. Update as needed. + api_version="2024-06-01", # Azure OpenAI api-version. 
See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions ) response = client.embed(input=["first phrase", "second phrase", "third phrase"]) diff --git a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py index c5027e156b0b..48e0c885fcf2 100644 --- a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py +++ b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py @@ -146,7 +146,7 @@ def _load_chat_credentials(self, *, bad_key: bool, **kwargs): return endpoint, credential # See the "Data plane - inference" row in the table here for latest AOAI api-version: - # https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs + # https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions def _load_aoai_chat_credentials(self, *, key_auth: bool, bad_key: bool, **kwargs): endpoint = kwargs.pop("azure_openai_chat_endpoint") if key_auth: @@ -258,6 +258,7 @@ def _validate_embeddings_json_request_payload(self) -> None: assert "MyAppId azsdk-python-ai-inference/" in headers["User-Agent"] assert " Python/" in headers["User-Agent"] assert headers["Authorization"] == "Bearer key-value" + assert headers["api-key"] == "key-value" assert self.pipeline_request.http_request.data == self.EMBEDDINGDS_JSON_REQUEST_PAYLOAD def _validate_chat_completions_json_request_payload(self) -> None: @@ -272,6 +273,7 @@ def _validate_chat_completions_json_request_payload(self) -> None: assert "MyAppId azsdk-python-ai-inference/" in headers["User-Agent"] assert " Python/" in headers["User-Agent"] assert headers["Authorization"] == "Bearer key-value" + assert headers["api-key"] == "key-value" assert self.pipeline_request.http_request.data == self.CHAT_COMPLETIONS_JSON_REQUEST_PAYLOAD @staticmethod From 67c5e7547fb42a59e18791f41e209f6de7c141af Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Tue, 15 Oct 2024 07:33:16 -0700 Subject: [PATCH 2/4] 
Fix Pyright errors. Rename extension to 'opentelemetry' --- sdk/ai/azure-ai-inference/CHANGELOG.md | 3 ++- sdk/ai/azure-ai-inference/README.md | 4 +--- .../azure/ai/inference/_patch.py | 24 +++++++++---------- .../azure/ai/inference/aio/_patch.py | 24 +++++++++---------- sdk/ai/azure-ai-inference/setup.py | 2 +- 5 files changed, 28 insertions(+), 29 deletions(-) diff --git a/sdk/ai/azure-ai-inference/CHANGELOG.md b/sdk/ai/azure-ai-inference/CHANGELOG.md index b13a27c344bb..1795e50f34ce 100644 --- a/sdk/ai/azure-ai-inference/CHANGELOG.md +++ b/sdk/ai/azure-ai-inference/CHANGELOG.md @@ -4,7 +4,8 @@ ### Features Added -* Support for tracing. Please find more information in the package [README.md](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md). +* Support for OpenTelemetry tracing. Please find more information in the package [README.md](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md). +* When constructing clients using input `credential` of type `AzureKeyCredential`, two HTTP request headers are sent simultaneously for authentication: `Authorization: Bearer ` and `api-key: ` (previously only the first one was sent). This is to support different inference services, removing the need for the application to explicitly specify an additional HTTP request header. 
### Breaking Changes diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index c945212631a9..8f3652318525 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -60,11 +60,9 @@ pip install --upgrade azure-ai-inference If you want to install Azure AI Inferencing package with support for OpenTelemetry based tracing, use the following command: ```bash -pip install azure-ai-inference[trace] +pip install azure-ai-inference[opentelemetry] ``` - - ## Key concepts ### Create and authenticate a client directly, using API key or GitHub token diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 281e3a6ccbab..050a8d1ab96c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -254,10 +254,10 @@ def __init__( # The first header will be taken care of by auto-generated code. # The second one is added here. if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) @@ -747,10 +747,10 @@ def __init__( # The first header will be taken care of by auto-generated code. # The second one is added here. 
if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) @@ -1043,10 +1043,10 @@ def __init__( # The first header will be taken care of by auto-generated code. # The second one is added here. if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 6387f0736e02..2bdfd67a40cb 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -237,10 +237,10 @@ def __init__( # The first header will be taken care of by auto-generated code. # The second one is added here. if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) @@ -728,10 +728,10 @@ def __init__( # The first header will be taken care of by auto-generated code. # The second one is added here. 
if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) @@ -1024,10 +1024,10 @@ def __init__( # The first header will be taken care of by auto-generated code. # The second one is added here. if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index 98eff8671e37..9eb9f161d675 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -69,6 +69,6 @@ ], python_requires=">=3.8", extras_require={ - 'trace': ['azure-core-tracing-opentelemetry'] + 'opentelemetry': ['azure-core-tracing-opentelemetry'] } ) From c771d7f2df17158531923d94b84e2e8b061e1bc8 Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:21:58 -0700 Subject: [PATCH 3/4] Switch to use a different GPT-4o model, since the original one is not responding --- sdk/ai/azure-ai-inference/setup.py | 4 ++-- .../azure-ai-inference/tests/model_inference_test_base.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index 9eb9f161d675..c264ae00239e 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -35,7 +35,7 @@ license="MIT License", author="Microsoft Corporation", 
author_email="azpysdkhelp@microsoft.com", - url="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk", + url="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference", keywords="azure, azure sdk", classifiers=[ "Development Status :: 4 - Beta", @@ -68,7 +68,7 @@ "typing-extensions>=4.6.0", ], python_requires=">=3.8", - extras_require={ + extras_require={ 'opentelemetry': ['azure-core-tracing-opentelemetry'] } ) diff --git a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py index 48e0c885fcf2..8308ff979942 100644 --- a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py +++ b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py @@ -49,13 +49,13 @@ # hosted on Azure OpenAI (AOAI) endpoint. # TODO: When we have a MaaS model that supports chat completions with image input, # use that instead. -# AZURE_OPENAI_CHAT_ENDPOINT=https://.openai.azure.com/openai/deployments/gpt-4o +# AZURE_OPENAI_CHAT_ENDPOINT=https://.openai.azure.com/openai/deployments/gpt-4o-0806 # AZURE_OPENAI_CHAT_KEY=<32-char-api-key> # ServicePreparerAOAIChatCompletions = functools.partial( EnvironmentVariableLoader, "azure_openai_chat", - azure_openai_chat_endpoint="https://your-deployment-name.openai.azure.com/openai/deployments/gpt-4o", + azure_openai_chat_endpoint="https://your-deployment-name.openai.azure.com/openai/deployments/gpt-4o-0806", azure_openai_chat_key="00000000000000000000000000000000", ) @@ -158,7 +158,7 @@ def _load_aoai_chat_credentials(self, *, key_auth: bool, bad_key: bool, **kwargs credential = self.get_credential(sdk.ChatCompletionsClient, is_async=False) credential_scopes: list[str] = ["https://cognitiveservices.azure.com/.default"] headers = {} - api_version = "2024-06-01" + api_version = "2024-08-01-preview" return endpoint, credential, credential_scopes, headers, api_version def _load_embeddings_credentials(self, *, bad_key: bool, **kwargs): From 
c3137fa41baa8626aef0d36c2ee5c8fcad1f6bcc Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:22:46 -0700 Subject: [PATCH 4/4] Update test recordings --- sdk/ai/azure-ai-inference/assets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-inference/assets.json b/sdk/ai/azure-ai-inference/assets.json index fdb9e05b8246..ff762c59e663 100644 --- a/sdk/ai/azure-ai-inference/assets.json +++ b/sdk/ai/azure-ai-inference/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/ai/azure-ai-inference", - "Tag": "python/ai/azure-ai-inference_19a0adafc6" + "Tag": "python/ai/azure-ai-inference_3934744053" }