Merge pull request #12 from SpellcraftAI/dev
feat: add Azure support
ctjlewis committed Mar 2, 2024
2 parents 30549f6 + 3e391fe commit c9b7819
Showing 11 changed files with 193 additions and 41 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/release.yml
@@ -3,6 +3,9 @@ name: Build, Test, Publish
on: push
env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
  AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
  AZURE_OPENAI_VERSION: ${{ secrets.AZURE_OPENAI_VERSION }}

jobs:
  build:
28 changes: 27 additions & 1 deletion README.ipynb
@@ -903,6 +903,32 @@
"\n",
"await batch.run()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"## Notes\n",
"\n",
"1. It is not possible to perfectly guarantee the tokens per minute limit is not\n",
" breached because we cannot know the total token usage until the response\n",
" comes back.\n",
" \n",
" Use the `safety` param to set the rate limit tolerance. By default it is set\n",
" to 10%, and will wait until the predicted TPM (the current TPM plus the\n",
" average number of tokens per request) drops below 90% of the limit.\n",
"\n",
" \n",
"2. By default, important logs are stored at `oaib.txt`. This can be disabled\n",
" using `loglevel=0`.\n",
"\n",
"3. There's an error with TPM/RPM progress bar display in Jupyter Notebooks for\n",
" the `Auto` class only. This is caused by a `tqdm.notebook` bug where only the\n",
" initial totals (here, our limits) are used to calculate the width of the bar,\n",
" and the `Auto` class updates these values only after the first request. The\n",
" text percentage displays are accurate."
]
}
],
"metadata": {
@@ -921,7 +947,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
"version": "3.0.0"
}
},
"nbformat": 4,
80 changes: 58 additions & 22 deletions README.md
@@ -334,28 +334,6 @@ Run took 12.58s.
<p>1000 rows × 4 columns</p>


## Notes

1. It is not possible to perfectly guarantee the tokens per minute limit is not
breached because we cannot know the total token usage until the response
comes back.

Use the `safety` param to set the rate limit tolerance. By default it is set
to 10%, and will wait until the predicted TPM (the current TPM plus the
average number of tokens per request) drops below 90% of the limit.


2. By default, important logs are stored at `oaib.txt`. This can be disabled
using `loglevel=0`.

3. There's an error with TPM/RPM progress bar display in Jupyter Notebooks for
the `Auto` class only. This is caused by a `tqdm.notebook` bug where only the
initial totals (here, our limits) are used to calculate the width of the bar,
and the `Auto` class updates these values only after the first request. The
text percentage displays are accurate.



### Metadata and Index

You can add custom metadata to your observations with `add(metadata={...})`, and
@@ -525,4 +503,62 @@ Run took 1.43s.
</table>
</div>

### Use with Microsoft Azure
<sub>See [`tests/test_azure.py`](tests/test_azure.py).</sub>

To use a Cognitive Services deployment:

#### 1. Go to `Azure OpenAI Studio > Chat playground > View Code`.

This view will provide your Azure endpoint, API version, and API key.

#### 2. Use `AzureConfig` to configure the Azure endpoint.

With the `AZURE_OPENAI_KEY`, `AZURE_OPENAI_ENDPOINT`, and `AZURE_OPENAI_VERSION`
environment variables set:

```python
from oaib import Batch, AzureConfig

# Auto is not supported for Azure.
azure = AzureConfig()
batch = Batch(azure=azure)
```

Or, manually:

```python
import os
from oaib import Batch, AzureConfig

azure = AzureConfig(
    azure_endpoint="https://spellcraft.openai.azure.com/",
    api_key=os.getenv("AZURE_OPENAI_KEY"),
    api_version="2024-02-15-preview"
)

# Auto is not supported for Azure.
batch = Batch(azure=azure)
```
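
Requests are then added and run the same way as with the standard client. Note
that on Azure, the `model` argument is your *deployment name*, not an OpenAI
model ID; the test suite uses a deployment named `Research` (substitute your
own):

```python
# `model` is the Azure deployment name here, not an OpenAI model ID.
await batch.add(
    "chat.completions.create",
    model="Research",
    messages=[{"role": "user", "content": "say hello"}]
)
results = await batch.run()
```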


## Notes

1. It is not possible to guarantee the tokens-per-minute limit is never
   breached, because the total token usage is not known until the response
   comes back.

   Use the `safety` param to set the rate limit tolerance. By default it is set
   to 10%, meaning the batch will wait until the predicted TPM (the current TPM
   plus the average number of tokens per request) drops below 90% of the limit.
   A configuration sketch follows these notes.

2. By default, important logs are stored at `oaib.txt`. This can be disabled
   using `loglevel=0`.

3. There's a TPM/RPM progress bar display error in Jupyter Notebooks for the
   `Auto` class only. It is caused by a `tqdm.notebook` bug where only the
   initial totals (here, our limits) are used to calculate the width of the bar,
   and the `Auto` class updates these values only after the first request. The
   text percentage displays are accurate.
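
A minimal sketch of these knobs together (the values here are arbitrary
examples, not recommendations):

```python
from oaib import Batch

# safety=0.2 widens the tolerance: requests wait until the predicted TPM
# drops below 80% of the limit. loglevel=0 disables the `oaib.txt` log.
batch = Batch(rpm=500, tpm=30_000, safety=0.2, loglevel=0)
```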

10 changes: 8 additions & 2 deletions oaib/Auto.py
@@ -2,10 +2,18 @@

from .Batch import Batch
from .utils import EXAMPLE, get_limits
# from .config import config


class Auto(Batch):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        if self.azure:
            raise ValueError(
                "Auto does not support Azure or custom APIs. Manually set your TPM and RPM with Batch."
            )

        if "rpm" in kwargs or "tpm" in kwargs:
            raise ValueError(
                "Auto does not allow you to manually set your RPM or TPM. They will be set automatically using the limits provided by OpenAI's response headers."
@@ -14,8 +22,6 @@ def __init__(self, *args, **kwargs):
        self.__limits_loaded = Event()
        self.__model = None

        super().__init__(*args, **kwargs)

    async def _process(self, *args, **kwargs):
        # If rate limits have not been loaded from response headers yet, do so.
        if self._headers is not None and not self.__limits_loaded.is_set():
37 changes: 31 additions & 6 deletions oaib/Batch.py
@@ -8,12 +8,14 @@
from time import time
from types import SimpleNamespace
from tqdm.auto import tqdm
from openai import AsyncOpenAI

import openai

from asyncio import ALL_COMPLETED
from asyncio import Lock, Queue, Event, QueueEmpty, CancelledError, TimeoutError
from asyncio import create_task, gather, wait, wait_for, sleep, all_tasks

# from .config import AzureConfig
from .utils import EXAMPLE, getattr_dot, cancel_all, get_limits
from .utils import race, close_queue

@@ -63,28 +65,45 @@ def __init__(
        safety: float = 0.1,
        loglevel: int = 1,
        timeout: int = 60,
        api_key: str or None = os.environ.get("OPENAI_API_KEY"),
        azure=None,
        api_key: str or None = None,
        logdir: str or None = "oaib.txt",
        index: list[str] or None = None,
        **client_kwargs
    ):
        api_key = api_key or (
            os.environ.get("AZURE_OPENAI_KEY") if azure
            else os.environ.get("OPENAI_API_KEY")
        )

        if not api_key:
            raise ValueError(
                "No OpenAI API key found. Please provide an `api_key` parameter or set the `OPENAI_API_KEY` environment variable."
            )

        if loglevel > 2:
            raise ValueError(
                f"Allowable `loglevel` values are 0, 1, or 2; found {loglevel}")

        self.client = AsyncOpenAI(api_key=api_key, **client_kwargs)

        self.rpm = rpm
        self.tpm = tpm
        self.safety = safety
        self.loglevel = loglevel
        self.timeout = timeout
        self.logdir = logdir
        self.index = index
        self.azure = None

        if azure:
            azure = vars(azure)
            self.azure = azure

            self.client = openai.AsyncAzureOpenAI(
                **{**azure, "api_key": api_key},
                **client_kwargs
            )
        else:
            self.client = openai.AsyncOpenAI(api_key=api_key, **client_kwargs)

        self.__num_workers = workers

Expand All @@ -110,8 +129,8 @@ def __clear_log(self):
with open(self.logdir, "w") as file:
file.write("")

def log(self, *messages, worker: int or None = None):
if self.loglevel > 0:
def log(self, *messages, worker: int or None = None, loglevel: int or None = None):
if (loglevel or self.loglevel) > 0:
now = datetime.now()
timestamp = now.strftime("%Y-%m-%d %H:%M:%S")

@@ -227,6 +246,7 @@ async def _process(self, request, i=None):

        # Store one copy of response headers - for use by Auto subclass.
        if self._headers is None:
            self.log(f"HEADERS | {dict(headers)}")
            self._headers = headers

        self.__totals.requests += 1
@@ -344,6 +364,11 @@ def __setup(self, callback=None, listening=False):
            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}"
        )

        if self.azure:
            self.log(
                f"USING AZURE | {self.azure}"
            )

    async def listen(self, callback=None):
        """
        Listens for incoming requests and processes them as they arrive. This method is non-blocking and can be used to process requests in real-time.
1 change: 1 addition & 0 deletions oaib/__init__.py
@@ -1,2 +1,3 @@
from .Batch import Batch
from .Auto import Auto
from .config import AzureConfig
16 changes: 16 additions & 0 deletions oaib/config.py
@@ -0,0 +1,16 @@
from os import environ
from types import SimpleNamespace


class AzureConfig(SimpleNamespace):
    def __init__(
        self,
        azure_endpoint: str or None = environ.get("AZURE_OPENAI_ENDPOINT"),
        api_version: str or None = environ.get("AZURE_OPENAI_VERSION"),
        api_key: str or None = environ.get("AZURE_OPENAI_KEY"),
    ):
        super().__init__(
            azure_endpoint=azure_endpoint,
            api_version=api_version,
            api_key=api_key
        )
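
`AzureConfig` is just a `SimpleNamespace`, which is why `Batch` can unpack it
with `vars()`. As a quick sketch (the endpoint below is a placeholder), any
field can also be passed explicitly instead of being read from the environment:

```python
from oaib import AzureConfig

# Placeholder endpoint; api_version and api_key still fall back to the
# AZURE_OPENAI_VERSION and AZURE_OPENAI_KEY environment variables.
config = AzureConfig(azure_endpoint="https://example.openai.azure.com/")
print(vars(config))
```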
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "oaib"
version = "1.1.2"
version = "1.2.0"
requires-python = ">=3.9"
description = "A Python library for making rate-limited, async batch requests to the OpenAI API."
authors = [{ name = "CTJ Lewis", email = "lewis@spellcraft.org" }]
29 changes: 29 additions & 0 deletions tests/test_azure.py
@@ -0,0 +1,29 @@
import os
import pytest
from oaib import Auto, Batch, AzureConfig

azure = AzureConfig()


async def test_azure():
    batch = Batch(workers=8, azure=azure, loglevel=2)

    n = 20
    for i in range(n):
        await batch.add(
            "chat.completions.create",
            model="Research",
            messages=[{"role": "user", "content": "say hello"}]
        )

    chats = await batch.run()
    assert len(chats) == n, f"Chat batch should return {n} results"
    print(chats)

    chat = chats.iloc[0].get("result")
    assert chat['choices'], "Should get valid chat completions"


async def test_azure_auto():
    with pytest.raises(ValueError, match="Auto does not support Azure"):
        auto_with_azure = Auto(azure=azure)
20 changes: 14 additions & 6 deletions tests/test_columns.py
@@ -1,15 +1,17 @@
from oaib import Batch
from oaib import Batch, AzureConfig

azure = AzureConfig()


async def test_columns():
    n = 20
    batch = Batch(rpm=1000, tpm=10000, workers=5)
    batch = Batch(rpm=1000, tpm=10000, workers=5, azure=azure)

    for i in range(n):
        await batch.add(
            "chat.completions.create",
            metadata={"id": i},
            model="gpt-3.5-turbo",
            model="Research",
            messages=[{"role": "user", "content": "say hello"}]
        )

@@ -21,15 +23,21 @@ async def test_columns():

async def test_index():
    n = 5
    batch = Batch(rpm=1000, tpm=10000, workers=5, index=["difficulty", "i"])
    difficulties = ["easy", "medium", "hard"]
    batch = Batch(
        rpm=1000,
        tpm=10000,
        workers=5,
        index=["difficulty", "i"],
        azure=azure
    )

    difficulties = ["easy", "medium", "hard"]
    for difficulty in difficulties:
        for i in range(n):
            await batch.add(
                "chat.completions.create",
                metadata={"difficulty": difficulty, "i": i},
                model="gpt-3.5-turbo",
                model="Research",
                messages=[
                    {"role": "user", "content": f"difficulty: {difficulty}\nwrite a math problem."}
                ]
8 changes: 5 additions & 3 deletions tests/test_large.py
@@ -1,11 +1,13 @@
from oaib import Auto, Batch
from oaib import Auto, Batch, AzureConfig

azure = AzureConfig()


async def test_large_auto():
    batch = Auto()

    # Large batch - Auto (fast)
    n = 5_000
    n = 1_000
    m = 10
    for i in range(n):
        await batch.add(
@@ -23,7 +25,7 @@ async def test_large_auto():


async def test_large_batch():
    batch = Batch()
    batch = Batch(azure=azure)

    # Large batch - Batch (slow)
    n = 5_000
