diff --git a/src/art/local/backend.py b/src/art/local/backend.py index c9f79e3e..48465ff6 100644 --- a/src/art/local/backend.py +++ b/src/art/local/backend.py @@ -315,11 +315,13 @@ async def _monitor_openai_server( if running_requests == 0 and pending_requests == 0: try: # Send a health check with a 5 second timeout - await openai_client.completions.create( + timeout = float( + os.environ.get("ART_SERVER_MONITOR_TIMEOUT", 5.0) + ) + # Health check: retrieve the model, timing out after ART_SERVER_MONITOR_TIMEOUT seconds (default 5) + await openai_client.models.retrieve( model=model_name, - prompt="Hi", - max_tokens=1, - timeout=5, + timeout=timeout, ) except Exception as e: # If the server is sleeping, a failed health check is okay