Skip to content

Commit

Permalink
fix: update maximal local concurrency limit based on API response (#242)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Tomas2D committed Dec 8, 2023
1 parent c497dd8 commit 48bec2d
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/genai/services/async_generator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -160,12 +160,17 @@ async def _task(self, inputs, batch_num):
async def _schedule_requests(self):
local_concurrency_limit = max(self._max_concurrency_limit or math.inf, 1)

+ is_tokenize_request = self.fn == "tokenize"
+ if not is_tokenize_request:
+ max_generate_capacity = self.service.generate_limits().tokenCapacity
+ local_concurrency_limit = min(local_concurrency_limit, max_generate_capacity)
+
async def get_limits():
nonlocal local_concurrency_limit
if local_concurrency_limit <= 0:
return local_concurrency_limit

- if self.fn == "tokenize":
+ if is_tokenize_request:
return min(local_concurrency_limit, len(self.prompts))

limits = self.service.generate_limits()
Expand Down

0 comments on commit 48bec2d

Please sign in to comment.