From 015b9312bf658e1e7f01faa96f756b736c1d5b23 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:17:14 -0400
Subject: [PATCH 1/5] Add max_memory parameter to run config

Co-authored-by: Simon Schrader
---
 delphi/config.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/delphi/config.py b/delphi/config.py
index 05b723df..0cf6452e 100644
--- a/delphi/config.py
+++ b/delphi/config.py
@@ -191,6 +191,11 @@ class RunConfig(Serializable):
     )
     """Number of GPUs to use for explanation and scoring."""
 
+    max_memory: float = field(
+        default=0.9,
+    )
+    """Fraction of GPU memory to allocate to running explainer model."""
+
     seed: int = field(
         default=22,
     )

From 9f7a8346ec1b482485d04e5cc6dce57dd25f8e40 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:24:41 -0400
Subject: [PATCH 2/5] Use configurable max_memory for offline explainer

Co-authored-by: Simon Schrader
---
 delphi/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/__main__.py b/delphi/__main__.py
index 064c34a2..a1cf6ca2 100644
--- a/delphi/__main__.py
+++ b/delphi/__main__.py
@@ -145,7 +145,7 @@ async def process_cache(
     if run_cfg.explainer_provider == "offline":
         llm_client = Offline(
             run_cfg.explainer_model,
-            max_memory=0.9,
+            max_memory=run_cfg.max_memory,
             # Explainer models context length - must be able to accommodate the longest
             # set of examples
             max_model_len=run_cfg.explainer_model_max_len,

From 2ebda9ee6e97ab1f5d9519273d6b2e0dd5d2b473 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:30:26 -0400
Subject: [PATCH 3/5] Fix breaking change in prompt input formatting from vLLM

Co-authored-by: Simon Schrader
---
 delphi/clients/offline.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py
index 9dea693f..c517b34c 100644
--- a/delphi/clients/offline.py
+++ b/delphi/clients/offline.py
@@ -7,6 +7,7 @@
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
+from vllm.inputs import TokensPrompt
 from vllm.distributed.parallel_state import (
     destroy_distributed_environment,
     destroy_model_parallel,
 )
@@ -103,6 +104,7 @@ async def process_func(
             prompt = self.tokenizer.apply_chat_template(
                 batch, add_generation_prompt=True, tokenize=True
             )
+            prompt = TokensPrompt(prompt_token_ids=prompt)
             prompts.append(prompt)
             if self.statistics:
                 non_cached_tokens = len(
@@ -121,7 +123,7 @@ async def process_func(
                 None,
                 partial(
                     self.client.generate,  # type: ignore
-                    prompt_token_ids=prompts,
+                    prompts,
                     sampling_params=self.sampling_params,
                     use_tqdm=False,
                 ),

From da8a0edebf5998ec7da0d254e1d8c662c20aac49 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 2 Oct 2025 20:26:19 +0000
Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 delphi/clients/offline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py
index c517b34c..ecd07d37 100644
--- a/delphi/clients/offline.py
+++ b/delphi/clients/offline.py
@@ -7,11 +7,11 @@
 
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
-from vllm.inputs import TokensPrompt
 from vllm.distributed.parallel_state import (
     destroy_distributed_environment,
     destroy_model_parallel,
 )
+from vllm.inputs import TokensPrompt
 
 from delphi import logger
 

From 8c73363dbff21515499ab7066f923c1656f6eff4 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Sun, 5 Oct 2025 10:53:51 -0400
Subject: [PATCH 5/5] Update vllm dependency version to after API breaking change

PR #18800: https://github.com/vllm-project/vllm/releases

Co-authored-by: Simon Schrader
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 844cb085..554e8036 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ dependencies = [
     "blobfile",
     "bitsandbytes",
     "flask",
-    "vllm",
+    "vllm>=0.10.2",
     "aiofiles",
     "sentence_transformers",
     "anyio>=4.8.0",
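Note on the API change these patches track: vllm-project/vllm PR #18800 (released in vLLM 0.10.x) removed the prompt_token_ids keyword from LLM.generate(), so pre-tokenized prompts must now be wrapped in TokensPrompt and passed as the prompts argument, which is what patch 3 does in the Offline client. The following is a minimal standalone sketch of that calling convention, not the delphi code itself; the model name is an arbitrary example, and the gpu_memory_utilization value mirrors what the new max_memory run-config fraction presumably feeds into.

# Sketch of the post-#18800 vLLM calling convention, assuming vllm>=0.10.2.
# Model name is illustrative only; any chat model with a chat template works.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from vllm.inputs import TokensPrompt

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # example model, not from the patch
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm = LLM(model=model_name, gpu_memory_utilization=0.9)

messages = [{"role": "user", "content": "Explain what this latent fires on."}]
token_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True
)

# Old API (removed): llm.generate(prompt_token_ids=[token_ids], ...)
# New API: wrap the token ids in TokensPrompt and pass them as prompts.
outputs = llm.generate(
    [TokensPrompt(prompt_token_ids=token_ids)],
    sampling_params=SamplingParams(max_tokens=64),
    use_tqdm=False,
)
print(outputs[0].outputs[0].text)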