From 015b9312bf658e1e7f01faa96f756b736c1d5b23 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:17:14 -0400
Subject: [PATCH 1/5] Add max_memory parameter to run config

Co-authored-by: Simon Schrader
---
 delphi/config.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/delphi/config.py b/delphi/config.py
index 05b723df..0cf6452e 100644
--- a/delphi/config.py
+++ b/delphi/config.py
@@ -191,6 +191,11 @@ class RunConfig(Serializable):
     )
     """Number of GPUs to use for explanation and scoring."""
 
+    max_memory: float = field(
+        default=0.9,
+    )
+    """Fraction of GPU memory to allocate to running explainer model."""
+
     seed: int = field(
         default=22,
     )

From 9f7a8346ec1b482485d04e5cc6dce57dd25f8e40 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:24:41 -0400
Subject: [PATCH 2/5] Use configurable max_memory for offline explainer

Co-authored-by: Simon Schrader
---
 delphi/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/__main__.py b/delphi/__main__.py
index 064c34a2..a1cf6ca2 100644
--- a/delphi/__main__.py
+++ b/delphi/__main__.py
@@ -145,7 +145,7 @@ async def process_cache(
     if run_cfg.explainer_provider == "offline":
         llm_client = Offline(
             run_cfg.explainer_model,
-            max_memory=0.9,
+            max_memory=run_cfg.max_memory,
             # Explainer models context length - must be able to accommodate the longest
             # set of examples
             max_model_len=run_cfg.explainer_model_max_len,

From 2ebda9ee6e97ab1f5d9519273d6b2e0dd5d2b473 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:30:26 -0400
Subject: [PATCH 3/5] Fix breaking change in prompt input formatting from vLLM

Co-authored-by: Simon Schrader
---
 delphi/clients/offline.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py
index 9dea693f..c517b34c 100644
--- a/delphi/clients/offline.py
+++ b/delphi/clients/offline.py
@@ -7,6 +7,7 @@
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
+from vllm.inputs import TokensPrompt
 from vllm.distributed.parallel_state import (
     destroy_distributed_environment,
     destroy_model_parallel,
 )
@@ -103,6 +104,7 @@ async def process_func(
             prompt = self.tokenizer.apply_chat_template(
                 batch, add_generation_prompt=True, tokenize=True
             )
+            prompt = TokensPrompt(prompt_token_ids=prompt)
             prompts.append(prompt)
             if self.statistics:
                 non_cached_tokens = len(
@@ -121,7 +123,7 @@ async def process_func(
                 None,
                 partial(
                     self.client.generate,  # type: ignore
-                    prompt_token_ids=prompts,
+                    prompts,
                     sampling_params=self.sampling_params,
                     use_tqdm=False,
                 ),

From da8a0edebf5998ec7da0d254e1d8c662c20aac49 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 2 Oct 2025 20:26:19 +0000
Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 delphi/clients/offline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py
index c517b34c..ecd07d37 100644
--- a/delphi/clients/offline.py
+++ b/delphi/clients/offline.py
@@ -7,11 +7,11 @@
 
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
-from vllm.inputs import TokensPrompt
 from vllm.distributed.parallel_state import (
     destroy_distributed_environment,
     destroy_model_parallel,
 )
+from vllm.inputs import TokensPrompt
 
 from delphi import logger
 

From 8c73363dbff21515499ab7066f923c1656f6eff4 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Sun, 5 Oct 2025 10:53:51 -0400
Subject: [PATCH 5/5] Update vllm dependency version to after API breaking change

PR #18800: https://github.com/vllm-project/vllm/releases

Co-authored-by: Simon Schrader
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 844cb085..554e8036 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ dependencies = [
     "blobfile",
     "bitsandbytes",
     "flask",
-    "vllm",
+    "vllm>=0.10.2",
     "aiofiles",
     "sentence_transformers",
     "anyio>=4.8.0",
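Note on the API change these patches track: vllm-project/vllm PR #18800 (released in vLLM 0.10.x) removed the prompt_token_ids keyword from LLM.generate(), so pre-tokenized prompts must now be wrapped in TokensPrompt and passed as the prompts argument, which is what patch 3 does in the Offline client. The following is a minimal standalone sketch of that calling convention, not the delphi code itself; the model name is an arbitrary example, and the gpu_memory_utilization value mirrors what the new max_memory run-config fraction presumably feeds into.

# Sketch of the post-#18800 vLLM calling convention, assuming vllm>=0.10.2.
# Model name is illustrative only; any chat model with a chat template works.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from vllm.inputs import TokensPrompt

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # example model, not from the patch
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm = LLM(model=model_name, gpu_memory_utilization=0.9)

messages = [{"role": "user", "content": "Explain what this latent fires on."}]
token_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True
)

# Old API (removed): llm.generate(prompt_token_ids=[token_ids], ...)
# New API: wrap the token ids in TokensPrompt and pass them as prompts.
outputs = llm.generate(
    [TokensPrompt(prompt_token_ids=token_ids)],
    sampling_params=SamplingParams(max_tokens=64),
    use_tqdm=False,
)
print(outputs[0].outputs[0].text)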