From 015b9312bf658e1e7f01faa96f756b736c1d5b23 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:17:14 -0400
Subject: [PATCH 1/6] Add max_memory parameter to run config

Co-authored-by: Simon Schrader
---
 delphi/config.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/delphi/config.py b/delphi/config.py
index 05b723df..0cf6452e 100644
--- a/delphi/config.py
+++ b/delphi/config.py
@@ -191,6 +191,11 @@ class RunConfig(Serializable):
     )
     """Number of GPUs to use for explanation and scoring."""
 
+    max_memory: float = field(
+        default=0.9,
+    )
+    """Fraction of GPU memory to allocate to running explainer model."""
+
     seed: int = field(
         default=22,
     )

From 9f7a8346ec1b482485d04e5cc6dce57dd25f8e40 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:24:41 -0400
Subject: [PATCH 2/6] Use configurable max_memory for offline explainer

Co-authored-by: Simon Schrader
---
 delphi/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/__main__.py b/delphi/__main__.py
index 064c34a2..a1cf6ca2 100644
--- a/delphi/__main__.py
+++ b/delphi/__main__.py
@@ -145,7 +145,7 @@ async def process_cache(
     if run_cfg.explainer_provider == "offline":
         llm_client = Offline(
             run_cfg.explainer_model,
-            max_memory=0.9,
+            max_memory=run_cfg.max_memory,
             # Explainer models context length - must be able to accommodate the longest
             # set of examples
             max_model_len=run_cfg.explainer_model_max_len,

From 2ebda9ee6e97ab1f5d9519273d6b2e0dd5d2b473 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:30:26 -0400
Subject: [PATCH 3/6] Fix breaking change in prompt input formatting from vLLM

Co-authored-by: Simon Schrader
---
 delphi/clients/offline.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py
index 9dea693f..c517b34c 100644
--- a/delphi/clients/offline.py
+++ b/delphi/clients/offline.py
@@ -7,6 +7,7 @@
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
+from vllm.inputs import TokensPrompt
 from vllm.distributed.parallel_state import (
     destroy_distributed_environment,
     destroy_model_parallel,
 )
@@ -103,6 +104,7 @@ async def process_func(
             prompt = self.tokenizer.apply_chat_template(
                 batch, add_generation_prompt=True, tokenize=True
             )
+            prompt = TokensPrompt(prompt_token_ids=prompt)
             prompts.append(prompt)
             if self.statistics:
                 non_cached_tokens = len(
@@ -121,7 +123,7 @@ async def process_func(
             None,
             partial(
                 self.client.generate,  # type: ignore
-                prompt_token_ids=prompts,
+                prompts,
                 sampling_params=self.sampling_params,
                 use_tqdm=False,
             ),

From da8a0edebf5998ec7da0d254e1d8c662c20aac49 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 2 Oct 2025 20:26:19 +0000
Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 delphi/clients/offline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py
index c517b34c..ecd07d37 100644
--- a/delphi/clients/offline.py
+++ b/delphi/clients/offline.py
@@ -7,11 +7,11 @@
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
-from vllm.inputs import TokensPrompt
 from vllm.distributed.parallel_state import (
     destroy_distributed_environment,
     destroy_model_parallel,
 )
+from vllm.inputs import TokensPrompt
 
 from delphi import logger

From 0e9569420f0d2db018801a9c5d5bdfad7a1d57a2 Mon Sep 17 00:00:00 2001
From: Kamal
Date: Fri, 3 Oct 2025 22:33:23 -0700
Subject: [PATCH 5/6] Fix bug in gemmascope device type check

---
 delphi/sparse_coders/custom/gemmascope.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/sparse_coders/custom/gemmascope.py b/delphi/sparse_coders/custom/gemmascope.py
index 27511d58..46be9012 100644
--- a/delphi/sparse_coders/custom/gemmascope.py
+++ b/delphi/sparse_coders/custom/gemmascope.py
@@ -104,6 +104,6 @@ def from_pretrained(cls, model_name_or_path, position, device):
         pt_params = {k: torch.from_numpy(v) for k, v in params.items()}
         model = cls(params["W_enc"].shape[0], params["W_enc"].shape[1])
         model.load_state_dict(pt_params)
-        if device == "cuda":
+        if device == "cuda" or (isinstance(device, torch.device) and device.type == "cuda"):
             model.cuda()
         return model

From 73b11851a1861bec9655685036e4c2b56f016752 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 15 Oct 2025 05:14:01 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 delphi/sparse_coders/custom/gemmascope.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/delphi/sparse_coders/custom/gemmascope.py b/delphi/sparse_coders/custom/gemmascope.py
index 46be9012..5db9ceb7 100644
--- a/delphi/sparse_coders/custom/gemmascope.py
+++ b/delphi/sparse_coders/custom/gemmascope.py
@@ -104,6 +104,8 @@ def from_pretrained(cls, model_name_or_path, position, device):
         pt_params = {k: torch.from_numpy(v) for k, v in params.items()}
         model = cls(params["W_enc"].shape[0], params["W_enc"].shape[1])
         model.load_state_dict(pt_params)
-        if device == "cuda" or (isinstance(device, torch.device) and device.type == "cuda"):
+        if device == "cuda" or (
+            isinstance(device, torch.device) and device.type == "cuda"
+        ):
             model.cuda()
         return model
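
Note on the device check fixed in PATCH 5/6: the sketch below is illustrative only and not part of the patches; it shows why the extra isinstance branch matters when a caller passes a torch.device rather than the string "cuda".

    import torch

    device = torch.device("cuda:0")
    # Comparing a torch.device to the string "cuda" evaluates to False (the
    # types differ), so the old check skipped model.cuda() and left the
    # weights on the CPU. The added branch inspects device.type, which is
    # "cuda" for any CUDA device, including ones with an explicit index.
    needs_cuda = device == "cuda" or (
        isinstance(device, torch.device) and device.type == "cuda"
    )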