From 015b9312bf658e1e7f01faa96f756b736c1d5b23 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:17:14 -0400
Subject: [PATCH 1/6] Add max_memory parameter to run config

Co-authored-by: Simon Schrader
---
 delphi/config.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/delphi/config.py b/delphi/config.py
index 05b723df..0cf6452e 100644
--- a/delphi/config.py
+++ b/delphi/config.py
@@ -191,6 +191,11 @@ class RunConfig(Serializable):
     )
     """Number of GPUs to use for explanation and scoring."""
 
+    max_memory: float = field(
+        default=0.9,
+    )
+    """Fraction of GPU memory to allocate to running explainer model."""
+
     seed: int = field(
         default=22,
     )

From 9f7a8346ec1b482485d04e5cc6dce57dd25f8e40 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:24:41 -0400
Subject: [PATCH 2/6] Use configurable max_memory for offline explainer

Co-authored-by: Simon Schrader
---
 delphi/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/__main__.py b/delphi/__main__.py
index 064c34a2..a1cf6ca2 100644
--- a/delphi/__main__.py
+++ b/delphi/__main__.py
@@ -145,7 +145,7 @@ async def process_cache(
     if run_cfg.explainer_provider == "offline":
         llm_client = Offline(
             run_cfg.explainer_model,
-            max_memory=0.9,
+            max_memory=run_cfg.max_memory,
             # Explainer models context length - must be able to accommodate the longest
             # set of examples
             max_model_len=run_cfg.explainer_model_max_len,

From 2ebda9ee6e97ab1f5d9519273d6b2e0dd5d2b473 Mon Sep 17 00:00:00 2001
From: Kamal Maher <85698689+kmaherx@users.noreply.github.com>
Date: Thu, 2 Oct 2025 14:30:26 -0400
Subject: [PATCH 3/6] Fix breaking change in prompt input formatting from vLLM

Co-authored-by: Simon Schrader
---
 delphi/clients/offline.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py
index 9dea693f..c517b34c 100644
--- a/delphi/clients/offline.py
+++ b/delphi/clients/offline.py
@@ -7,6 +7,7 @@
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
+from vllm.inputs import TokensPrompt
 from vllm.distributed.parallel_state import (
     destroy_distributed_environment,
     destroy_model_parallel,
 )
@@ -103,6 +104,7 @@ async def process_func(
             prompt = self.tokenizer.apply_chat_template(
                 batch, add_generation_prompt=True, tokenize=True
             )
+            prompt = TokensPrompt(prompt_token_ids=prompt)
             prompts.append(prompt)
             if self.statistics:
                 non_cached_tokens = len(
@@ -121,7 +123,7 @@ async def process_func(
             None,
             partial(
                 self.client.generate,  # type: ignore
-                prompt_token_ids=prompts,
+                prompts,
                 sampling_params=self.sampling_params,
                 use_tqdm=False,
             ),

From da8a0edebf5998ec7da0d254e1d8c662c20aac49 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 2 Oct 2025 20:26:19 +0000
Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 delphi/clients/offline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py
index c517b34c..ecd07d37 100644
--- a/delphi/clients/offline.py
+++ b/delphi/clients/offline.py
@@ -7,11 +7,11 @@
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
-from vllm.inputs import TokensPrompt
 from vllm.distributed.parallel_state import (
     destroy_distributed_environment,
     destroy_model_parallel,
 )
+from vllm.inputs import TokensPrompt
 
 from delphi import logger

From 0e9569420f0d2db018801a9c5d5bdfad7a1d57a2 Mon Sep 17 00:00:00 2001
From: Kamal
Date: Fri, 3 Oct 2025 22:33:23 -0700
Subject: [PATCH 5/6] Fix bug in gemmascope device type check

---
 delphi/sparse_coders/custom/gemmascope.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/delphi/sparse_coders/custom/gemmascope.py b/delphi/sparse_coders/custom/gemmascope.py
index 27511d58..46be9012 100644
--- a/delphi/sparse_coders/custom/gemmascope.py
+++ b/delphi/sparse_coders/custom/gemmascope.py
@@ -104,6 +104,6 @@ def from_pretrained(cls, model_name_or_path, position, device):
         pt_params = {k: torch.from_numpy(v) for k, v in params.items()}
         model = cls(params["W_enc"].shape[0], params["W_enc"].shape[1])
         model.load_state_dict(pt_params)
-        if device == "cuda":
+        if device == "cuda" or (isinstance(device, torch.device) and device.type == "cuda"):
             model.cuda()
         return model

From 73b11851a1861bec9655685036e4c2b56f016752 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 15 Oct 2025 05:14:01 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 delphi/sparse_coders/custom/gemmascope.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/delphi/sparse_coders/custom/gemmascope.py b/delphi/sparse_coders/custom/gemmascope.py
index 46be9012..5db9ceb7 100644
--- a/delphi/sparse_coders/custom/gemmascope.py
+++ b/delphi/sparse_coders/custom/gemmascope.py
@@ -104,6 +104,8 @@ def from_pretrained(cls, model_name_or_path, position, device):
         pt_params = {k: torch.from_numpy(v) for k, v in params.items()}
         model = cls(params["W_enc"].shape[0], params["W_enc"].shape[1])
         model.load_state_dict(pt_params)
-        if device == "cuda" or (isinstance(device, torch.device) and device.type == "cuda"):
+        if device == "cuda" or (
+            isinstance(device, torch.device) and device.type == "cuda"
+        ):
             model.cuda()
         return model
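
Note on the device check fixed in PATCH 5/6: the sketch below is illustrative only and not part of the patches; it shows why the extra isinstance branch matters when a caller passes a torch.device rather than the string "cuda".

    import torch

    device = torch.device("cuda:0")
    # Comparing a torch.device to the string "cuda" evaluates to False (the
    # types differ), so the old check skipped model.cuda() and left the
    # weights on the CPU. The added branch inspects device.type, which is
    # "cuda" for any CUDA device, including ones with an explicit index.
    needs_cuda = device == "cuda" or (
        isinstance(device, torch.device) and device.type == "cuda"
    )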