From 965a3ccb32a5bbb6cb1d8977c8307edd18c3cdb9 Mon Sep 17 00:00:00 2001 From: anthonyduong Date: Wed, 4 Jun 2025 16:51:07 -0700 Subject: [PATCH 1/7] fixes EmbeddingScorer._prepare() passes arg of wrong type --- delphi/scorers/embedding/embedding.py | 33 ++++++++++++++------------- delphi/scorers/surprisal/surprisal.py | 15 ++++-------- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/delphi/scorers/embedding/embedding.py b/delphi/scorers/embedding/embedding.py index 2de89874..26943623 100644 --- a/delphi/scorers/embedding/embedding.py +++ b/delphi/scorers/embedding/embedding.py @@ -51,7 +51,7 @@ async def __call__( # type: ignore random.shuffle(samples) results = self._query( record.explanation, - samples, # type: ignore + samples, ) return ScorerResult(record=record, score=results) @@ -59,30 +59,31 @@ async def __call__( # type: ignore def call_sync(self, record: LatentRecord) -> list[EmbeddingOutput]: return asyncio.run(self.__call__(record)) # type: ignore - def _prepare(self, record: LatentRecord) -> list[list[Sample]]: + def _prepare(self, record: LatentRecord) -> list[Sample]: """ Prepare and shuffle a list of samples for classification. """ + samples = [] - defaults = { - "tokenizer": self.tokenizer, - } - samples = examples_to_samples( - record.extra_examples, # type: ignore - distance=-1, - **defaults, # type: ignore - ) + if record.extra_examples is not None: + samples.extend( + examples_to_samples( + record.extra_examples, + tokenizer=self.tokenizer, + distance=-1, + ) + ) - for i, examples in enumerate(record.test): + for i, example in enumerate(record.test): samples.extend( examples_to_samples( - examples, # type: ignore + [example], + tokenizer=self.tokenizer, distance=i + 1, - **defaults, # type: ignore ) ) - return samples # type: ignore + return samples def _query(self, explanation: str, samples: list[Sample]) -> list[EmbeddingOutput]: explanation_string = ( @@ -110,7 +111,7 @@ def _query(self, explanation: str, samples: list[Sample]) -> list[EmbeddingOutpu def examples_to_samples( examples: list[Example], - tokenizer: PreTrainedTokenizer, + tokenizer: PreTrainedTokenizer | None, **sample_kwargs, ) -> list[Sample]: samples = [] @@ -118,7 +119,7 @@ def examples_to_samples( if tokenizer is not None: text = "".join(tokenizer.batch_decode(example.tokens)) else: - text = "".join(example.tokens) + text = "".join(str(token) for token in example.tokens) activations = example.activations.tolist() samples.append( Sample( diff --git a/delphi/scorers/surprisal/surprisal.py b/delphi/scorers/surprisal/surprisal.py index ee92b1c1..931a4f5c 100644 --- a/delphi/scorers/surprisal/surprisal.py +++ b/delphi/scorers/surprisal/surprisal.py @@ -7,8 +7,6 @@ from torch.nn.functional import cross_entropy from transformers import PreTrainedTokenizer -from delphi.utils import assert_type - from ...latents import ActivatingExample, Example, LatentRecord from ..scorer import Scorer, ScorerResult from .prompts import BASEPROMPT as base_prompt @@ -74,24 +72,19 @@ def _prepare(self, record: LatentRecord) -> list[Sample]: Prepare and shuffle a list of samples for classification. """ - defaults = { - "tokenizer": self.tokenizer, - } - assert record.extra_examples is not None, "No extra examples provided" samples = examples_to_samples( record.extra_examples, + tokenizer=self.tokenizer, distance=-1, - **defaults, ) - for i, examples in enumerate(record.test): - examples = assert_type(list, examples) + for i, example in enumerate(record.test): samples.extend( examples_to_samples( - examples, + [example], + tokenizer=self.tokenizer, distance=i + 1, - **defaults, ) ) From 3809ed398fe49c9a71351b10c102ecde74753707 Mon Sep 17 00:00:00 2001 From: SrGonao Date: Thu, 12 Jun 2025 08:49:58 -0400 Subject: [PATCH 2/7] Make str_tokens not optional --- delphi/latents/latents.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/delphi/latents/latents.py b/delphi/latents/latents.py index 5e5611c2..d1190932 100644 --- a/delphi/latents/latents.py +++ b/delphi/latents/latents.py @@ -75,12 +75,6 @@ class Example: activations: Float[Tensor, "ctx_len"] """Activation values for the input sequence.""" - str_tokens: list[str] | None = None - """Tokenized input sequence as strings.""" - - normalized_activations: Optional[Float[Tensor, "ctx_len"]] = None - """Activations quantized to integers in [0, 10].""" - @property def max_activation(self) -> float: """ @@ -98,6 +92,12 @@ class ActivatingExample(Example): An example of a latent that activates a model. """ + str_tokens: list[str] + """Tokenized input sequence as strings.""" + + normalized_activations: Float[Tensor, "ctx_len"] + """Activations quantized to integers in [0, 10].""" + quantile: int = 0 """The quantile of the activating example.""" @@ -108,6 +108,9 @@ class NonActivatingExample(Example): An example of a latent that does not activate a model. """ + str_tokens: list[str] + """Tokenized input sequence as strings.""" + distance: float = 0.0 """ The distance from the neighbouring latent. From 7f984043393555974584b35c6f31a44088b9da3c Mon Sep 17 00:00:00 2001 From: SrGonao Date: Thu, 12 Jun 2025 08:50:57 -0400 Subject: [PATCH 3/7] Correct logic for examples to samples --- delphi/scorers/embedding/embedding.py | 70 +++++++++++++-------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/delphi/scorers/embedding/embedding.py b/delphi/scorers/embedding/embedding.py index 26943623..0e6f44b7 100644 --- a/delphi/scorers/embedding/embedding.py +++ b/delphi/scorers/embedding/embedding.py @@ -1,9 +1,9 @@ import asyncio import random from dataclasses import dataclass -from typing import NamedTuple +from typing import NamedTuple, Sequence -from transformers import PreTrainedTokenizer +from delphi.latents.latents import ActivatingExample, NonActivatingExample from ...latents import Example, LatentRecord from ..scorer import Scorer, ScorerResult @@ -33,19 +33,17 @@ class EmbeddingScorer(Scorer): def __init__( self, model, - tokenizer: PreTrainedTokenizer | None = None, verbose: bool = False, **generation_kwargs, ): self.model = model self.verbose = verbose - self.tokenizer = tokenizer self.generation_kwargs = generation_kwargs - async def __call__( # type: ignore - self, # type: ignore - record: LatentRecord, # type: ignore - ) -> ScorerResult: # type: ignore + async def __call__( + self, + record: LatentRecord, + ) -> ScorerResult: samples = self._prepare(record) random.shuffle(samples) @@ -56,8 +54,8 @@ async def __call__( # type: ignore return ScorerResult(record=record, score=results) - def call_sync(self, record: LatentRecord) -> list[EmbeddingOutput]: - return asyncio.run(self.__call__(record)) # type: ignore + def call_sync(self, record: LatentRecord) -> ScorerResult: + return asyncio.run(self.__call__(record)) def _prepare(self, record: LatentRecord) -> list[Sample]: """ @@ -65,23 +63,21 @@ def _prepare(self, record: LatentRecord) -> list[Sample]: """ samples = [] - if record.extra_examples is not None: - samples.extend( - examples_to_samples( - record.extra_examples, - tokenizer=self.tokenizer, - distance=-1, - ) + assert ( + record.extra_examples is not None + ), "Extra (non-activating) examples need to be provided" + + samples.extend( + examples_to_samples( + record.extra_examples, ) + ) - for i, example in enumerate(record.test): - samples.extend( - examples_to_samples( - [example], - tokenizer=self.tokenizer, - distance=i + 1, - ) + samples.extend( + examples_to_samples( + record.test, ) + ) return samples @@ -94,38 +90,38 @@ def _query(self, explanation: str, samples: list[Sample]) -> list[EmbeddingOutpu query_embeding = self.model.encode(explanation_prompt) samples_text = [sample.text for sample in samples] - # # Temporary batching - # sample_embedings = [] - # for i in range(0, len(samples_text), 10): - # sample_embedings.extend(self.model.encode(samples_text[i:i+10])) sample_embedings = self.model.encode(samples_text) similarity = self.model.similarity(query_embeding, sample_embedings)[0] results = [] for i in range(len(samples)): - # print(i) samples[i].data.similarity = similarity[i].item() results.append(samples[i].data) return results def examples_to_samples( - examples: list[Example], - tokenizer: PreTrainedTokenizer | None, - **sample_kwargs, + examples: Sequence[Example], ) -> list[Sample]: samples = [] for example in examples: - if tokenizer is not None: - text = "".join(tokenizer.batch_decode(example.tokens)) - else: - text = "".join(str(token) for token in example.tokens) + assert isinstance(example, ActivatingExample) or isinstance( + example, NonActivatingExample + ) + text = "".join(str(token) for token in example.str_tokens) activations = example.activations.tolist() samples.append( Sample( text=text, activations=activations, - data=EmbeddingOutput(text=text, **sample_kwargs), + data=EmbeddingOutput( + text=text, + distance=( + example.quantile + if isinstance(example, ActivatingExample) + else example.distance + ), + ), ) ) From 94f3dd73d70ce21d3f069bac5322d31ace144988 Mon Sep 17 00:00:00 2001 From: SrGonao Date: Thu, 12 Jun 2025 08:53:14 -0400 Subject: [PATCH 4/7] Clean up surprisal as well --- delphi/scorers/surprisal/surprisal.py | 52 ++++++++++++++------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/delphi/scorers/surprisal/surprisal.py b/delphi/scorers/surprisal/surprisal.py index 931a4f5c..1e10829c 100644 --- a/delphi/scorers/surprisal/surprisal.py +++ b/delphi/scorers/surprisal/surprisal.py @@ -1,13 +1,17 @@ import random from dataclasses import dataclass -from typing import NamedTuple +from typing import NamedTuple, Sequence import torch from simple_parsing import field from torch.nn.functional import cross_entropy -from transformers import PreTrainedTokenizer -from ...latents import ActivatingExample, Example, LatentRecord +from ...latents import ( + ActivatingExample, + Example, + LatentRecord, + NonActivatingExample, +) from ..scorer import Scorer, ScorerResult from .prompts import BASEPROMPT as base_prompt @@ -42,21 +46,19 @@ class SurprisalScorer(Scorer): def __init__( self, model, - tokenizer, verbose: bool, batch_size: int, **generation_kwargs, ): self.model = model self.verbose = verbose - self.tokenizer = tokenizer self.batch_size = batch_size self.generation_kwargs = generation_kwargs - async def __call__( # type: ignore - self, # type: ignore - record: LatentRecord, # type: ignore - ) -> ScorerResult: # type: ignore + async def __call__( + self, + record: LatentRecord, + ) -> ScorerResult: samples = self._prepare(record) random.shuffle(samples) @@ -75,27 +77,22 @@ def _prepare(self, record: LatentRecord) -> list[Sample]: assert record.extra_examples is not None, "No extra examples provided" samples = examples_to_samples( record.extra_examples, - tokenizer=self.tokenizer, - distance=-1, ) - for i, example in enumerate(record.test): - samples.extend( - examples_to_samples( - [example], - tokenizer=self.tokenizer, - distance=i + 1, - ) + samples.extend( + examples_to_samples( + record.test, ) + ) return samples def compute_loss_with_kv_cache( self, explanation: str, samples: list[Sample], batch_size=2 ): - # print(explanation_prompt) model = self.model tokenizer = self.model.tokenizer + assert tokenizer is not None, "Tokenizer is not set in model.tokenizer" # Tokenize explanation tokenizer.padding_side = "right" tokenizer.pad_token = tokenizer.eos_token @@ -180,20 +177,27 @@ def _query(self, explanation: str, samples: list[Sample]) -> list[SurprisalOutpu def examples_to_samples( - examples: list[Example] | list[ActivatingExample], - tokenizer: PreTrainedTokenizer, - **sample_kwargs, + examples: Sequence[Example], ) -> list[Sample]: samples = [] for example in examples: - text = "".join(tokenizer.batch_decode(example.tokens)) + assert isinstance(example, ActivatingExample) or isinstance( + example, NonActivatingExample + ) + text = "".join(str(token) for token in example.str_tokens) activations = example.activations.tolist() samples.append( Sample( text=text, activations=activations, data=SurprisalOutput( - activations=activations, text=text, **sample_kwargs + activations=activations, + text=text, + distance=( + example.quantile + if isinstance(example, ActivatingExample) + else example.distance + ), ), ) ) From 9dc9bb01e8ad2f3e12027f4f4648f97d6f24bcfd Mon Sep 17 00:00:00 2001 From: SrGonao Date: Thu, 12 Jun 2025 09:15:27 -0400 Subject: [PATCH 5/7] Optional arguments --- delphi/latents/latents.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/delphi/latents/latents.py b/delphi/latents/latents.py index d1190932..91a4b176 100644 --- a/delphi/latents/latents.py +++ b/delphi/latents/latents.py @@ -92,12 +92,12 @@ class ActivatingExample(Example): An example of a latent that activates a model. """ - str_tokens: list[str] - """Tokenized input sequence as strings.""" - - normalized_activations: Float[Tensor, "ctx_len"] + normalized_activations: Optional[Float[Tensor, "ctx_len"]] = None """Activations quantized to integers in [0, 10].""" + str_tokens: Optional[list[str]] = None + """Tokenized input sequence as strings.""" + quantile: int = 0 """The quantile of the activating example.""" @@ -128,7 +128,7 @@ class LatentRecord: """The latent associated with the record.""" examples: list[ActivatingExample] = field(default_factory=list) - """Example sequences where the latent activations, assumed to be sorted in + """Example sequences where the latent activates, assumed to be sorted in descending order by max activation.""" not_active: list[NonActivatingExample] = field(default_factory=list) From ab7f757fe96f9d1eeb6e317b94334236498361d3 Mon Sep 17 00:00:00 2001 From: SrGonao Date: Thu, 12 Jun 2025 09:15:45 -0400 Subject: [PATCH 6/7] Asserts for typehints --- delphi/scorers/embedding/embedding.py | 1 + delphi/scorers/surprisal/surprisal.py | 1 + 2 files changed, 2 insertions(+) diff --git a/delphi/scorers/embedding/embedding.py b/delphi/scorers/embedding/embedding.py index 0e6f44b7..ed911866 100644 --- a/delphi/scorers/embedding/embedding.py +++ b/delphi/scorers/embedding/embedding.py @@ -108,6 +108,7 @@ def examples_to_samples( assert isinstance(example, ActivatingExample) or isinstance( example, NonActivatingExample ) + assert example.str_tokens is not None text = "".join(str(token) for token in example.str_tokens) activations = example.activations.tolist() samples.append( diff --git a/delphi/scorers/surprisal/surprisal.py b/delphi/scorers/surprisal/surprisal.py index 1e10829c..7f42be04 100644 --- a/delphi/scorers/surprisal/surprisal.py +++ b/delphi/scorers/surprisal/surprisal.py @@ -184,6 +184,7 @@ def examples_to_samples( assert isinstance(example, ActivatingExample) or isinstance( example, NonActivatingExample ) + assert example.str_tokens is not None text = "".join(str(token) for token in example.str_tokens) activations = example.activations.tolist() samples.append( From 6e016181e77516a488ec13ec5a8062fbc3f70a65 Mon Sep 17 00:00:00 2001 From: SrGonao Date: Thu, 12 Jun 2025 09:16:13 -0400 Subject: [PATCH 7/7] Fixed constructors --- delphi/latents/constructors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/delphi/latents/constructors.py b/delphi/latents/constructors.py index 95aee37c..fa857bef 100644 --- a/delphi/latents/constructors.py +++ b/delphi/latents/constructors.py @@ -47,7 +47,6 @@ def prepare_non_activating_examples( NonActivatingExample( tokens=toks, activations=acts, - normalized_activations=None, distance=distance, str_tokens=tokenizer.batch_decode(toks), ) @@ -281,7 +280,6 @@ def constructor( ActivatingExample( tokens=toks, activations=acts, - normalized_activations=None, ) for toks, acts in zip(token_windows, act_windows) ]