From fa2eb6fac4bf69399a8080c305b0940fb8110c93 Mon Sep 17 00:00:00 2001
From: Goncalo Paulo <goncalo@eleuther.ai>
Date: Fri, 7 Nov 2025 10:48:33 +0000
Subject: [PATCH 1/2] Update fuzz docstring

---
 delphi/scorers/classifier/fuzz.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/delphi/scorers/classifier/fuzz.py b/delphi/scorers/classifier/fuzz.py
index db76afb6..4d43d071 100644
--- a/delphi/scorers/classifier/fuzz.py
+++ b/delphi/scorers/classifier/fuzz.py
@@ -38,6 +38,10 @@ def __init__(
                         it harder for models to generate anwers in the correct format.
             log_prob: Whether to use log probabilities to allow for AUC calculation.
             generation_kwargs: Additional generation kwargs.
+            temperature: Which temperature to use for the scorer model.
+            fuzz_type: Which type of fuzzing to use. 'default' uses non-activating
+                examples and highlights n_incorrect tokens. 'active' uses activating
+                examples and highlights non-activating tokens.
         """
         super().__init__(
             client=client,

From 25ed88adeb86d0d6f2e1746ce15cab021b74eaa0 Mon Sep 17 00:00:00 2001
From: Goncalo Paulo <goncalo@eleuther.ai>
Date: Fri, 7 Nov 2025 10:49:36 +0000
Subject: [PATCH 2/2] Add fuzz type to config

---
 delphi/__main__.py | 1 +
 delphi/config.py   | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/delphi/__main__.py b/delphi/__main__.py
index a1cf6ca2..d69d7b10 100644
--- a/delphi/__main__.py
+++ b/delphi/__main__.py
@@ -256,6 +256,7 @@ def scorer_postprocess(result, score_dir):
                 n_examples_shown=run_cfg.num_examples_per_scorer_prompt,
                 verbose=run_cfg.verbose,
                 log_prob=run_cfg.log_probs,
+                fuzz_type=run_cfg.fuzz_type,
             )
         elif scorer_name == "detection":
             scorer = DetectionScorer(
diff --git a/delphi/config.py b/delphi/config.py
index 0cf6452e..09a9fbb0 100644
--- a/delphi/config.py
+++ b/delphi/config.py
@@ -160,6 +160,10 @@ class RunConfig(Serializable):
     )
     """Scorer methods to score latent explanations. Options are 'fuzz', 'detection', and
     'simulation'."""
+    fuzz_type: Literal["default", "active"] = "default"
+    """Type of fuzzing to use for the fuzz scorer. 'default' uses non-activating
+    examples and highlights n_incorrect tokens. 'active' uses activating examples
+    and highlights non-activating tokens."""
 
     name: str = ""
     """The name of the run. Results are saved in a directory with this name."""