diff --git a/delphi/__main__.py b/delphi/__main__.py index a1cf6ca2..d69d7b10 100644 --- a/delphi/__main__.py +++ b/delphi/__main__.py @@ -256,6 +256,7 @@ def scorer_postprocess(result, score_dir): n_examples_shown=run_cfg.num_examples_per_scorer_prompt, verbose=run_cfg.verbose, log_prob=run_cfg.log_probs, + fuzz_type=run_cfg.fuzz_type, ) elif scorer_name == "detection": scorer = DetectionScorer( diff --git a/delphi/config.py b/delphi/config.py index 0cf6452e..09a9fbb0 100644 --- a/delphi/config.py +++ b/delphi/config.py @@ -160,6 +160,10 @@ class RunConfig(Serializable): ) """Scorer methods to score latent explanations. Options are 'fuzz', 'detection', and 'simulation'.""" + fuzz_type: Literal["default", "active"] = "default" + """Type of fuzzing to use for the fuzz scorer. Default uses non-activating + examples and highlights n_incorrect tokens. Active uses activating examples + and highlights non-activating tokens.""" name: str = "" """The name of the run. Results are saved in a directory with this name.""" diff --git a/delphi/scorers/classifier/fuzz.py b/delphi/scorers/classifier/fuzz.py index db76afb6..4d43d071 100644 --- a/delphi/scorers/classifier/fuzz.py +++ b/delphi/scorers/classifier/fuzz.py @@ -38,6 +38,10 @@ def __init__( it harder for models to generate anwers in the correct format. log_prob: Whether to use log probabilities to allow for AUC calculation. generation_kwargs: Additional generation kwargs. + temperature: Which temperature to use for the scorer model. + fuzz_type: Which type of fuzzing to use. Default uses non-activating + examples and highlights n_incorrect tokens. Active uses activating + examples and highlights non-activating tokens. """ super().__init__( client=client,