From d88b27ee8e70774897c397fb35df30744879e9aa Mon Sep 17 00:00:00 2001 From: Lucia Quirke Date: Sun, 16 Feb 2025 06:02:16 +0000 Subject: [PATCH 1/4] Update log -> verbose --- .gitignore | 1 + delphi/__main__.py | 2 +- delphi/config.py | 10 +++++----- delphi/log/result_analysis.py | 6 ++++-- delphi/tests/e2e.py | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 7b48a2a0..313c5e82 100644 --- a/.gitignore +++ b/.gitignore @@ -180,3 +180,4 @@ cython_debug/ # Development artifacts results/ +.vscode/ diff --git a/delphi/__main__.py b/delphi/__main__.py index cfac93df..ee57cd04 100644 --- a/delphi/__main__.py +++ b/delphi/__main__.py @@ -315,7 +315,7 @@ async def run( else: print(f"Files found in {scores_path}, skipping...") - if run_cfg.log: + if run_cfg.verbose: log_results(scores_path, visualize_path, run_cfg.hookpoints) diff --git a/delphi/config.py b/delphi/config.py index 9a61a776..7d12b64e 100644 --- a/delphi/config.py +++ b/delphi/config.py @@ -133,18 +133,18 @@ class RunConfig: ) """Seed for the random number generator.""" - log: bool = field( + verbose: bool = field( default=True, ) """Whether to log summary statistics and results of the run.""" - overwrite: list[str] = list_field() - """Whether to overwrite existing parts of the run. Options are 'cache', 'scores', - and 'visualize'.""" - num_examples_per_scorer_prompt: int = field( default=5, ) """Number of examples to use for each scorer prompt. Using more than 1 improves scoring speed but can leak information to the fuzzing and detection scorer, as well as increasing the scorer LLM task difficulty.""" + + overwrite: list[Literal["cache", "scores"]] = list_field() + """List of run stages to recompute. 
This is a debugging tool + and may be removed in the future.""" diff --git a/delphi/log/result_analysis.py b/delphi/log/result_analysis.py index 0fe2fea3..1cccf0a6 100644 --- a/delphi/log/result_analysis.py +++ b/delphi/log/result_analysis.py @@ -10,7 +10,9 @@ pio.kaleido.scope.mathjax = None # https://github.com/plotly/plotly.py/issues/3469 -def latent_balanced_score_metrics(df: pd.DataFrame, score_type: str, log: bool = True): +def latent_balanced_score_metrics( + df: pd.DataFrame, score_type: str, verbose: bool = True +): # Calculate weights based on non-errored examples valid_examples = df["total_examples"] weights = valid_examples / valid_examples.sum() @@ -34,7 +36,7 @@ def latent_balanced_score_metrics(df: pd.DataFrame, score_type: str, log: bool = "false_negative_rate": np.average(df["false_negative_rate"], weights=weights), } - if log: + if verbose: print(f"\n--- {score_type.title()} Metrics ---") print(f"Accuracy: {metrics['accuracy']:.3f}") print(f"F1 Score: {metrics['f1_score']:.3f}") diff --git a/delphi/tests/e2e.py b/delphi/tests/e2e.py index da901ff4..ff2e40a4 100644 --- a/delphi/tests/e2e.py +++ b/delphi/tests/e2e.py @@ -54,7 +54,7 @@ async def test(): for score_type in df["score_type"].unique(): score_df = df[df["score_type"] == score_type] weighted_mean_metrics = latent_balanced_score_metrics( - score_df, score_type, log=False + score_df, score_type, verbose=False ) accuracy = weighted_mean_metrics["accuracy"] From 09e48103e1232f6be32af52b6eabf50eceeecff8 Mon Sep 17 00:00:00 2001 From: Lucia Quirke Date: Sun, 16 Feb 2025 06:51:10 +0000 Subject: [PATCH 2/4] fix latent_contexts --- delphi/__main__.py | 5 +- delphi/latents/loader.py | 147 ++++++++++++++++++-- delphi/latents/neighbours.py | 2 + delphi/tests/e2e.py | 1 + examples/latent_contexts.ipynb | 241 +++++++++++++-------------------- 5 files changed, 239 insertions(+), 157 deletions(-) diff --git a/delphi/__main__.py b/delphi/__main__.py index ee57cd04..0ed84479 100644 --- a/delphi/__main__.py 
+++ b/delphi/__main__.py @@ -147,6 +147,7 @@ def explainer_postprocess(result): client, tokenizer=dataset.tokenizer, threshold=0.3, + verbose=run_cfg.verbose, ), postprocess=explainer_postprocess, ) @@ -171,7 +172,7 @@ def scorer_postprocess(result, score_dir): client, tokenizer=dataset.tokenizer, # type: ignore batch_size=run_cfg.num_examples_per_scorer_prompt, - verbose=False, + verbose=run_cfg.verbose, log_prob=False, ), preprocess=scorer_preprocess, @@ -182,7 +183,7 @@ def scorer_postprocess(result, score_dir): client, tokenizer=dataset.tokenizer, # type: ignore batch_size=run_cfg.num_examples_per_scorer_prompt, - verbose=False, + verbose=run_cfg.verbose, log_prob=False, ), preprocess=scorer_preprocess, diff --git a/delphi/latents/loader.py b/delphi/latents/loader.py index 47b5b03b..7aa47d03 100644 --- a/delphi/latents/loader.py +++ b/delphi/latents/loader.py @@ -43,7 +43,7 @@ class TensorBuffer: Lazy loading buffer for cached splits. """ - path: str + path: Path | str """Path to the tensor file.""" module_path: str @@ -80,7 +80,12 @@ def __iter__(self): def load(self): split_data = load_file(self.path) - first_latent = int(self.path.split("/")[-1].split("_")[0]) + + first_latent = ( + int(self.path.stem.split("_")[0]) + if isinstance(self.path, Path) + else int(self.path.split("/")[-1].split("_")[0]) + ) activations = torch.tensor(split_data["activations"]) locations = torch.tensor(split_data["locations"].astype(np.int64)) if "tokens" in split_data: @@ -121,7 +126,7 @@ class LatentDataset: def __init__( self, - raw_dir: str, + raw_dir: Path, cfg: LatentConfig, tokenizer: Optional[Callable] = None, modules: Optional[list[str]] = None, @@ -147,7 +152,7 @@ def __init__( else: self._build_selected(raw_dir, modules, latents) # TODO: this assumes that all modules have the same config - cache_config_dir = f"{raw_dir}/{modules[0]}/config.json" + cache_config_dir = raw_dir / modules[0] / "config.json" with open(cache_config_dir, "r") as f: cache_config = json.load(f) 
if tokenizer is None: @@ -181,8 +186,8 @@ def load_tokens(self): ) return self.tokens - def _edges(self, raw_dir: str, module: str) -> list[tuple[int, int]]: - module_dir = Path(raw_dir) / module + def _edges(self, raw_dir: Path, module: str) -> list[tuple[int, int]]: + module_dir = raw_dir / module safetensor_files = [f for f in module_dir.glob("*.safetensors")] edges = [] for file in safetensor_files: @@ -191,27 +196,27 @@ def _edges(self, raw_dir: str, module: str) -> list[tuple[int, int]]: edges.sort(key=lambda x: x[0]) return edges - def _build(self, raw_dir: str, modules: Optional[list[str]] = None): + def _build(self, raw_dir: Path, modules: Optional[list[str]] = None): """ Build dataset buffers which load all cached latents. Args: - raw_dir (str): Directory containing raw latent data. + raw_dir (Path): Directory containing raw latent data. modules (Optional[list[str]]): list of module names to include. """ - modules = os.listdir(raw_dir) if modules is None else modules + modules = os.listdir(str(raw_dir)) if modules is None else modules for module in modules: edges = self._edges(raw_dir, module) for start, end in edges: - path = f"{raw_dir}/{module}/{start}_{end}.safetensors" + path = raw_dir / module / f"{start}_{end}.safetensors" self.buffers.append( TensorBuffer(path, module, min_examples=self.cfg.min_examples) ) def _build_selected( self, - raw_dir: str, + raw_dir: Path, modules: list[str], latents: dict[str, Union[int, torch.Tensor]], ): @@ -322,3 +327,123 @@ async def _aprocess_latent(self, buffer_output: BufferOutput) -> LatentRecord: if self.transform is not None: self.transform(record) return record + + +class LatentLoader: + """ + Loader class for processing latent records from a LatentDataset. + """ + + def __init__( + self, + latent_dataset: "LatentDataset", + constructor: Optional[Callable] = None, + sampler: Optional[Callable] = None, + transform: Optional[Callable] = None, + ): + """ + Initialize a LatentLoader. 
+ + Args: + latent_dataset (LatentDataset): The dataset to load latents from. + constructor (Optional[Callable]): Function to construct latent records. + sampler (Optional[Callable]): Function to sample from latent records. + transform (Optional[Callable]): Function to transform latent records. + """ + self.latent_dataset = latent_dataset + self.constructor = constructor + self.sampler = sampler + self.transform = transform + + async def __aiter__(self): + """ + Asynchronous iterator for processing latent records. + + Yields: + LatentRecord: Processed latent records. + """ + for buffer in self.latent_dataset.buffers: + async for record in self._aprocess_buffer(buffer): + yield record + + async def _aprocess_buffer(self, buffer): + """ + Asynchronously process a buffer. + + Args: + buffer (TensorBuffer): Buffer to process. + + Yields: + Optional[LatentRecord]: Processed latent record or None. + """ + for data in buffer: + if data is not None: + record = await self._aprocess_latent(data) + if record is not None: + yield record + await asyncio.sleep(0) # Allow other coroutines to run + + async def _aprocess_latent(self, buffer_output): + """ + Asynchronously process a single latent. + + Args: + buffer_output (BufferOutput): Latent data to process. + + Returns: + Optional[LatentRecord]: Processed latent record or None. + """ + record = LatentRecord(buffer_output.latent) + if self.constructor is not None: + self.constructor(record=record, buffer_output=buffer_output) + if self.sampler is not None: + self.sampler(record) + if self.transform is not None: + self.transform(record) + return record + + def __iter__(self): + """ + Synchronous iterator for processing latent records. + + Yields: + LatentRecord: Processed latent records. + """ + for buffer in self.latent_dataset.buffers: + for record in self._process_buffer(buffer): + yield record + + def _process_buffer(self, buffer): + """ + Process a buffer synchronously. + + Args: + buffer (TensorBuffer): Buffer to process. 
+ + Yields: + Optional[LatentRecord]: Processed latent record or None. + """ + for data in buffer: + if data is not None: + record = self._process_latent(data) + if record is not None: + yield record + + def _process_latent(self, buffer_output): + """ + Process a single latent synchronously. + + Args: + buffer_output (BufferOutput): Latent data to process. + + Returns: + Optional[LatentRecord]: Processed latent record or None. + """ + record = LatentRecord(buffer_output.latent) + if self.constructor is not None: + self.constructor(record=record, buffer_output=buffer_output) + if self.sampler is not None: + self.sampler(record) + if self.transform is not None: + self.transform(record) + return record diff --git a/delphi/latents/neighbours.py b/delphi/latents/neighbours.py index d5b9d86b..90a4e587 100644 --- a/delphi/latents/neighbours.py +++ b/delphi/latents/neighbours.py @@ -7,6 +7,8 @@ import torch from safetensors.numpy import load_file +from delphi.latents.loader import LatentDataset + class NeighbourCalculator: """ diff --git a/delphi/tests/e2e.py b/delphi/tests/e2e.py index ff2e40a4..c8eae835 100644 --- a/delphi/tests/e2e.py +++ b/delphi/tests/e2e.py @@ -41,6 +41,7 @@ async def test(): seed=22, num_gpus=torch.cuda.device_count(), filter_bos=True, + verbose=True, ) start_time = time.time() diff --git a/examples/latent_contexts.ipynb b/examples/latent_contexts.ipynb index c7ee3135..655e4ab9 100644 --- a/examples/latent_contexts.ipynb +++ b/examples/latent_contexts.ipynb @@ -9,40 +9,39 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'ipywidgets'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m 
\u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mfunctools\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m partial\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mipywidgets\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mwidgets\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mIPython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdisplay\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m HTML, clear_output, display\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'ipywidgets'" - ] - } - ], + "outputs": [], "source": [ "\n", "import json\n", "from functools import partial\n", + "from pathlib import Path\n", "\n", "import ipywidgets as widgets\n", "import torch\n", "from IPython.display import HTML, clear_output, display\n", - "from transformers import AutoModel\n", + "from transformers import AutoTokenizer\n", "\n", "from delphi.config import ExperimentConfig, LatentConfig\n", - "from delphi.latents import LatentDataset, LatentLoader\n", + "from delphi.latents import LatentDataset\n", + "from delphi.latents.loader import LatentLoader\n", "from delphi.latents.constructors import default_constructor\n", "from delphi.latents.samplers import sample\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained(\"google/gemma-2-9b\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, 
"metadata": {}, "outputs": [], "source": [ @@ -96,17 +95,13 @@ " return array\n", "\n", "def tokens_and_activations_to_html(toks, activations, tokenizer, logit_diffs=None, model_type=\"causal\"):\n", - " # text_spacing = \"0.07em\"\n", " text_spacing = \"0.00em\"\n", " toks = convert_token_array_to_list(toks)\n", " activations = convert_token_array_to_list(activations)\n", - " # toks = [[tokenizer.decode(t).replace('Ġ', ' ').replace('\\n', '↵') for t in tok] for tok in toks]\n", " toks = [[tokenizer.decode(t).replace('Ġ', ' ').replace('\\n', '\\\\n') for t in tok] for tok in toks]\n", " print(len(activations))\n", " print(len(toks))\n", " highlighted_text = []\n", - " # Make background black\n", - " # highlighted_text.append('')\n", " highlighted_text.append(\"\"\"\n", "\n", "\"\"\")\n", @@ -138,42 +133,83 @@ " text_color, background_color = value_to_color(reward_change, 10, -10)\n", " highlighted_text.append(f'
Reward: {reward_change:.2f}')\n", " highlighted_text.append('
')\n", - " # highlighted_text.append('

')\n", - " # highlighted_text.append('')\n", " highlighted_text = ''.join(highlighted_text)\n", " return highlighted_text\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Edges [(0, 26213), (26214, 52427), (52428, 78642), (78643, 104856), (104857, 131071)]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0a594d12fbae4e8d816c7e6e94ae7228", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(Text(value='', description='Explanation:'), Button(description='Submit', style=ButtonStyle()), …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "layers.10_latent1\n", + "40\n", + "40\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "Token Activations:  0.0  0.2  0.5  0.7  1.0 
 of all of his committee memberships except Foreign Relations. In 1850, Benton was still opposed to the series of measures known as the “Great Compromise
 of his diocese, and when Bishop Rosati was appointed his coadjutor, New Orleans became again his residence. In 1826 Bishop Dubourg
 technologies available today. In the late '90s, there were two “critical concerns in the digital audio industry: Year of 2000
s\\nfew remaining high quality natural areas; this site was dedicated as a\\npreserve in 1970. In 1986 it was
ina became a federal judge in New York. In 1949, he presided over the trial of 11 leaders of the U.S.
both in Europe). Henderson actually got the jump on the others, making measurements of Alpha Centauri. In 1833, he packed up and went
 intelligence (AI). In 1990-91, while at McGill University, Montreal, Canada, based on this approach.\\nDr. Stil
\\nThe lead-acid wet cell reigned as the dominantwell, onlyform of rechargeable battery for 40 years. In 1899
 how this country defined treason and conspiracy. In 1945, the U.S. Supreme Court reviewed a conviction for treason, the only crime described
 retired at first to Bordeaux. In 1794 he emigrated to the United States where he was welcomed by Bishop Carroll. He was president of Georgetown College
 ever done before. He fought back, both in the courts and in the court of public opinion.\\nIn 1965, he organized one of
 shrapnel deep into her foot. In October 2007, she returned to work after recovering from her injuries. She is the fourth AAFES
th century the main overseas and colonial forces of the French military. In 1900 they were put under the orders of the War Ministry and took the
side in the Department of the Ohio, seeing action in Kentucky and eastern Tennessee. In May 1864, the regiment rejoined the Army of the Potomac
1963, Kenya attained independence (Uhuru) with Mzee Jomo Kenyatta\\nas the first Prime Minister. The day was named Uhuru Day.
1899, two lawyers paid a visit to the president of Coca-Cola. At the time, Coke was sold at soda fountains. But the lawyers
 Arab independence. In June 1918, Rania set up another club in Jerusalem, which became a haven for the ANZACs on leave from
2012, six partnerships received funding. The 2012 projects were conducted during the 2012 – 2013
\\nSome cooperative ventures made real money. “In the fall of 1932 Gary, Indiana, was ravaged by the depression, the steel mills were
 stand on. Finally, in 1886, the statue was dedicated.\\n- Engineer Gustave Eiffel, who would later design the Eiffel Tower in Paris
808, the Abbé Grégoire included her on his list of the courageous men who pleaded the cause of \"les nègres.\"\\n-A passionate advocate
Risk Capital via Family Money 1940s\\nDuring the 1930s, the heirs to U.S. family fortunes
 Coastal Defence Division (the Blue Berets), therefore it is sometimes referred to as the Marines of Poland. However, as of 2010 there are
82, one needed the equivalent of $200 million in current dollars. By 2005, it took $900 million to
, since Webster knew that his speech would make him unelectable in Massachusetts thereafter. On July 22, 1850, Webster resigned from the
 Time4Learning mom who uses lapbooks in her homeschool curriculum:\\n\"Since 2001, my kids have created a lapbook almost every week
02. Around 1804, the Spanish built a tower at Punta Na Radona to protect the beach at Son Bou, Minorca. In
 King Alaungpaya In 1752. In the 19lh century, during the peak period of colonialism, Myanmar was annexed in three stages
 the unit suffered 35 percent casualties. From March 1863 to January 1864, the 21st served with Burn
 he gradually overcame his opponents. On January 23, 1815, on the threshold of the New Orleans cathedral, he bestowed on General Jackson
1 had risen to become the youngest army chief of staff in Israeli history. Known for his bravery, Barak led two successful commando raids, one that stormed a hijack
s team was ordered to set up a program to weaponize the biological agents. By January 1991, a team of 100 scientists
 farms, churches, schools and businesses in their new homeland in Russia.\\nIn the later part of the 19th century, these ethnic Germans looked to
 government of Afghanistan, where terrorists linked to the 9/11 disaster were given sanctuary. By December 2002, AAFES operated more
 “U.S. Solar Market Insight: Third Quarter 2012.” By the end of third quarter 2012, 1,
, 2003\\nThe womens-to-mens earnings ratio varies by race and Hispanic origin.\\nIn 200
.\\nPrior to 2001, food security and food safety were viewed as the same. Large food manufacturers were geared to protect their assets: materials
 with what became Silicon Valley.\\nBuilding Blocks of Entrepreneurship\\nBy the mid 1950s the groundwork for a culture and environment of entrepreneurs
 in butchering practices seen in the head compared to the shin bone.\\nWhat we know about the colonists\\nIn the summer of 1609,
 from 2006 to 2010.\\n- Ratcheting Down – Beginning in 2011, the cap would be
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ + "def load_examples(tokenizer, layer_name=None, sae_size=\"131k\"):\n", + " latent_cfg = LatentConfig()\n", "\n", - " \n", - "def load_examples(layer_name=None,sae_size=\"131k\"):\n", - " \n", - " latent_cfg = LatentConfig(width=131072)\n", - " raw_dir = f\"raw_latents/gemma/{sae_size}\"\n", - " experiment_cfg = ExperimentConfig(n_random=0,train_type=\"quantiles\",n_examples_train=40,n_quantiles=10,example_ctx_len=32)\n", + " raw_dir = Path(f\"/mnt/ssd-1/lucia/delphi-2/results/gemma-baseline/latents\")\n", + " experiment_cfg = ExperimentConfig(n_non_activating=0)\n", + "\n", + " module = f\"layers.{layer_name}\"\n", "\n", - " #module = f\".model.layers.{layer_name}.post_feedforward_layernorm\"\n", - " module = f\".model.layers.{layer_name}\"\n", - " \n", " dataset = LatentDataset(\n", " raw_dir=raw_dir,\n", " cfg=latent_cfg,\n", " modules=[module],\n", - " latents={module:torch.tensor([3254, 6517, 8812,1318, 4834, 7605,])},\n", + " latents={module: torch.arange(100)},\n", " )\n", - " constructor=partial(\n", - " default_constructor,\n", - " n_random=experiment_cfg.n_random, \n", - " ctx_len=32, \n", - " max_examples=10000\n", - " )\n", - " sampler=partial(sample,cfg=experiment_cfg)\n", + " constructor = partial(\n", + " default_constructor,\n", + " token_loader=None,\n", + " n_not_active=experiment_cfg.n_non_activating,\n", + " ctx_len=experiment_cfg.example_ctx_len,\n", + " max_examples=latent_cfg.max_examples,\n", + " )\n", + " sampler = partial(sample, cfg=experiment_cfg)\n", " loader = LatentLoader(dataset, constructor=constructor, sampler=sampler)\n", "\n", " all_examples = {}\n", @@ -185,11 +221,14 @@ "\n", " return all_examples, maximum_activations\n", "\n", - "def plot_examples(layer_name=None,sae_size=\"131k\"):\n", - " all_examples, maximum_activations = load_examples(layer_name,sae_size)\n", + "\n", + "def plot_examples(tokenizer, 
layer_name=None, sae_size=\"131k\"):\n", + " all_examples, maximum_activations = load_examples(tokenizer, layer_name, sae_size)\n", " keys = list(all_examples.keys())\n", - " \n", - " current_index = [0] # Use a list to store the current index so it can be modified in the callback\n", + "\n", + " current_index = [\n", + " 0\n", + " ] # Use a list to store the current index so it can be modified in the callback\n", " explanations = {} # Dictionary to store explanations\n", "\n", " def display_example(index):\n", @@ -199,11 +238,17 @@ " list_activations = []\n", " for example in all_examples[key]:\n", " example_tokens = example.tokens\n", - " activations = example.activations/maximum_activations[key]\n", + " activations = example.activations / maximum_activations[key]\n", " list_tokens.append(example_tokens)\n", " list_activations.append(activations.tolist())\n", "\n", - " display(HTML(tokens_and_activations_to_html(list_tokens, list_activations, model.tokenizer)))\n", + " display(\n", + " HTML(\n", + " tokens_and_activations_to_html(\n", + " list_tokens, list_activations, tokenizer\n", + " )\n", + " )\n", + " )\n", "\n", " def on_submit(b):\n", " key = keys[current_index[0]]\n", @@ -212,13 +257,13 @@ " clear_output(wait=True)\n", " display(widgets.HBox([text_box, submit_button, skip_button, save_button]))\n", " display_example(current_index[0])\n", - " \n", + "\n", " def on_skip(b):\n", " current_index[0] = (current_index[0] + 1) % len(keys)\n", " clear_output(wait=True)\n", " display(widgets.HBox([text_box, submit_button, skip_button, save_button]))\n", " display_example(current_index[0])\n", - " \n", + "\n", " def on_save(b):\n", " with open(f\"{layer_name}_explanations.json\", \"w\") as f:\n", " json.dump(explanations, f)\n", @@ -233,101 +278,9 @@ " save_button.on_click(on_save)\n", " display(widgets.HBox([text_box, submit_button, skip_button, save_button]))\n", " display_example(current_index[0])\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": 
{}, - "source": [ - "## Loading the model and the tokens" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4bfdb4414e2b4010ac7b2e134849f078", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/8 [00:00\n", - "Token Activations:  0.0  0.2  0.5  0.8  1.0 
, a plate that dissolve into the image of the full moon, with an almost hypnotic use of cross fades), to the many pagan and pantheist rituals,
 the charm and wit of \"What Alice Forgot\" and \"The Last Anniversary\". I also liked the character development of the more serious \"The Hypnotist'
BOS own story, because when the two of them are on screen together sparks fly (literally!) and its powerful, electric, hypnotizing. They get
 voice put me in a trance that made me want to close my eyes to soak it all in, but hes so gorgeous, how could I? Joshua
BOSA sweeping, carefully reconstructed portrait of a nationleaps through centuries.”—New York Times “A spellbinding tour of ancient Ireland.”—Booklist Praise for
\\nBy James E. Watson\\nNarrated By James E. Watson\\nAfter Breaking Up relieves the pain by combining the immediate proven effectiveness of clinical hypnosis along with
BOS.\\nAfter the narcotics transaction was completed — which included a seizure of 50 Percocet pills (an amount that results in a trafficking charge)
 door of the Wal Mart. We waited, some patiently, others irritated because nature messed up their hurried day.\\nI am always mesmerized by rainfall. I
 The officers detained the individuals and requested the assistance of the Hawkins County Sheriff's Narcotics Unit,\" Hawkins County Sheriff Ronnie Lawson said in a news release. Brown
 James Brown and Sam & Dave, the Austin-based band is a riveting live act capable of sending dancers into a frenzy with deranged material like \"She'
BOS opening with Family Guy characters (fuck, I nearly forgot how trippy and weird it was) and using an actual clip (\"...Its!!\"). It also
 of information.\\nGerman Professor Brian Tucker talking about the Goethe Haus.\\nThe style of the paintings was just entrancing, bright, and vibrant. After going
 Burlington. A look inside Americas criminal-justice system from the dealer to the narcotics officer, the inmate to the federal judge. Free. (80
 a different virtual world. Trippy.\\nMost important of all though, it means therell need to be an internal inquiry into the scoring of the April
.S.Code, prohibiting the purchase or sale of a narcotic drug, counts 3 and 4 of which charged separate violations on December 17,
BOSBOSFootball player: #53Brannon, Robert\tReference URL\\nBrannon, Robert\\nORAL HISTORY OF ROBERT BRANNON\\nInterviewed and recorded
BOS heard these songs before, that the listener \"has always known these songs\". The entire set of songs is slow, meditative, reflective. The music sounds simply
 I see kids come in to the places ive worked totally green and they expect to be guided, or transfixed or work closely with a chef but the truth is
 and urine tests to check for the presence of anabolic agents, growth hormones, diuretics, stimulants, narcotics, cannabinoids and other banned substances. Athletes must
1925 “Breakfast Alcoves: dreamy intimate seating\\nGrace & Poise\\nFull Feminine Skirts\\nShabby Chic Tiny Retreat\\nTea for
BOS informed them that they'd just committed a felony.For more lulz, those weapons where that has been done are now considered permanently Title II weapons subject
 of The Newton Papers, by Sarah Dry A riveting and untold story, The Newton Papers reveals a man altogether stranger and more complicated than the genius of legend.\\n
, ceramics, collage, connection, craft, design, drawing, dreamy, Etsy, fashion, Flickr, graffiti, graphic, handmade, illustration, indie, joy
'll be dreaming about that dreamy piano!\\nThat's a hilarious story. I'm guessing the groom's mother felt like barfing, herself
.. besides then why the surprise visit? Cabello really tired the other as wellthe narcogeneralesthis would be like an AA meeting one day at a
re not alone. What was it like going back to Kiribati?\tReally trippy. At low tide, it seemed like nothing had changed since I lived
 lulls and lures the faithful away from the church. Islam for what its worth is in your face as was Communism. Orthodox Christianity survived the Ottoman Empire. Christianity
-zilla mixing of (often bad) pop music, hip nightclub scenes and psychedelic imagery (including animated interludes and a bad acid trip) with an environmental message
 youth. A brutally captivating chorus wraps desolation and desperation into the lyrics 'We are made from broken parts ... we are broken from the start'. It's self
, TV is the opiate of the masses, but there are still some programs with intellectual and artistic merit.\\nlong-time reader commented on Are Writers at
BOS\\nEight Top Considerations When Evaluating Cloud-Based Solutions for Finance\\nTake Heed From the Nirvanix Failure: Diversify and Hedge Your Cloud Services\\n
. During his junior year, his father kicked him out of the house and Bennett eventually dropped out of high school before turning his attention to selling narcotics. In between
 to have Nirvana find you.\\nYes, I am sure you could say more about that, Chas, as I did years ago in an article writ for the
 it because it contains no lulz. Well most of his comics don't.\"There is definitely an attempt at a joke in most though.ReplyDeleteIan
 been trying to hide behind marriage and family. He should not have been using religion and motivationalism as an opiate, any more than he should have been using
 from the conventional world, but rather the selfless self breaking back into the conventional world. It is only when this samadhi has been shattered that a new self
 over the AFP and PNP units in their respective islands for counter-insurgency operations. The lull on troops and enemy confrontations prompted the Command to shift some of
8 p.m. EDT on Lifetime.\\nTonights episode of “Through the Wormhole With Morgan Freeman will talk about what would happen to humans
 the local Hempstead community, and the weekly food run which feeds over 100 needy people in Hempstead each Saturday morning. The entire Melackrinos
 stared.\\nSlowly, I surfaced from my catatonic state and began to hear the music his brilliant band were making and listened to his delivery of old and new
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ "\n", - "plot_examples(layer=\"10\",sae_size=\"131k\")" + "plot_examples(tokenizer, layer_name=\"10\", sae_size=\"131k\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -346,7 +299,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.16" + "version": "3.10.14" } }, "nbformat": 4, From 1deb05e58a054a456b7ae9bf849a7a1eb06cb833 Mon Sep 17 00:00:00 2001 From: Lucia Quirke Date: Sun, 16 Feb 2025 07:54:19 +0000 Subject: [PATCH 3/4] Simplify notebook --- delphi/latents/loader.py | 27 +++---- examples/latent_contexts.ipynb | 132 ++++++++++++++++----------------- 2 files changed, 75 insertions(+), 84 deletions(-) diff --git a/delphi/latents/loader.py b/delphi/latents/loader.py index 7aa47d03..4d3e7bb5 100644 --- a/delphi/latents/loader.py +++ b/delphi/latents/loader.py @@ -43,7 +43,7 @@ class TensorBuffer: Lazy loading buffer for cached splits. 
""" - path: Path | str + path: str """Path to the tensor file.""" module_path: str @@ -80,12 +80,7 @@ def __iter__(self): def load(self): split_data = load_file(self.path) - - first_latent = ( - int(self.path.stem.split("_")[0]) - if isinstance(self.path, Path) - else int(self.path.split("/")[-1].split("_")[0]) - ) + first_latent = int(self.path.split("/")[-1].split("_")[0]) activations = torch.tensor(split_data["activations"]) locations = torch.tensor(split_data["locations"].astype(np.int64)) if "tokens" in split_data: @@ -126,7 +121,7 @@ class LatentDataset: def __init__( self, - raw_dir: Path, + raw_dir: str, cfg: LatentConfig, tokenizer: Optional[Callable] = None, modules: Optional[list[str]] = None, @@ -152,7 +147,7 @@ def __init__( else: self._build_selected(raw_dir, modules, latents) # TODO: this assumes that all modules have the same config - cache_config_dir = raw_dir / modules[0] / "config.json" + cache_config_dir = f"{raw_dir}/{modules[0]}/config.json" with open(cache_config_dir, "r") as f: cache_config = json.load(f) if tokenizer is None: @@ -186,8 +181,8 @@ def load_tokens(self): ) return self.tokens - def _edges(self, raw_dir: Path, module: str) -> list[tuple[int, int]]: - module_dir = raw_dir / module + def _edges(self, raw_dir: str, module: str) -> list[tuple[int, int]]: + module_dir = Path(raw_dir) / module safetensor_files = [f for f in module_dir.glob("*.safetensors")] edges = [] for file in safetensor_files: @@ -196,27 +191,27 @@ def _edges(self, raw_dir: Path, module: str) -> list[tuple[int, int]]: edges.sort(key=lambda x: x[0]) return edges - def _build(self, raw_dir: Path, modules: Optional[list[str]] = None): + def _build(self, raw_dir: str, modules: Optional[list[str]] = None): """ Build dataset buffers which load all cached latents. Args: - raw_dir (Path): Directory containing raw latent data. + raw_dir (str): Directory containing raw latent data. modules (Optional[list[str]]): list of module names to include. 
""" - modules = os.listdir(str(raw_dir)) if modules is None else modules + modules = os.listdir(raw_dir) if modules is None else modules for module in modules: edges = self._edges(raw_dir, module) for start, end in edges: - path = raw_dir / module / f"{start}_{end}.safetensors" + path = f"{raw_dir}/{module}/{start}_{end}.safetensors" self.buffers.append( TensorBuffer(path, module, min_examples=self.cfg.min_examples) ) def _build_selected( self, - raw_dir: Path, + raw_dir: str, modules: list[str], latents: dict[str, Union[int, torch.Tensor]], ): diff --git a/examples/latent_contexts.ipynb b/examples/latent_contexts.ipynb index 655e4ab9..e5eabcea 100644 --- a/examples/latent_contexts.ipynb +++ b/examples/latent_contexts.ipynb @@ -30,15 +30,6 @@ "from delphi.latents.samplers import sample\n" ] }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "tokenizer = AutoTokenizer.from_pretrained(\"google/gemma-2-9b\")" - ] - }, { "cell_type": "code", "execution_count": 7, @@ -139,68 +130,20 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Edges [(0, 26213), (26214, 52427), (52428, 78642), (78643, 104856), (104857, 131071)]\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0a594d12fbae4e8d816c7e6e94ae7228", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(Text(value='', description='Explanation:'), Button(description='Submit', style=ButtonStyle()), …" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "layers.10_latent1\n", - "40\n", - "40\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "Token Activations:  0.0  0.2  0.5  0.7  1.0 
 of all of his committee memberships except Foreign Relations. In 1850, Benton was still opposed to the series of measures known as the “Great Compromise
 of his diocese, and when Bishop Rosati was appointed his coadjutor, New Orleans became again his residence. In 1826 Bishop Dubourg
 technologies available today. In the late '90s, there were two “critical concerns in the digital audio industry: Year of 2000
s\\nfew remaining high quality natural areas; this site was dedicated as a\\npreserve in 1970. In 1986 it was
ina became a federal judge in New York. In 1949, he presided over the trial of 11 leaders of the U.S.
both in Europe). Henderson actually got the jump on the others, making measurements of Alpha Centauri. In 1833, he packed up and went
 intelligence (AI). In 1990-91, while at McGill University, Montreal, Canada, based on this approach.\\nDr. Stil
\\nThe lead-acid wet cell reigned as the dominantwell, onlyform of rechargeable battery for 40 years. In 1899
 how this country defined treason and conspiracy. In 1945, the U.S. Supreme Court reviewed a conviction for treason, the only crime described
 retired at first to Bordeaux. In 1794 he emigrated to the United States where he was welcomed by Bishop Carroll. He was president of Georgetown College
 ever done before. He fought back, both in the courts and in the court of public opinion.\\nIn 1965, he organized one of
 shrapnel deep into her foot. In October 2007, she returned to work after recovering from her injuries. She is the fourth AAFES
th century the main overseas and colonial forces of the French military. In 1900 they were put under the orders of the War Ministry and took the
side in the Department of the Ohio, seeing action in Kentucky and eastern Tennessee. In May 1864, the regiment rejoined the Army of the Potomac
1963, Kenya attained independence (Uhuru) with Mzee Jomo Kenyatta\\nas the first Prime Minister. The day was named Uhuru Day.
1899, two lawyers paid a visit to the president of Coca-Cola. At the time, Coke was sold at soda fountains. But the lawyers
 Arab independence. In June 1918, Rania set up another club in Jerusalem, which became a haven for the ANZACs on leave from
2012, six partnerships received funding. The 2012 projects were conducted during the 2012 – 2013
\\nSome cooperative ventures made real money. “In the fall of 1932 Gary, Indiana, was ravaged by the depression, the steel mills were
 stand on. Finally, in 1886, the statue was dedicated.\\n- Engineer Gustave Eiffel, who would later design the Eiffel Tower in Paris
808, the Abbé Grégoire included her on his list of the courageous men who pleaded the cause of \"les nègres.\"\\n-A passionate advocate
Risk Capital via Family Money 1940s\\nDuring the 1930s, the heirs to U.S. family fortunes
 Coastal Defence Division (the Blue Berets), therefore it is sometimes referred to as the Marines of Poland. However, as of 2010 there are
82, one needed the equivalent of $200 million in current dollars. By 2005, it took $900 million to
, since Webster knew that his speech would make him unelectable in Massachusetts thereafter. On July 22, 1850, Webster resigned from the
 Time4Learning mom who uses lapbooks in her homeschool curriculum:\\n\"Since 2001, my kids have created a lapbook almost every week
02. Around 1804, the Spanish built a tower at Punta Na Radona to protect the beach at Son Bou, Minorca. In
 King Alaungpaya In 1752. In the 19lh century, during the peak period of colonialism, Myanmar was annexed in three stages
 the unit suffered 35 percent casualties. From March 1863 to January 1864, the 21st served with Burn
 he gradually overcame his opponents. On January 23, 1815, on the threshold of the New Orleans cathedral, he bestowed on General Jackson
1 had risen to become the youngest army chief of staff in Israeli history. Known for his bravery, Barak led two successful commando raids, one that stormed a hijack
s team was ordered to set up a program to weaponize the biological agents. By January 1991, a team of 100 scientists
 farms, churches, schools and businesses in their new homeland in Russia.\\nIn the later part of the 19th century, these ethnic Germans looked to
 government of Afghanistan, where terrorists linked to the 9/11 disaster were given sanctuary. By December 2002, AAFES operated more
 “U.S. Solar Market Insight: Third Quarter 2012.” By the end of third quarter 2012, 1,
, 2003\\nThe womens-to-mens earnings ratio varies by race and Hispanic origin.\\nIn 200
.\\nPrior to 2001, food security and food safety were viewed as the same. Large food manufacturers were geared to protect their assets: materials
 with what became Silicon Valley.\\nBuilding Blocks of Entrepreneurship\\nBy the mid 1950s the groundwork for a culture and environment of entrepreneurs
 in butchering practices seen in the head compared to the shin bone.\\nWhat we know about the colonists\\nIn the summer of 1609,
 from 2006 to 2010.\\n- Ratcheting Down – Beginning in 2011, the cap would be
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "def load_examples(tokenizer, layer_name=None, sae_size=\"131k\"):\n", + "def load_gemma_examples(tokenizer, raw_dir, hookpoint: str):\n", " latent_cfg = LatentConfig()\n", "\n", - " raw_dir = Path(f\"/mnt/ssd-1/lucia/delphi-2/results/gemma-baseline/latents\")\n", " experiment_cfg = ExperimentConfig(n_non_activating=0)\n", "\n", - " module = f\"layers.{layer_name}\"\n", - "\n", " dataset = LatentDataset(\n", " raw_dir=raw_dir,\n", " cfg=latent_cfg,\n", - " modules=[module],\n", - " latents={module: torch.arange(100)},\n", + " modules=[hookpoint],\n", + " latents={hookpoint: torch.arange(100)},\n", " )\n", " constructor = partial(\n", " default_constructor,\n", @@ -222,8 +165,8 @@ " return all_examples, maximum_activations\n", "\n", "\n", - "def plot_examples(tokenizer, layer_name=None, sae_size=\"131k\"):\n", - " all_examples, maximum_activations = load_examples(tokenizer, layer_name, sae_size)\n", + "def plot_examples(tokenizer, raw_dir, hookpoint: str):\n", + " all_examples, maximum_activations = load_gemma_examples(tokenizer, raw_dir, hookpoint)\n", " keys = list(all_examples.keys())\n", "\n", " current_index = [\n", @@ -265,9 +208,9 @@ " display_example(current_index[0])\n", "\n", " def on_save(b):\n", - " with open(f\"{layer_name}_explanations.json\", \"w\") as f:\n", + " with open(f\"{hookpoint}_explanations.json\", \"w\") as f:\n", " json.dump(explanations, f)\n", - " print(f\"Explanations saved to {layer_name}_explanations.json\")\n", + " print(f\"Explanations saved to {hookpoint}_explanations.json\")\n", "\n", " text_box = widgets.Text(description=\"Explanation:\")\n", " submit_button = widgets.Button(description=\"Submit\")\n", @@ -277,9 +220,62 @@ " save_button = widgets.Button(description=\"Save\")\n", " save_button.on_click(on_save)\n", " display(widgets.HBox([text_box, submit_button, skip_button, save_button]))\n", - " 
display_example(current_index[0])\n", + " display_example(current_index[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2422c5fd14cf4e53b32f989b191c9534", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(Text(value='', description='Explanation:'), Button(description='Submit', style=ButtonStyle()), …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gpt_neox.layers.1_latent5\n", + "40\n", + "40\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "Token Activations:  -0.6  -0.4  -0.3  -0.1  0.0  0.2  0.4  0.6  0.8 
 arteritis. Symptoms of this condition can include headaches, scalp tenderness, jaw pain, blurred vision, double
 a turbulent field of flowing grass, eerily surrounded by dark, ominous clouds.\\nFireworks are such an exciting
 soon, an alarm would sound to signal a false-start.\\nThese button-mashing duels revealed that, on average, the players
 world dark enough and yet clear enough to see these collisions occurring.\\nThat's if the Morlocks don't get you first.\\nWhy do
imates of the flow rate appear to have impeded planning for source-control efforts to completely cap the leak.\\nThe criticisms may
aries of life are not easily carried about, and hence men agreed to employ in their dealings with each other something which was intrinsically useful and
 to be the most commonly employed economic statistics: a remarkable violation of the economists' principle of the virtues of the market!
gical removal would involve a very extensive procedure involving vital structures. In some instances, radiation therapy may be employed before surgery or chem
 and were present at the consultation.\\nUpon their departure, Vindicius secretly quitted the house, but was at a loss what
ile to serve as Algeria's fourth president. Facing sporadic outbreaks of violence and terrorism, the security forces took control of
 leading voice behind the new hospital was Jane Renwick Smedburg Wilkes, remembered as the \"Godmother of Charl
 storage, habitat/watershed protection and enhancement, enhanced water conservation, and market reallocation. The Integrated Plan was developed to
While the continental U.S. shivered through an abnormally cold spell in December 2013, Alaska experienced record-breaking heat.
verse health effects from exposure to power frequency EMF. Over the past 30 years, there have been many scientific studies conducted on power frequency EMF.
 do not represent a fixed establishment of a new \"invention\", but appear only to order a wide spread type of polyphony \" into a fixed
ued since the year 1790, application was made to Chancery in 1819, and a decree obtained in 1821, appointing the ald
uled by Persian governors appointed by Alexander and his successors. Ardvates, 317-284 B.C., freed himself
 trees, frequently referred to as \"living Christmas trees\", are sold live with roots and soil, often from a nursery, to be stored at nurs
ms under conditions of constant darkness. SynRas mice exhibited circadian rhythms in locomotor activities similar to WT mice.
ations will be used for fungal pericarditis\\nOther medicines that may be used are:\\nCorticosteroids such as predn
 our present.\\nActors reminiscences may rarely attract the attention of readers, but the recently published The Richard Burton Diaries (IS
 foods as it does to cocaine or nicotine? According to Dr. Pam Peeke, author of The Hunger Fix,
3 g of licorice root daily should be safe for most adults. (See Safety Issues.) Individuals who wish to take a higher dose should
 sunscreen if you don't mind the appearance of the white layer on your skin [POINT], but I dont see the point
, forcing the employer to incur a total labor cost of $12,000 for an employee worth to the employer only $10,000 will
 modest today than they were 20 years ago.\" According to Segev, there is daily evidence of the fact that positions are becoming more entrenched and
man.)|\\n|gentlemen's club||strip club, go-go bar|\\n|in the family way||pregn
urgitative feeding of the brooding female by the male was observed at close range at Sioux City, Iowa, by Dales and B
 community specialists, youth court advocates, childcare workers, psychological assistants, and more. The undergraduate degree also serves as excellent preparation for
 up new cases of the virus before vocal international concern at the global spread of SARS prompted a change in policy.\\nFor a moment,
 do honour to the founder of the Fund, and secondly, it was their wish that the names of those 22 of their comrades, who
 no sound being generated from speaker 29 of a receiving remote unit. According to the exemplary embodiment of FIG. 5, background noise is continuously
uxury accommodation.\\nEspecially along the eastern boundary, heavy industrial activity like cement factories and an oil refinery contribute to a
 noble and challenging task, and the scale of difficulties rises when the children have anxiety.\\nNonetheless, as what American cultural
\\nHow to prevent anxiety and fear\\nExplain regularly who people are and what is going on\\nProvide reassurance and explain what is going
, state Education Department officials have been receptive and have asked for information about how other states handle sex education.\\nState Education Department officials issued a two
's health.\\nBut if some or all of the kryptonian genes are dominant...\\nCan the infant use his X-ray vision
sacea lesions. \\nAlthough rosacea rarely appears in children, its potential occurrence should be considered during medical examinations because of
 an early warning system in which each rumor deserves investigation to determine its veracity; 2) Apply a case definition that will have a high
 When it comes to current capacity, “The total installed capacity of grid interactive renewable power, which was 16817 MW as on 31
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# tokenizer = AutoTokenizer.from_pretrained(\"google/gemma-2-9b\")\n", + "# raw_dir = Path(f\"/mnt/ssd-1/lucia/delphi-2/results/gemma-baseline/latents\")\n", + "# hookpoint = \"layers.10\"\n", + "# plot_examples(tokenizer, raw_dir, hookpoint)\n", "\n", - "plot_examples(tokenizer, layer_name=\"10\", sae_size=\"131k\")" + "tokenizer = AutoTokenizer.from_pretrained(\"EleutherAI/FineWeb-restricted\")\n", + "raw_dir = Path(f\"/mnt/ssd-1/lucia/clearnets/results/clearnet-57280/latents\")\n", + "hookpoint = f\"gpt_neox.layers.1\"\n", + "plot_examples(tokenizer, raw_dir, hookpoint)" ] } ], From 1e61d088317d32f9411d9a127dfdddd9d62074f6 Mon Sep 17 00:00:00 2001 From: Lucia Quirke Date: Mon, 17 Feb 2025 02:26:56 +0000 Subject: [PATCH 4/4] Format all jsons with line breaks; use default value that doesn't leak answers in scorers; fix bug where verbose=True changes a return type --- .gitignore | 1 + delphi/__main__.py | 5 +++-- delphi/clients/offline.py | 7 ++++++- delphi/explainers/default/default.py | 8 +++----- delphi/latents/cache.py | 2 +- delphi/latents/neighbours.py | 2 +- delphi/scorers/classifier/detection.py | 2 +- delphi/scorers/classifier/fuzz.py | 8 ++++---- examples/latent_contexts.ipynb | 28 ++++++++++++++------------ 9 files changed, 35 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 313c5e82..c839d7e4 100644 --- a/.gitignore +++ b/.gitignore @@ -181,3 +181,4 @@ cython_debug/ # Development artifacts results/ .vscode/ +statistics/ diff --git a/delphi/__main__.py b/delphi/__main__.py index 0ed84479..956505da 100644 --- a/delphi/__main__.py +++ b/delphi/__main__.py @@ -117,6 +117,7 @@ async def process_cache( # set of examples max_model_len=run_cfg.explainer_model_max_len, num_gpus=run_cfg.num_gpus, + statistics=run_cfg.verbose, ) elif run_cfg.explainer_provider == "openrouter": if ( @@ -171,7 +172,7 @@ def 
scorer_postprocess(result, score_dir): DetectionScorer( client, tokenizer=dataset.tokenizer, # type: ignore - batch_size=run_cfg.num_examples_per_scorer_prompt, + n_examples_shown=run_cfg.num_examples_per_scorer_prompt, verbose=run_cfg.verbose, log_prob=False, ), @@ -182,7 +183,7 @@ def scorer_postprocess(result, score_dir): FuzzingScorer( client, tokenizer=dataset.tokenizer, # type: ignore - batch_size=run_cfg.num_examples_per_scorer_prompt, + n_examples_shown=run_cfg.num_examples_per_scorer_prompt, verbose=run_cfg.verbose, log_prob=False, ), diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py index aae1f93e..cd70bd48 100644 --- a/delphi/clients/offline.py +++ b/delphi/clients/offline.py @@ -2,6 +2,7 @@ import json from dataclasses import dataclass from functools import partial +from pathlib import Path from typing import Union from transformers import AutoTokenizer @@ -69,6 +70,10 @@ def __init__( self.batch_size = batch_size self.statistics = statistics + if self.statistics: + self.statistics_path = Path("statistics") + self.statistics_path.mkdir(parents=True, exist_ok=True) + async def process_func(self, batches: Union[str, list[dict[str, str]]], kwargs): """ Process a single request. 
@@ -127,7 +132,7 @@ async def process_func(self, batches: Union[str, list[dict[str, str]]], kwargs): with open( f"statistics/{hash(batches[i][-1]['content'][-100:])}.json", "w" ) as f: - json.dump(statistics[i].__dict__, f) + json.dump(statistics[i].__dict__, f, indent=4) new_response.append( Response( text=r.outputs[0].text, diff --git a/delphi/explainers/default/default.py b/delphi/explainers/default/default.py index 80774e86..d6a69037 100644 --- a/delphi/explainers/default/default.py +++ b/delphi/explainers/default/default.py @@ -40,11 +40,9 @@ async def __call__(self, record): try: explanation = self.parse_explanation(response.text) if self.verbose: - return ( - messages[-1]["content"], - response, - ExplainerResult(record=record, explanation=explanation), - ) + logger.info(f"Explanation: {explanation}") + logger.info(f"Final message to explainer: {messages[-1]['content']}") + logger.info(f"Response from explainer: {response.text}") return ExplainerResult(record=record, explanation=explanation) except Exception as e: diff --git a/delphi/latents/cache.py b/delphi/latents/cache.py index 044bb9cd..cc04b5c6 100644 --- a/delphi/latents/cache.py +++ b/delphi/latents/cache.py @@ -361,4 +361,4 @@ def save_config(self, save_dir: Path, cfg: CacheConfig, model_name: str): with open(config_file, "w") as f: config_dict = cfg.to_dict() config_dict["model_name"] = model_name - json.dump(config_dict, f) + json.dump(config_dict, f, indent=4) diff --git a/delphi/latents/neighbours.py b/delphi/latents/neighbours.py index 90a4e587..0b370584 100644 --- a/delphi/latents/neighbours.py +++ b/delphi/latents/neighbours.py @@ -219,7 +219,7 @@ def save_neighbour_cache(self) -> None: Save the neighbour cache to the path as a json file """ with open(self.path, "w") as f: - json.dump(self.neighbour_cache, f) + json.dump(self.neighbour_cache, f, indent=4) def load_neighbour_cache(self) -> dict[str, dict[int, list[int]]]: """ diff --git a/delphi/scorers/classifier/detection.py 
b/delphi/scorers/classifier/detection.py index 3ce1b1df..fe1f0255 100644 --- a/delphi/scorers/classifier/detection.py +++ b/delphi/scorers/classifier/detection.py @@ -15,7 +15,7 @@ def __init__( client: Client, tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, verbose: bool = False, - n_examples_shown: int = 10, + n_examples_shown: int = 1, log_prob: bool = False, temperature: float = 0.0, **generation_kwargs, diff --git a/delphi/scorers/classifier/fuzz.py b/delphi/scorers/classifier/fuzz.py index 09bca111..9488efcb 100644 --- a/delphi/scorers/classifier/fuzz.py +++ b/delphi/scorers/classifier/fuzz.py @@ -20,7 +20,7 @@ def __init__( client: Client, tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, verbose: bool = False, - n_examples_shown: int = 10, + n_examples_shown: int = 1, threshold: float = 0.3, log_prob: bool = False, temperature: float = 0.0, @@ -35,9 +35,9 @@ def __init__( verbose: Whether to print verbose output. n_examples_shown: The number of examples to show in the prompt, a larger number can both leak information and make - it harder for models to generate anwers in the correct format - log_prob: Whether to use log probabilities to allow for AUC calculation - generation_kwargs: Additional generation kwargs + it harder for models to generate answers in the correct format. + log_prob: Whether to use log probabilities to allow for AUC calculation. + generation_kwargs: Additional generation kwargs. 
""" super().__init__( client=client, diff --git a/examples/latent_contexts.ipynb b/examples/latent_contexts.ipynb index e5eabcea..445e4ff0 100644 --- a/examples/latent_contexts.ipynb +++ b/examples/latent_contexts.ipynb @@ -209,7 +209,7 @@ "\n", " def on_save(b):\n", " with open(f\"{hookpoint}_explanations.json\", \"w\") as f:\n", - " json.dump(explanations, f)\n", + " json.dump(explanations, f, indent=4)\n", " print(f\"Explanations saved to {hookpoint}_explanations.json\")\n", "\n", " text_box = widgets.Text(description=\"Explanation:\")\n", @@ -225,13 +225,20 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Edges [(0, 26213), (26214, 52427), (52428, 78642), (78643, 104856), (104857, 131071)]\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2422c5fd14cf4e53b32f989b191c9534", + "model_id": "c04bf7b6819e4f9b9d3bdbb95987e811", "version_major": 2, "version_minor": 0 }, @@ -246,7 +253,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "gpt_neox.layers.1_latent5\n", + "layers.10_latent1\n", "40\n", "40\n" ] @@ -256,7 +263,7 @@ "text/html": [ "\n", "\n", - "Token Activations:  -0.6  -0.4  -0.3  -0.1  0.0  0.2  0.4  0.6  0.8 
 arteritis. Symptoms of this condition can include headaches, scalp tenderness, jaw pain, blurred vision, double
 a turbulent field of flowing grass, eerily surrounded by dark, ominous clouds.\\nFireworks are such an exciting
 soon, an alarm would sound to signal a false-start.\\nThese button-mashing duels revealed that, on average, the players
 world dark enough and yet clear enough to see these collisions occurring.\\nThat's if the Morlocks don't get you first.\\nWhy do
imates of the flow rate appear to have impeded planning for source-control efforts to completely cap the leak.\\nThe criticisms may
aries of life are not easily carried about, and hence men agreed to employ in their dealings with each other something which was intrinsically useful and
 to be the most commonly employed economic statistics: a remarkable violation of the economists' principle of the virtues of the market!
gical removal would involve a very extensive procedure involving vital structures. In some instances, radiation therapy may be employed before surgery or chem
 and were present at the consultation.\\nUpon their departure, Vindicius secretly quitted the house, but was at a loss what
ile to serve as Algeria's fourth president. Facing sporadic outbreaks of violence and terrorism, the security forces took control of
 leading voice behind the new hospital was Jane Renwick Smedburg Wilkes, remembered as the \"Godmother of Charl
 storage, habitat/watershed protection and enhancement, enhanced water conservation, and market reallocation. The Integrated Plan was developed to
While the continental U.S. shivered through an abnormally cold spell in December 2013, Alaska experienced record-breaking heat.
verse health effects from exposure to power frequency EMF. Over the past 30 years, there have been many scientific studies conducted on power frequency EMF.
 do not represent a fixed establishment of a new \"invention\", but appear only to order a wide spread type of polyphony \" into a fixed
ued since the year 1790, application was made to Chancery in 1819, and a decree obtained in 1821, appointing the ald
uled by Persian governors appointed by Alexander and his successors. Ardvates, 317-284 B.C., freed himself
 trees, frequently referred to as \"living Christmas trees\", are sold live with roots and soil, often from a nursery, to be stored at nurs
ms under conditions of constant darkness. SynRas mice exhibited circadian rhythms in locomotor activities similar to WT mice.
ations will be used for fungal pericarditis\\nOther medicines that may be used are:\\nCorticosteroids such as predn
 our present.\\nActors reminiscences may rarely attract the attention of readers, but the recently published The Richard Burton Diaries (IS
 foods as it does to cocaine or nicotine? According to Dr. Pam Peeke, author of The Hunger Fix,
3 g of licorice root daily should be safe for most adults. (See Safety Issues.) Individuals who wish to take a higher dose should
 sunscreen if you don't mind the appearance of the white layer on your skin [POINT], but I dont see the point
, forcing the employer to incur a total labor cost of $12,000 for an employee worth to the employer only $10,000 will
 modest today than they were 20 years ago.\" According to Segev, there is daily evidence of the fact that positions are becoming more entrenched and
man.)|\\n|gentlemen's club||strip club, go-go bar|\\n|in the family way||pregn
urgitative feeding of the brooding female by the male was observed at close range at Sioux City, Iowa, by Dales and B
 community specialists, youth court advocates, childcare workers, psychological assistants, and more. The undergraduate degree also serves as excellent preparation for
 up new cases of the virus before vocal international concern at the global spread of SARS prompted a change in policy.\\nFor a moment,
 do honour to the founder of the Fund, and secondly, it was their wish that the names of those 22 of their comrades, who
 no sound being generated from speaker 29 of a receiving remote unit. According to the exemplary embodiment of FIG. 5, background noise is continuously
uxury accommodation.\\nEspecially along the eastern boundary, heavy industrial activity like cement factories and an oil refinery contribute to a
 noble and challenging task, and the scale of difficulties rises when the children have anxiety.\\nNonetheless, as what American cultural
\\nHow to prevent anxiety and fear\\nExplain regularly who people are and what is going on\\nProvide reassurance and explain what is going
, state Education Department officials have been receptive and have asked for information about how other states handle sex education.\\nState Education Department officials issued a two
's health.\\nBut if some or all of the kryptonian genes are dominant...\\nCan the infant use his X-ray vision
sacea lesions. \\nAlthough rosacea rarely appears in children, its potential occurrence should be considered during medical examinations because of
 an early warning system in which each rumor deserves investigation to determine its veracity; 2) Apply a case definition that will have a high
 When it comes to current capacity, “The total installed capacity of grid interactive renewable power, which was 16817 MW as on 31
" + "Token Activations:  0.0  0.2  0.5  0.7  1.0 
 of all of his committee memberships except Foreign Relations. In 1850, Benton was still opposed to the series of measures known as the “Great Compromise
 of his diocese, and when Bishop Rosati was appointed his coadjutor, New Orleans became again his residence. In 1826 Bishop Dubourg
 technologies available today. In the late '90s, there were two “critical concerns in the digital audio industry: Year of 2000
s\\nfew remaining high quality natural areas; this site was dedicated as a\\npreserve in 1970. In 1986 it was
ina became a federal judge in New York. In 1949, he presided over the trial of 11 leaders of the U.S.
both in Europe). Henderson actually got the jump on the others, making measurements of Alpha Centauri. In 1833, he packed up and went
 intelligence (AI). In 1990-91, while at McGill University, Montreal, Canada, based on this approach.\\nDr. Stil
\\nThe lead-acid wet cell reigned as the dominantwell, onlyform of rechargeable battery for 40 years. In 1899
 how this country defined treason and conspiracy. In 1945, the U.S. Supreme Court reviewed a conviction for treason, the only crime described
 retired at first to Bordeaux. In 1794 he emigrated to the United States where he was welcomed by Bishop Carroll. He was president of Georgetown College
 ever done before. He fought back, both in the courts and in the court of public opinion.\\nIn 1965, he organized one of
 shrapnel deep into her foot. In October 2007, she returned to work after recovering from her injuries. She is the fourth AAFES
th century the main overseas and colonial forces of the French military. In 1900 they were put under the orders of the War Ministry and took the
side in the Department of the Ohio, seeing action in Kentucky and eastern Tennessee. In May 1864, the regiment rejoined the Army of the Potomac
1963, Kenya attained independence (Uhuru) with Mzee Jomo Kenyatta\\nas the first Prime Minister. The day was named Uhuru Day.
1899, two lawyers paid a visit to the president of Coca-Cola. At the time, Coke was sold at soda fountains. But the lawyers
 Arab independence. In June 1918, Rania set up another club in Jerusalem, which became a haven for the ANZACs on leave from
2012, six partnerships received funding. The 2012 projects were conducted during the 2012 – 2013
\\nSome cooperative ventures made real money. “In the fall of 1932 Gary, Indiana, was ravaged by the depression, the steel mills were
 stand on. Finally, in 1886, the statue was dedicated.\\n- Engineer Gustave Eiffel, who would later design the Eiffel Tower in Paris
808, the Abbé Grégoire included her on his list of the courageous men who pleaded the cause of \"les nègres.\"\\n-A passionate advocate
Risk Capital via Family Money 1940s\\nDuring the 1930s, the heirs to U.S. family fortunes
 Coastal Defence Division (the Blue Berets), therefore it is sometimes referred to as the Marines of Poland. However, as of 2010 there are
82, one needed the equivalent of $200 million in current dollars. By 2005, it took $900 million to
, since Webster knew that his speech would make him unelectable in Massachusetts thereafter. On July 22, 1850, Webster resigned from the
 Time4Learning mom who uses lapbooks in her homeschool curriculum:\\n\"Since 2001, my kids have created a lapbook almost every week
02. Around 1804, the Spanish built a tower at Punta Na Radona to protect the beach at Son Bou, Minorca. In
 King Alaungpaya In 1752. In the 19lh century, during the peak period of colonialism, Myanmar was annexed in three stages
 the unit suffered 35 percent casualties. From March 1863 to January 1864, the 21st served with Burn
 he gradually overcame his opponents. On January 23, 1815, on the threshold of the New Orleans cathedral, he bestowed on General Jackson
1 had risen to become the youngest army chief of staff in Israeli history. Known for his bravery, Barak led two successful commando raids, one that stormed a hijack
s team was ordered to set up a program to weaponize the biological agents. By January 1991, a team of 100 scientists
 farms, churches, schools and businesses in their new homeland in Russia.\\nIn the later part of the 19th century, these ethnic Germans looked to
 government of Afghanistan, where terrorists linked to the 9/11 disaster were given sanctuary. By December 2002, AAFES operated more
 “U.S. Solar Market Insight: Third Quarter 2012.” By the end of third quarter 2012, 1,
, 2003\\nThe womens-to-mens earnings ratio varies by race and Hispanic origin.\\nIn 200
.\\nPrior to 2001, food security and food safety were viewed as the same. Large food manufacturers were geared to protect their assets: materials
 with what became Silicon Valley.\\nBuilding Blocks of Entrepreneurship\\nBy the mid 1950s the groundwork for a culture and environment of entrepreneurs
 in butchering practices seen in the head compared to the shin bone.\\nWhat we know about the colonists\\nIn the summer of 1609,
 from 2006 to 2010.\\n- Ratcheting Down – Beginning in 2011, the cap would be
" ], "text/plain": [ "" @@ -267,14 +274,9 @@ } ], "source": [ - "# tokenizer = AutoTokenizer.from_pretrained(\"google/gemma-2-9b\")\n", - "# raw_dir = Path(f\"/mnt/ssd-1/lucia/delphi-2/results/gemma-baseline/latents\")\n", - "# hookpoint = \"layers.10\"\n", - "# plot_examples(tokenizer, raw_dir, hookpoint)\n", - "\n", - "tokenizer = AutoTokenizer.from_pretrained(\"EleutherAI/FineWeb-restricted\")\n", - "raw_dir = Path(f\"/mnt/ssd-1/lucia/clearnets/results/clearnet-57280/latents\")\n", - "hookpoint = f\"gpt_neox.layers.1\"\n", + "tokenizer = AutoTokenizer.from_pretrained(\"google/gemma-2-9b\")\n", + "raw_dir = Path(f\"/mnt/ssd-1/lucia/delphi-2/results/gemma-baseline/latents\")\n", + "hookpoint = \"layers.10\"\n", "plot_examples(tokenizer, raw_dir, hookpoint)" ] }