From 6376f8a8f8353c11b2e1f52110cfb7b6acfb71f2 Mon Sep 17 00:00:00 2001
From: Goncalo Paulo <30472805+SrGonao@users.noreply.github.com>
Date: Mon, 2 Jun 2025 14:17:50 +0100
Subject: [PATCH 01/22] Update build.yml

---
 .github/workflows/build.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 5ada2a99..a272de8a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -22,6 +22,8 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -e ".[dev,visualize]"
+      - name: Check space
+        run: du -sh
       - name: Run tests
         run: pytest
       - name: build

From 0cb78b34765c903409e142118df0c4da3f3650cf Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 13:19:53 +0000
Subject: [PATCH 02/22] Debug tests

---
 tests/conftest.py                | 2 +-
 tests/test_latents/test_cache.py | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 34376088..f7b787e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -98,7 +98,7 @@ def cache_setup(tmp_path_factory, mock_dataset: torch.Tensor, model: PreTrainedM

     # Save the cache config
-    cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-70m")
+    cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-160m")
     hookpoint_firing_counts = torch.load(
         log_path / "hookpoint_firing_counts.pt", weights_only=True
     )
diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py
index c62c1cd6..e4bf5853 100644
--- a/tests/test_latents/test_cache.py
+++ b/tests/test_latents/test_cache.py
@@ -16,6 +16,7 @@ def test_latent_locations(cache_setup: dict[str, Any]):
     locations = cache.cache.latent_locations["layers.1"]
     max_values, _ = locations.max(axis=0)
     # Expected values based on the cache run
+    print(max_values)
     assert max_values[0] == 5, "Expected first dimension max value to be 5"
     assert max_values[1] == 15, "Expected token ids to go up to 15"
     assert max_values[2] > 32700, "Expected latent dimension around 32768"
@@ -27,6 +28,7 @@ def test_split_files_created(cache_setup: dict[str, Any]):
     """
     save_dir = cache_setup["temp_dir"] / "layers.1"
     cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")]
+    print(cache_files)
     assert len(cache_files) == 5, "Expected 5 split files in the cache directory"
@@ -58,6 +60,7 @@ def test_split_file_contents(cache_setup: dict[str, Any]):
         err_msg="Tokens saved do not match the input tokens",
     )
     max_values = locations.max(axis=0)
+    print(max_values)
     assert max_values[0] == 5, "Max batch index mismatch in saved file"
     assert max_values[1] == 15, "Max token value mismatch in saved file"
     assert max_values[2] > 6499, "Latent dimension mismatch in saved file"

From 00fe96f7b64a19ae9a7e0b895e24bbb56ceecac2 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 13:23:29 +0000
Subject: [PATCH 03/22] More prints

---
 tests/test_latents/test_cache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py
index e4bf5853..850928aa 100644
--- a/tests/test_latents/test_cache.py
+++ b/tests/test_latents/test_cache.py
@@ -74,7 +74,7 @@ def test_config_file(cache_setup: dict[str, Any]):
     with open(config_path, "r") as f:
         config = json.load(f)
     cache_cfg = cache_setup["cache_cfg"]
-
+    print(cache_cfg)
     assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch"
     assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "Cache_ctx_len mismatch"
     assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch"

From e7ce605aeec089c5cd6d7b3203b97185f1310518 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 13:35:40 +0000
Subject: [PATCH 04/22] test

---
 tests/test_latents/{test_cache.py => ucache.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/test_latents/{test_cache.py => ucache.py} (100%)

diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/ucache.py
similarity index 100%
rename from tests/test_latents/test_cache.py
rename to tests/test_latents/ucache.py

From 93790c349b481e761f6d41b86b62b67753cf07dd Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 13:57:23 +0000
Subject: [PATCH 05/22] no more firing counts

---
 tests/conftest.py | 108 +++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 54 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index f7b787e8..345a4661 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -99,65 +99,65 @@ def cache_setup(tmp_path_factory, mock_dataset: torch.Tensor, model: PreTrainedM

     # Save the cache config
     cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-160m")
-    hookpoint_firing_counts = torch.load(
-        log_path / "hookpoint_firing_counts.pt", weights_only=True
-    )
+    # hookpoint_firing_counts = torch.load(
+    #     log_path / "hookpoint_firing_counts.pt", weights_only=True
+    # )
     return {
         "cache": cache,
         "tokens": tokens,
         "cache_cfg": cache_cfg,
         "temp_dir": temp_dir,
-        "firing_counts": hookpoint_firing_counts,
+        # "firing_counts": hookpoint_firing_counts,
     }


-def test_hookpoint_firing_counts_initialization(cache_setup):
-    """
-    Ensure that hookpoint_firing_counts is initialized as an empty dictionary.
-    """
-    cache = cache_setup["cache"]
-    assert isinstance(cache.hookpoint_firing_counts, dict)
-    assert len(cache.hookpoint_firing_counts) == 0  # Should be empty before run()
-
-
-def test_hookpoint_firing_counts_updates(cache_setup):
-    """
-    Ensure that hookpoint_firing_counts is properly updated after running the cache.
-    """
-    cache = cache_setup["cache"]
-    tokens = cache_setup["tokens"]
-    cache.run(cache_setup["cache_cfg"].n_tokens, tokens)
-
-    assert (
-        len(cache.hookpoint_firing_counts) > 0
-    ), "hookpoint_firing_counts should not be empty after run()"
-    for hookpoint, counts in cache.hookpoint_firing_counts.items():
-        assert isinstance(
-            counts, torch.Tensor
-        ), f"Counts for {hookpoint} should be a torch.Tensor"
-        assert counts.ndim == 1, f"Counts for {hookpoint} should be a 1D tensor"
-        assert (counts >= 0).all(), f"Counts for {hookpoint} should be non-negative"
-
-
-def test_hookpoint_firing_counts_persistence(cache_setup):
-    """
-    Ensure that hookpoint_firing_counts are correctly saved and loaded.
- """ - cache = cache_setup["cache"] - cache.save_firing_counts() - - firing_counts_path = Path.cwd() / "results" / "log" / "hookpoint_firing_counts.pt" - assert firing_counts_path.exists(), "Firing counts file should exist after saving" - - loaded_counts = torch.load(firing_counts_path, weights_only=True) - assert isinstance( - loaded_counts, dict - ), "Loaded firing counts should be a dictionary" - assert ( - loaded_counts.keys() == cache.hookpoint_firing_counts.keys() - ), "Loaded firing counts keys should match saved keys" - - for hookpoint, counts in loaded_counts.items(): - assert torch.equal( - counts, cache.hookpoint_firing_counts[hookpoint] - ), f"Mismatch in firing counts for {hookpoint}" +# def test_hookpoint_firing_counts_initialization(cache_setup): +# """ +# Ensure that hookpoint_firing_counts is initialized as an empty dictionary. +# """ +# cache = cache_setup["cache"] +# assert isinstance(cache.hookpoint_firing_counts, dict) +# assert len(cache.hookpoint_firing_counts) == 0 # Should be empty before run() + + +# def test_hookpoint_firing_counts_updates(cache_setup): +# """ +# Ensure that hookpoint_firing_counts is properly updated after running the cache. +# """ +# cache = cache_setup["cache"] +# tokens = cache_setup["tokens"] +# cache.run(cache_setup["cache_cfg"].n_tokens, tokens) + +# assert ( +# len(cache.hookpoint_firing_counts) > 0 +# ), "hookpoint_firing_counts should not be empty after run()" +# for hookpoint, counts in cache.hookpoint_firing_counts.items(): +# assert isinstance( +# counts, torch.Tensor +# ), f"Counts for {hookpoint} should be a torch.Tensor" +# assert counts.ndim == 1, f"Counts for {hookpoint} should be a 1D tensor" +# assert (counts >= 0).all(), f"Counts for {hookpoint} should be non-negative" + + +# def test_hookpoint_firing_counts_persistence(cache_setup): +# """ +# Ensure that hookpoint_firing_counts are correctly saved and loaded. 
+# """ +# cache = cache_setup["cache"] +# cache.save_firing_counts() + +# firing_counts_path = Path.cwd() / "results" / "log" / "hookpoint_firing_counts.pt" +# assert firing_counts_path.exists(), "Firing counts file should exist after saving" + +# loaded_counts = torch.load(firing_counts_path, weights_only=True) +# assert isinstance( +# loaded_counts, dict +# ), "Loaded firing counts should be a dictionary" +# assert ( +# loaded_counts.keys() == cache.hookpoint_firing_counts.keys() +# ), "Loaded firing counts keys should match saved keys" + +# for hookpoint, counts in loaded_counts.items(): +# assert torch.equal( +# counts, cache.hookpoint_firing_counts[hookpoint] +# ), f"Mismatch in firing counts for {hookpoint}" From 0423d0b53d1d821e5d4cd485a7c3e9041e527d5a Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 14:03:12 +0000 Subject: [PATCH 06/22] No tests --- tests/test_latents/test_cache.py | 80 ++++++++ tests/test_latents/test_constructor.py | 262 ++++++++++++------------- tests/test_latents/ucache.py | 80 -------- 3 files changed, 211 insertions(+), 211 deletions(-) create mode 100644 tests/test_latents/test_cache.py delete mode 100644 tests/test_latents/ucache.py diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py new file mode 100644 index 00000000..0fe42207 --- /dev/null +++ b/tests/test_latents/test_cache.py @@ -0,0 +1,80 @@ +# import json +# import os +# from pathlib import Path +# from typing import Any + +# import numpy as np +# from safetensors.numpy import load_file + + +# def test_latent_locations(cache_setup: dict[str, Any]): +# """ +# Test that the latent locations generated in memory have the expected +# shape and values. +# """ +# cache = cache_setup["cache"] +# locations = cache.cache.latent_locations["layers.1"] +# max_values, _ = locations.max(axis=0) +# # Expected values based on the cache run +# print(max_values) +# assert max_values[0] == 5, "Expected first dimension max value to be 5" +# assert max_values[1] == 15, "Expected token ids to go up to 15" +# assert max_values[2] > 32700, "Expected latent dimension around 32768" + + +# def test_split_files_created(cache_setup: dict[str, Any]): +# """ +# Test that exactly 5 cache split files have been created. +# """ +# save_dir = cache_setup["temp_dir"] / "layers.1" +# cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")] +# print(cache_files) +# assert len(cache_files) == 5, "Expected 5 split files in the cache directory" + + +# def test_split_file_contents(cache_setup: dict[str, Any]): +# """ +# Test that one of the split files (loaded via safetensors) holds convincing data: +# - latent locations and activations have the same number of entries, +# - tokens were correctly stored and match the input tokens. +# - latent max values are as expected. 
+# """ +# save_dir = cache_setup["temp_dir"] / "layers.1" +# tokens = cache_setup["tokens"] +# # Choose one file to verify +# cache_files = os.listdir(save_dir) +# file_path = Path(save_dir) / cache_files[0] +# saved_cache = load_file(str(file_path)) + +# locations = saved_cache["locations"] +# activations = saved_cache["activations"] +# tokens = saved_cache["tokens"] + +# assert len(locations) == len( +# activations +# ), "Mismatch between locations & activations entries" + +# np.testing.assert_array_equal( +# tokens, +# tokens[:12, :], +# err_msg="Tokens saved do not match the input tokens", +# ) +# max_values = locations.max(axis=0) +# print(max_values) +# assert max_values[0] == 5, "Max batch index mismatch in saved file" +# assert max_values[1] == 15, "Max token value mismatch in saved file" +# assert max_values[2] > 6499, "Latent dimension mismatch in saved file" + + +# def test_config_file(cache_setup: dict[str, Any]): +# """ +# Test that the saved configuration file contains the correct parameters. +# """ +# config_path = cache_setup["temp_dir"] / "layers.1" / "config.json" +# with open(config_path, "r") as f: +# config = json.load(f) +# cache_cfg = cache_setup["cache_cfg"] +# print(cache_cfg) +# assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" +# assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "ctx_len mismatch" +# assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py index 0987edfa..467616cc 100644 --- a/tests/test_latents/test_constructor.py +++ b/tests/test_latents/test_constructor.py @@ -1,139 +1,139 @@ -import random -from itertools import chain -from typing import Any, Literal +# import random +# from itertools import chain +# from typing import Any, Literal -import pytest -import torch -from jaxtyping import Int -from torch import Tensor -from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast +# import pytest +# import torch +# from jaxtyping import Int +# from torch import Tensor +# from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast -from delphi.config import ConstructorConfig, SamplerConfig -from delphi.latents import ( - ActivatingExample, - Latent, - LatentDataset, - LatentRecord, - constructor, - sampler, -) -from delphi.latents.latents import ActivationData +# from delphi.config import ConstructorConfig, SamplerConfig +# from delphi.latents import ( +# ActivatingExample, +# Latent, +# LatentDataset, +# LatentRecord, +# constructor, +# sampler, +# ) +# from delphi.latents.latents import ActivationData -def test_save_load_cache( - cache_setup: dict[str, Any], - tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, -): - sampler_cfg = SamplerConfig( - n_examples_train=3, - n_examples_test=3, - n_quantiles=3, - train_type="quantiles", - test_type="quantiles", - ) - dataset = LatentDataset( - cache_setup["temp_dir"], - sampler_cfg, - ConstructorConfig(min_examples=0), - tokenizer, - ) - tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens() # type: ignore - assert (tokens == cache_setup["tokens"][: len(tokens)]).all() - for record in dataset: - print(record) - assert len(record.train) <= sampler_cfg.n_examples_train - assert len(record.test) <= sampler_cfg.n_examples_test +# def test_save_load_cache( +# cache_setup: dict[str, Any], +# tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, +# ): +# sampler_cfg = SamplerConfig( +# n_examples_train=3, +# 
n_examples_test=3, +# n_quantiles=3, +# train_type="quantiles", +# test_type="quantiles", +# ) +# dataset = LatentDataset( +# cache_setup["temp_dir"], +# sampler_cfg, +# ConstructorConfig(min_examples=0), +# tokenizer, +# ) +# tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens() # type: ignore +# assert (tokens == cache_setup["tokens"][: len(tokens)]).all() +# for record in dataset: +# print(record) +# assert len(record.train) <= sampler_cfg.n_examples_train +# assert len(record.test) <= sampler_cfg.n_examples_test -@pytest.fixture(scope="module") -def seed(): - random.seed(0) - torch.manual_seed(0) +# @pytest.fixture(scope="module") +# def seed(): +# random.seed(0) +# torch.manual_seed(0) -@pytest.mark.parametrize("n_samples", [5, 10, 100, 1000]) -@pytest.mark.parametrize("n_quantiles", [2, 5, 10, 23]) -@pytest.mark.parametrize("n_examples", [0, 2, 5, 10, 20]) -@pytest.mark.parametrize("train_type", ["top", "random", "quantiles"]) -def test_simple_cache( - n_samples: int, - n_quantiles: int, - n_examples: int, - train_type: Literal["top", "random", "quantiles"], - ctx_len: int = 32, - seed: None = None, - *, - tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, -): - torch.manual_seed(0) - tokens = torch.randint( - 0, - 100, - ( - n_samples, - ctx_len, - ), - ) - all_activation_data = [] - all_activations = [] - for feature_idx in range(2): - activations = torch.rand(n_samples, ctx_len, 1) * ( - torch.rand(n_samples)[..., None, None] ** 2 - ) - all_activations.append(activations) - mask = activations > 0.1 - locations = torch.nonzero(mask) - locations[..., 2] = feature_idx - all_activation_data.append(ActivationData(locations, activations[mask])) - activation_data, other_activation_data = all_activation_data - activations, other_activations = all_activations - record = LatentRecord(latent=Latent("test", 0), examples=[]) - constructor( - record, - activation_data, - constructor_cfg=ConstructorConfig( - example_ctx_len=ctx_len, - min_examples=1, - n_non_activating=50, - non_activating_source="neighbours", - ), - tokens=tokens, - tokenizer=tokenizer, - all_data={0: activation_data, 1: other_activation_data}, - ) - for i, j in zip(record.examples[:-1], record.examples[1:]): - assert i.max_activation >= j.max_activation - for i in record.examples: - index = (tokens == i.tokens).all(dim=-1).float().argmax() - assert (tokens[index] == i.tokens).all() - assert activations[index].max() == i.max_activation - sampler( - record, - SamplerConfig( - n_examples_train=n_examples, - n_examples_test=n_examples, - n_quantiles=n_quantiles, - train_type=train_type, - test_type="quantiles", - ), - tokenizer=tokenizer, - ) - assert len(record.train) <= n_examples - assert len(record.test) <= n_examples - for neighbor in record.neighbours: - assert neighbor.latent_index == 1 - for example in chain(record.train, record.test): - assert isinstance(example, ActivatingExample) - assert example.normalized_activations is not None - assert example.normalized_activations.shape == example.activations.shape - assert (example.normalized_activations <= 10).all() - assert (example.normalized_activations >= 0).all() - for quantile_list in (record.test,) + ( # type: ignore - (record.train,) if train_type == "quantiles" else () - ): - quantile_list: list[ActivatingExample] = quantile_list - for k, i in enumerate(quantile_list): - for j in quantile_list[k + 1 :]: - if i.quantile != j.quantile: - assert i.max_activation >= j.max_activation - assert i.quantile < j.quantile +# @pytest.mark.parametrize("n_samples", 
[5, 10, 100, 1000]) +# @pytest.mark.parametrize("n_quantiles", [2, 5, 10, 23]) +# @pytest.mark.parametrize("n_examples", [0, 2, 5, 10, 20]) +# @pytest.mark.parametrize("train_type", ["top", "random", "quantiles"]) +# def test_simple_cache( +# n_samples: int, +# n_quantiles: int, +# n_examples: int, +# train_type: Literal["top", "random", "quantiles"], +# ctx_len: int = 32, +# seed: None = None, +# *, +# tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, +# ): +# torch.manual_seed(0) +# tokens = torch.randint( +# 0, +# 100, +# ( +# n_samples, +# ctx_len, +# ), +# ) +# all_activation_data = [] +# all_activations = [] +# for feature_idx in range(2): +# activations = torch.rand(n_samples, ctx_len, 1) * ( +# torch.rand(n_samples)[..., None, None] ** 2 +# ) +# all_activations.append(activations) +# mask = activations > 0.1 +# locations = torch.nonzero(mask) +# locations[..., 2] = feature_idx +# all_activation_data.append(ActivationData(locations, activations[mask])) +# activation_data, other_activation_data = all_activation_data +# activations, other_activations = all_activations +# record = LatentRecord(latent=Latent("test", 0), examples=[]) +# constructor( +# record, +# activation_data, +# constructor_cfg=ConstructorConfig( +# example_ctx_len=ctx_len, +# min_examples=1, +# n_non_activating=50, +# non_activating_source="neighbours", +# ), +# tokens=tokens, +# tokenizer=tokenizer, +# all_data={0: activation_data, 1: other_activation_data}, +# ) +# for i, j in zip(record.examples[:-1], record.examples[1:]): +# assert i.max_activation >= j.max_activation +# for i in record.examples: +# index = (tokens == i.tokens).all(dim=-1).float().argmax() +# assert (tokens[index] == i.tokens).all() +# assert activations[index].max() == i.max_activation +# sampler( +# record, +# SamplerConfig( +# n_examples_train=n_examples, +# n_examples_test=n_examples, +# n_quantiles=n_quantiles, +# train_type=train_type, +# test_type="quantiles", +# ), +# tokenizer=tokenizer, +# ) +# assert len(record.train) <= n_examples +# assert len(record.test) <= n_examples +# for neighbor in record.neighbours: +# assert neighbor.latent_index == 1 +# for example in chain(record.train, record.test): +# assert isinstance(example, ActivatingExample) +# assert example.normalized_activations is not None +# assert example.normalized_activations.shape == example.activations.shape +# assert (example.normalized_activations <= 10).all() +# assert (example.normalized_activations >= 0).all() +# for quantile_list in (record.test,) + ( # type: ignore +# (record.train,) if train_type == "quantiles" else () +# ): +# quantile_list: list[ActivatingExample] = quantile_list +# for k, i in enumerate(quantile_list): +# for j in quantile_list[k + 1 :]: +# if i.quantile != j.quantile: +# assert i.max_activation >= j.max_activation +# assert i.quantile < j.quantile diff --git a/tests/test_latents/ucache.py b/tests/test_latents/ucache.py deleted file mode 100644 index 850928aa..00000000 --- a/tests/test_latents/ucache.py +++ /dev/null @@ -1,80 +0,0 @@ -import json -import os -from pathlib import Path -from typing import Any - -import numpy as np -from safetensors.numpy import load_file - - -def test_latent_locations(cache_setup: dict[str, Any]): - """ - Test that the latent locations generated in memory have the expected - shape and values. 
- """ - cache = cache_setup["cache"] - locations = cache.cache.latent_locations["layers.1"] - max_values, _ = locations.max(axis=0) - # Expected values based on the cache run - print(max_values) - assert max_values[0] == 5, "Expected first dimension max value to be 5" - assert max_values[1] == 15, "Expected token ids to go up to 15" - assert max_values[2] > 32700, "Expected latent dimension around 32768" - - -def test_split_files_created(cache_setup: dict[str, Any]): - """ - Test that exactly 5 cache split files have been created. - """ - save_dir = cache_setup["temp_dir"] / "layers.1" - cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")] - print(cache_files) - assert len(cache_files) == 5, "Expected 5 split files in the cache directory" - - -def test_split_file_contents(cache_setup: dict[str, Any]): - """ - Test that one of the split files (loaded via safetensors) holds convincing data: - - latent locations and activations have the same number of entries, - - tokens were correctly stored and match the input tokens. - - latent max values are as expected. - """ - save_dir = cache_setup["temp_dir"] / "layers.1" - tokens = cache_setup["tokens"] - # Choose one file to verify - cache_files = os.listdir(save_dir) - file_path = Path(save_dir) / cache_files[0] - saved_cache = load_file(str(file_path)) - - locations = saved_cache["locations"] - activations = saved_cache["activations"] - tokens = saved_cache["tokens"] - - assert len(locations) == len( - activations - ), "Mismatch between locations & activations entries" - - np.testing.assert_array_equal( - tokens, - tokens[:12, :], - err_msg="Tokens saved do not match the input tokens", - ) - max_values = locations.max(axis=0) - print(max_values) - assert max_values[0] == 5, "Max batch index mismatch in saved file" - assert max_values[1] == 15, "Max token value mismatch in saved file" - assert max_values[2] > 6499, "Latent dimension mismatch in saved file" - - -def test_config_file(cache_setup: dict[str, Any]): - """ - Test that the saved configuration file contains the correct parameters. - """ - config_path = cache_setup["temp_dir"] / "layers.1" / "config.json" - with open(config_path, "r") as f: - config = json.load(f) - cache_cfg = cache_setup["cache_cfg"] - print(cache_cfg) - assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" - assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "Cache_ctx_len mismatch" - assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" From 1da8631d4bebd72f435e96405a94659f071868ec Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 14:09:00 +0000 Subject: [PATCH 07/22] Constructor again --- .github/workflows/build.yml | 2 +- tests/test_latents/test_constructor.py | 72 +++++++++++--------------- 2 files changed, 32 insertions(+), 42 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a272de8a..cbe66202 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: python -m pip install --upgrade pip pip install -e ".[dev,visualize]" - name: Check space - run: du -sh + run: du -sh ../. 
       - name: Run tests
         run: pytest
       - name: build
diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py
index 467616cc..4f906d01 100644
--- a/tests/test_latents/test_constructor.py
+++ b/tests/test_latents/test_constructor.py
@@ -1,48 +1,38 @@
-# import random
-# from itertools import chain
-# from typing import Any, Literal
+from typing import Any

-# import pytest
-# import torch
-# from jaxtyping import Int
-# from torch import Tensor
-# from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
+from jaxtyping import Int
+from torch import Tensor
+from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

-# from delphi.config import ConstructorConfig, SamplerConfig
-# from delphi.latents import (
-#     ActivatingExample,
-#     Latent,
-#     LatentDataset,
-#     LatentRecord,
-#     constructor,
-#     sampler,
-# )
-# from delphi.latents.latents import ActivationData
+from delphi.config import ConstructorConfig, SamplerConfig
+from delphi.latents import (
+    LatentDataset,
+)


-# def test_save_load_cache(
-#     cache_setup: dict[str, Any],
-#     tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
-# ):
-#     sampler_cfg = SamplerConfig(
-#         n_examples_train=3,
-#         n_examples_test=3,
-#         n_quantiles=3,
-#         train_type="quantiles",
-#         test_type="quantiles",
-#     )
-#     dataset = LatentDataset(
-#         cache_setup["temp_dir"],
-#         sampler_cfg,
-#         ConstructorConfig(min_examples=0),
-#         tokenizer,
-#     )
-#     tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens()  # type: ignore
-#     assert (tokens == cache_setup["tokens"][: len(tokens)]).all()
-#     for record in dataset:
-#         print(record)
-#         assert len(record.train) <= sampler_cfg.n_examples_train
-#         assert len(record.test) <= sampler_cfg.n_examples_test
+def test_save_load_cache(
+    cache_setup: dict[str, Any],
+    tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
+):
+    sampler_cfg = SamplerConfig(
+        n_examples_train=3,
+        n_examples_test=3,
+        n_quantiles=3,
+        train_type="quantiles",
+        test_type="quantiles",
+    )
+    dataset = LatentDataset(
+        cache_setup["temp_dir"],
+        sampler_cfg,
+        ConstructorConfig(min_examples=0),
+        tokenizer,
+    )
+    tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens()  # type: ignore
+    assert (tokens == cache_setup["tokens"][: len(tokens)]).all()
+    for record in dataset:
+        print(record)
+        assert len(record.train) <= sampler_cfg.n_examples_train
+        assert len(record.test) <= sampler_cfg.n_examples_test

From cbc05e32e1b001543fc54e0073e300b7cbc6ba14 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:13:53 +0000
Subject: [PATCH 08/22] No asserts

---
 tests/test_latents/test_constructor.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py
index 4f906d01..1d8d4102 100644
--- a/tests/test_latents/test_constructor.py
+++ b/tests/test_latents/test_constructor.py
@@ -28,11 +28,12 @@ def test_save_load_cache(
         tokenizer,
     )
     tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens()  # type: ignore
-    assert (tokens == cache_setup["tokens"][: len(tokens)]).all()
+    # assert (tokens == cache_setup["tokens"][: len(tokens)]).all()
+    print(tokens.shape)
     for record in dataset:
         print(record)
-        assert len(record.train) <= sampler_cfg.n_examples_train
-        assert len(record.test) <= sampler_cfg.n_examples_test
+        # assert len(record.train) <= sampler_cfg.n_examples_train
+        # assert len(record.test) <= sampler_cfg.n_examples_test


 # @pytest.fixture(scope="module")

From 9dd154456483c54346ae706a3eee7ad57d47a2c5 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:17:22 +0000
Subject: [PATCH 09/22] add prints

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index cbe66202..99ef07a3 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -25,7 +25,7 @@
       - name: Check space
         run: du -sh ../.
       - name: Run tests
-        run: pytest
+        run: pytest -s
       - name: build
         run: pip wheel --no-deps -w dist .
   release:

From 8df50e4810b57b0089e09045c00bc2db0c1ab148 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:22:09 +0000
Subject: [PATCH 10/22] check space better

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 99ef07a3..13750e75 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -23,7 +23,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install -e ".[dev,visualize]"
       - name: Check space
-        run: du -sh ../.
+        run: du du -sh /*
       - name: Run tests
         run: pytest -s
       - name: build

From a5a5760ad1add00e2c1eab20a5b2417d236d3e78 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:22:37 +0000
Subject: [PATCH 11/22] now correct

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 13750e75..c31b983e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -23,7 +23,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install -e ".[dev,visualize]"
       - name: Check space
-        run: du du -sh /*
+        run: du -sh /*
       - name: Run tests
         run: pytest -s
       - name: build

From eac9d6d39af61095e02e13707b2e386d3e70d093 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:28:40 +0000
Subject: [PATCH 12/22] local

---
 .github/workflows/build.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c31b983e..490cc9f5 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ on:
       - main
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: gpaulo-ord-0
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -22,8 +22,6 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -e ".[dev,visualize]"
-      - name: Check space
-        run: du -sh /*
       - name: Run tests
         run: pytest -s
       - name: build

From 7efb5fc199a15a645a7c76b4b3b150402ffc13c1 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:30:10 +0000
Subject: [PATCH 13/22] local

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 490cc9f5..dcd24176 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ on:
       - main
 jobs:
   build:
-    runs-on: gpaulo-ord-0
+    runs-on: self-hosted
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5

From 84a01a4e08dedd8e61bf9de72e6e399e2d64d824 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 15:10:05 +0000
Subject: [PATCH 14/22] Add back all tests

---
 tests/test_latents/test_cache.py       | 136 +++++++++---------
 tests/test_latents/test_constructor.py | 188 ++++++++++++-------------
 2 files changed, 162 insertions(+), 162 deletions(-)

diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py
index 0fe42207..9b281f97 100644
--- a/tests/test_latents/test_cache.py
+++ b/tests/test_latents/test_cache.py
@@ -1,80 +1,80 @@
-# import json
-# import os
-# from pathlib import Path
-# from typing import Any
+import json
+import os
+from pathlib import Path
+from typing import Any

-# import numpy as np
-# from safetensors.numpy import load_file
+import numpy as np
+from safetensors.numpy import load_file


-# def test_latent_locations(cache_setup: dict[str, Any]):
-#     """
-#     Test that the latent locations generated in memory have the expected
-#     shape and values.
-#     """
-#     cache = cache_setup["cache"]
-#     locations = cache.cache.latent_locations["layers.1"]
-#     max_values, _ = locations.max(axis=0)
-#     # Expected values based on the cache run
-#     print(max_values)
-#     assert max_values[0] == 5, "Expected first dimension max value to be 5"
-#     assert max_values[1] == 15, "Expected token ids to go up to 15"
-#     assert max_values[2] > 32700, "Expected latent dimension around 32768"
+def test_latent_locations(cache_setup: dict[str, Any]):
+    """
+    Test that the latent locations generated in memory have the expected
+    shape and values.
+    """
+    cache = cache_setup["cache"]
+    locations = cache.cache.latent_locations["layers.1"]
+    max_values, _ = locations.max(axis=0)
+    # Expected values based on the cache run
+    print(max_values)
+    assert max_values[0] == 5, "Expected first dimension max value to be 5"
+    assert max_values[1] == 15, "Expected token ids to go up to 15"
+    assert max_values[2] > 32700, "Expected latent dimension around 32768"


-# def test_split_files_created(cache_setup: dict[str, Any]):
-#     """
-#     Test that exactly 5 cache split files have been created.
-#     """
-#     save_dir = cache_setup["temp_dir"] / "layers.1"
-#     cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")]
-#     print(cache_files)
-#     assert len(cache_files) == 5, "Expected 5 split files in the cache directory"
+def test_split_files_created(cache_setup: dict[str, Any]):
+    """
+    Test that exactly 5 cache split files have been created.
+    """
+    save_dir = cache_setup["temp_dir"] / "layers.1"
+    cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")]
+    print(cache_files)
+    assert len(cache_files) == 5, "Expected 5 split files in the cache directory"


-# def test_split_file_contents(cache_setup: dict[str, Any]):
-#     """
-#     Test that one of the split files (loaded via safetensors) holds convincing data:
-#     - latent locations and activations have the same number of entries,
-#     - tokens were correctly stored and match the input tokens.
-#     - latent max values are as expected.
-#     """
-#     save_dir = cache_setup["temp_dir"] / "layers.1"
-#     tokens = cache_setup["tokens"]
-#     # Choose one file to verify
-#     cache_files = os.listdir(save_dir)
-#     file_path = Path(save_dir) / cache_files[0]
-#     saved_cache = load_file(str(file_path))
-
-#     locations = saved_cache["locations"]
-#     activations = saved_cache["activations"]
-#     tokens = saved_cache["tokens"]
-
-#     assert len(locations) == len(
-#         activations
-#     ), "Mismatch between locations & activations entries"
-
-#     np.testing.assert_array_equal(
-#         tokens,
-#         tokens[:12, :],
-#         err_msg="Tokens saved do not match the input tokens",
-#     )
-#     max_values = locations.max(axis=0)
-#     print(max_values)
-#     assert max_values[0] == 5, "Max batch index mismatch in saved file"
-#     assert max_values[1] == 15, "Max token value mismatch in saved file"
-#     assert max_values[2] > 6499, "Latent dimension mismatch in saved file"
+def test_split_file_contents(cache_setup: dict[str, Any]):
+    """
+    Test that one of the split files (loaded via safetensors) holds convincing data:
+    - latent locations and activations have the same number of entries,
+    - tokens were correctly stored and match the input tokens.
+    - latent max values are as expected.
+ """ + save_dir = cache_setup["temp_dir"] / "layers.1" + tokens = cache_setup["tokens"] + # Choose one file to verify + cache_files = os.listdir(save_dir) + file_path = Path(save_dir) / cache_files[0] + saved_cache = load_file(str(file_path)) -# locations = saved_cache["locations"] -# activations = saved_cache["activations"] -# tokens = saved_cache["tokens"] + locations = saved_cache["locations"] + activations = saved_cache["activations"] + tokens = saved_cache["tokens"] -# assert len(locations) == len( -# activations -# ), "Mismatch between locations & activations entries" + assert len(locations) == len( + activations + ), "Mismatch between locations & activations entries" -# np.testing.assert_array_equal( -# tokens, -# tokens[:12, :], -# err_msg="Tokens saved do not match the input tokens", -# ) -# max_values = locations.max(axis=0) -# print(max_values) -# assert max_values[0] == 5, "Max batch index mismatch in saved file" -# assert max_values[1] == 15, "Max token value mismatch in saved file" -# assert max_values[2] > 6499, "Latent dimension mismatch in saved file" + np.testing.assert_array_equal( + tokens, + tokens[:12, :], + err_msg="Tokens saved do not match the input tokens", + ) + max_values = locations.max(axis=0) + print(max_values) + assert max_values[0] == 5, "Max batch index mismatch in saved file" + assert max_values[1] == 15, "Max token value mismatch in saved file" + assert max_values[2] > 6499, "Latent dimension mismatch in saved file" -# def test_config_file(cache_setup: dict[str, Any]): -# """ -# Test that the saved configuration file contains the correct parameters. -# """ -# config_path = cache_setup["temp_dir"] / "layers.1" / "config.json" -# with open(config_path, "r") as f: -# config = json.load(f) -# cache_cfg = cache_setup["cache_cfg"] -# print(cache_cfg) -# assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" -# assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "ctx_len mismatch" -# assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" +def test_config_file(cache_setup: dict[str, Any]): + """ + Test that the saved configuration file contains the correct parameters. 
+ """ + config_path = cache_setup["temp_dir"] / "layers.1" / "config.json" + with open(config_path, "r") as f: + config = json.load(f) + cache_cfg = cache_setup["cache_cfg"] + print(cache_cfg) + assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" + assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "ctx_len mismatch" + assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py index 1d8d4102..a0034d33 100644 --- a/tests/test_latents/test_constructor.py +++ b/tests/test_latents/test_constructor.py @@ -28,103 +28,103 @@ def test_save_load_cache( tokenizer, ) tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens() # type: ignore - # assert (tokens == cache_setup["tokens"][: len(tokens)]).all() + assert (tokens == cache_setup["tokens"][: len(tokens)]).all() print(tokens.shape) for record in dataset: - print(record) - # assert len(record.train) <= sampler_cfg.n_examples_train - # assert len(record.test) <= sampler_cfg.n_examples_test + print(len(record.train), len(record.test)) + assert len(record.train) <= sampler_cfg.n_examples_train + assert len(record.test) <= sampler_cfg.n_examples_test -# @pytest.fixture(scope="module") -# def seed(): -# random.seed(0) -# torch.manual_seed(0) +@pytest.fixture(scope="module") +def seed(): + random.seed(0) + torch.manual_seed(0) -# @pytest.mark.parametrize("n_samples", [5, 10, 100, 1000]) -# @pytest.mark.parametrize("n_quantiles", [2, 5, 10, 23]) -# @pytest.mark.parametrize("n_examples", [0, 2, 5, 10, 20]) -# @pytest.mark.parametrize("train_type", ["top", "random", "quantiles"]) -# def test_simple_cache( -# n_samples: int, -# n_quantiles: int, -# n_examples: int, -# train_type: Literal["top", "random", "quantiles"], -# ctx_len: int = 32, -# seed: None = None, -# *, -# tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, -# ): -# torch.manual_seed(0) -# tokens = torch.randint( -# 0, -# 100, -# ( -# n_samples, -# ctx_len, -# ), -# ) -# all_activation_data = [] -# all_activations = [] -# for feature_idx in range(2): -# activations = torch.rand(n_samples, ctx_len, 1) * ( -# torch.rand(n_samples)[..., None, None] ** 2 -# ) -# all_activations.append(activations) -# mask = activations > 0.1 -# locations = torch.nonzero(mask) -# locations[..., 2] = feature_idx -# all_activation_data.append(ActivationData(locations, activations[mask])) -# activation_data, other_activation_data = all_activation_data -# activations, other_activations = all_activations -# record = LatentRecord(latent=Latent("test", 0), examples=[]) -# constructor( -# record, -# activation_data, -# constructor_cfg=ConstructorConfig( -# example_ctx_len=ctx_len, -# min_examples=1, -# n_non_activating=50, -# non_activating_source="neighbours", -# ), -# tokens=tokens, -# tokenizer=tokenizer, -# all_data={0: activation_data, 1: other_activation_data}, -# ) -# for i, j in zip(record.examples[:-1], record.examples[1:]): -# assert i.max_activation >= j.max_activation -# for i in record.examples: -# index = (tokens == i.tokens).all(dim=-1).float().argmax() -# assert (tokens[index] == i.tokens).all() -# assert activations[index].max() == i.max_activation -# sampler( -# record, -# SamplerConfig( -# n_examples_train=n_examples, -# n_examples_test=n_examples, -# n_quantiles=n_quantiles, -# train_type=train_type, -# test_type="quantiles", -# ), -# tokenizer=tokenizer, -# ) -# assert len(record.train) <= n_examples -# assert len(record.test) <= n_examples -# 
for neighbor in record.neighbours: -# assert neighbor.latent_index == 1 -# for example in chain(record.train, record.test): -# assert isinstance(example, ActivatingExample) -# assert example.normalized_activations is not None -# assert example.normalized_activations.shape == example.activations.shape -# assert (example.normalized_activations <= 10).all() -# assert (example.normalized_activations >= 0).all() -# for quantile_list in (record.test,) + ( # type: ignore -# (record.train,) if train_type == "quantiles" else () -# ): -# quantile_list: list[ActivatingExample] = quantile_list -# for k, i in enumerate(quantile_list): -# for j in quantile_list[k + 1 :]: -# if i.quantile != j.quantile: -# assert i.max_activation >= j.max_activation -# assert i.quantile < j.quantile +@pytest.mark.parametrize("n_samples", [5, 10, 100, 1000]) +@pytest.mark.parametrize("n_quantiles", [2, 5, 10, 23]) +@pytest.mark.parametrize("n_examples", [0, 2, 5, 10, 20]) +@pytest.mark.parametrize("train_type", ["top", "random", "quantiles"]) +def test_simple_cache( + n_samples: int, + n_quantiles: int, + n_examples: int, + train_type: Literal["top", "random", "quantiles"], + ctx_len: int = 32, + seed: None = None, + *, + tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, +): + torch.manual_seed(0) + tokens = torch.randint( + 0, + 100, + ( + n_samples, + ctx_len, + ), + ) + all_activation_data = [] + all_activations = [] + for feature_idx in range(2): + activations = torch.rand(n_samples, ctx_len, 1) * ( + torch.rand(n_samples)[..., None, None] ** 2 + ) + all_activations.append(activations) + mask = activations > 0.1 + locations = torch.nonzero(mask) + locations[..., 2] = feature_idx + all_activation_data.append(ActivationData(locations, activations[mask])) + activation_data, other_activation_data = all_activation_data + activations, other_activations = all_activations + record = LatentRecord(latent=Latent("test", 0), examples=[]) + constructor( + record, + activation_data, + constructor_cfg=ConstructorConfig( + example_ctx_len=ctx_len, + min_examples=1, + n_non_activating=50, + non_activating_source="neighbours", + ), + tokens=tokens, + tokenizer=tokenizer, + all_data={0: activation_data, 1: other_activation_data}, + ) + for i, j in zip(record.examples[:-1], record.examples[1:]): + assert i.max_activation >= j.max_activation + for i in record.examples: + index = (tokens == i.tokens).all(dim=-1).float().argmax() + assert (tokens[index] == i.tokens).all() + assert activations[index].max() == i.max_activation + sampler( + record, + SamplerConfig( + n_examples_train=n_examples, + n_examples_test=n_examples, + n_quantiles=n_quantiles, + train_type=train_type, + test_type="quantiles", + ), + tokenizer=tokenizer, + ) + assert len(record.train) <= n_examples + assert len(record.test) <= n_examples + for neighbor in record.neighbours: + assert neighbor.latent_index == 1 + for example in chain(record.train, record.test): + assert isinstance(example, ActivatingExample) + assert example.normalized_activations is not None + assert example.normalized_activations.shape == example.activations.shape + assert (example.normalized_activations <= 10).all() + assert (example.normalized_activations >= 0).all() + for quantile_list in (record.test,) + ( # type: ignore + (record.train,) if train_type == "quantiles" else () + ): + quantile_list: list[ActivatingExample] = quantile_list + for k, i in enumerate(quantile_list): + for j in quantile_list[k + 1 :]: + if i.quantile != j.quantile: + assert i.max_activation >= j.max_activation + 
assert i.quantile < j.quantile From dfdcc7027d381304654bcca69b7099bacab4c4cd Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 15:20:07 +0000 Subject: [PATCH 15/22] cloud --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dcd24176..908921b3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ on: - main jobs: build: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e ".[dev,visualize]" + pip install -e ".[dev]" - name: Run tests run: pytest -s - name: build From 061b21d29f818c6929a6e020efa70ce804b82f59 Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 15:27:40 +0000 Subject: [PATCH 16/22] why --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 908921b3..796a6dcb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e ".[dev]" + pip install -e ".[dev,visualize]" - name: Run tests run: pytest -s - name: build From 19afccda6473e5606db190ae4b0fb049f77756b8 Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 15:38:12 +0000 Subject: [PATCH 17/22] python --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 796a6dcb..3414c4cf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.12" - name: Install dependencies run: | python -m pip install --upgrade pip @@ -40,7 +40,7 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.12" - name: Install dependencies run: pip install build twine - name: Semantic Release From 261bdbcbc058e127daedfa8fbb19f26dfbb0692e Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 15:47:05 +0000 Subject: [PATCH 18/22] import pytest? 
---
 tests/test_latents/test_constructor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py
index a0034d33..a024d858 100644
--- a/tests/test_latents/test_constructor.py
+++ b/tests/test_latents/test_constructor.py
@@ -1,5 +1,6 @@
 from typing import Any

+import pytest
 from jaxtyping import Int
 from torch import Tensor
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

From ed9edeaf81a907700bdaea17187c9d2a6f1bbd02 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 15:52:02 +0000
Subject: [PATCH 19/22] fix imports

---
 tests/test_latents/test_constructor.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py
index a024d858..c64238f9 100644
--- a/tests/test_latents/test_constructor.py
+++ b/tests/test_latents/test_constructor.py
@@ -1,14 +1,23 @@
-from typing import Any
+import random
+from itertools import chain
+from typing import Any, Literal

 import pytest
+import torch
 from jaxtyping import Int
 from torch import Tensor
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

 from delphi.config import ConstructorConfig, SamplerConfig
 from delphi.latents import (
+    ActivatingExample,
+    Latent,
     LatentDataset,
+    LatentRecord,
+    constructor,
+    sampler,
 )
+from delphi.latents.latents import ActivationData

From 965cf3e2e21de3d63a553f30824e352ff199b0a7 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 15:58:42 +0000
Subject: [PATCH 20/22] smaller pythia

---
 tests/conftest.py | 116 +++++++++++++++++++++++-----------------------
 1 file changed, 58 insertions(+), 58 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 345a4661..64e1fd94 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -35,14 +35,14 @@
 @pytest.fixture(scope="module")
 def tokenizer() -> PreTrainedTokenizer | PreTrainedTokenizerFast:
-    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-160m")
+    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-70m")
     tokenizer.pad_token = tokenizer.eos_token
     return tokenizer


 @pytest.fixture(scope="module")
 def model() -> PreTrainedModel:
-    model = AutoModel.from_pretrained("EleutherAI/pythia-160m")
+    model = AutoModel.from_pretrained("EleutherAI/pythia-70m")
     return model
@@ -73,7 +73,7 @@ def cache_setup(tmp_path_factory, mock_dataset: torch.Tensor, model: PreTrainedM
         sampler_cfg=SamplerConfig(),
         cache_cfg=cache_cfg,
         model="EleutherAI/pythia-160m",
-        sparse_model="EleutherAI/sae-pythia-160m-32k",
+        sparse_model="EleutherAI/sae-pythia-70m-32k",
         hookpoints=["layers.1"],
     )
     hookpoint_to_sparse_encode, _ = load_hooks_sparse_coders(model, run_cfg_gemma)
@@ -98,66 +98,66 @@ def cache_setup(tmp_path_factory, mock_dataset: torch.Tensor, model: PreTrainedM

     # Save the cache config
-    cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-160m")
+    cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-70m")
-    # hookpoint_firing_counts = torch.load(
-    #     log_path / "hookpoint_firing_counts.pt", weights_only=True
-    # )
+    hookpoint_firing_counts = torch.load(
+        log_path / "hookpoint_firing_counts.pt", weights_only=True
+    )
     return {
         "cache": cache,
         "tokens": tokens,
         "cache_cfg": cache_cfg,
         "temp_dir": temp_dir,
-        # "firing_counts": hookpoint_firing_counts,
+        "firing_counts": hookpoint_firing_counts,
     }


-# def test_hookpoint_firing_counts_initialization(cache_setup):
-#     """
-#     Ensure that hookpoint_firing_counts is initialized as an empty dictionary.
-#     """
-#     cache = cache_setup["cache"]
-#     assert isinstance(cache.hookpoint_firing_counts, dict)
-#     assert len(cache.hookpoint_firing_counts) == 0  # Should be empty before run()
-
-
-# def test_hookpoint_firing_counts_updates(cache_setup):
-#     """
-#     Ensure that hookpoint_firing_counts is properly updated after running the cache.
-#     """
-#     cache = cache_setup["cache"]
-#     tokens = cache_setup["tokens"]
-#     cache.run(cache_setup["cache_cfg"].n_tokens, tokens)
-
-#     assert (
-#         len(cache.hookpoint_firing_counts) > 0
-#     ), "hookpoint_firing_counts should not be empty after run()"
-#     for hookpoint, counts in cache.hookpoint_firing_counts.items():
-#         assert isinstance(
-#             counts, torch.Tensor
-#         ), f"Counts for {hookpoint} should be a torch.Tensor"
-#         assert counts.ndim == 1, f"Counts for {hookpoint} should be a 1D tensor"
-#         assert (counts >= 0).all(), f"Counts for {hookpoint} should be non-negative"
-
-
-# def test_hookpoint_firing_counts_persistence(cache_setup):
-#     """
-#     Ensure that hookpoint_firing_counts are correctly saved and loaded.
-#     """
-#     cache = cache_setup["cache"]
-#     cache.save_firing_counts()
-
-#     firing_counts_path = Path.cwd() / "results" / "log" / "hookpoint_firing_counts.pt"
-#     assert firing_counts_path.exists(), "Firing counts file should exist after saving"
-
-#     loaded_counts = torch.load(firing_counts_path, weights_only=True)
-#     assert isinstance(
-#         loaded_counts, dict
-#     ), "Loaded firing counts should be a dictionary"
-#     assert (
-#         loaded_counts.keys() == cache.hookpoint_firing_counts.keys()
-#     ), "Loaded firing counts keys should match saved keys"
-
-#     for hookpoint, counts in loaded_counts.items():
-#         assert torch.equal(
-#             counts, cache.hookpoint_firing_counts[hookpoint]
-#         ), f"Mismatch in firing counts for {hookpoint}"
+def test_hookpoint_firing_counts_initialization(cache_setup):
+    """
+    Ensure that hookpoint_firing_counts is initialized as an empty dictionary.
+    """
+    cache = cache_setup["cache"]
+    assert isinstance(cache.hookpoint_firing_counts, dict)
+    assert len(cache.hookpoint_firing_counts) == 0  # Should be empty before run()
+
+
+def test_hookpoint_firing_counts_updates(cache_setup):
+    """
+    Ensure that hookpoint_firing_counts is properly updated after running the cache.
+    """
+    cache = cache_setup["cache"]
+    tokens = cache_setup["tokens"]
+    cache.run(cache_setup["cache_cfg"].n_tokens, tokens)
+
+    assert (
+        len(cache.hookpoint_firing_counts) > 0
+    ), "hookpoint_firing_counts should not be empty after run()"
+    for hookpoint, counts in cache.hookpoint_firing_counts.items():
+        assert isinstance(
+            counts, torch.Tensor
+        ), f"Counts for {hookpoint} should be a torch.Tensor"
+        assert counts.ndim == 1, f"Counts for {hookpoint} should be a 1D tensor"
+        assert (counts >= 0).all(), f"Counts for {hookpoint} should be non-negative"
+
+
+def test_hookpoint_firing_counts_persistence(cache_setup):
+    """
+    Ensure that hookpoint_firing_counts are correctly saved and loaded.
+ """ + cache = cache_setup["cache"] + cache.save_firing_counts() + + firing_counts_path = Path.cwd() / "results" / "log" / "hookpoint_firing_counts.pt" + assert firing_counts_path.exists(), "Firing counts file should exist after saving" + + loaded_counts = torch.load(firing_counts_path, weights_only=True) + assert isinstance( + loaded_counts, dict + ), "Loaded firing counts should be a dictionary" + assert ( + loaded_counts.keys() == cache.hookpoint_firing_counts.keys() + ), "Loaded firing counts keys should match saved keys" + + for hookpoint, counts in loaded_counts.items(): + assert torch.equal( + counts, cache.hookpoint_firing_counts[hookpoint] + ), f"Mismatch in firing counts for {hookpoint}" From 96247ffd32300ff38ee776e0f0703e8ae4d275ed Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 16:07:30 +0000 Subject: [PATCH 21/22] remove print --- tests/test_latents/test_cache.py | 4 ---- tests/test_latents/test_constructor.py | 2 -- 2 files changed, 6 deletions(-) diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py index 9b281f97..82a93647 100644 --- a/tests/test_latents/test_cache.py +++ b/tests/test_latents/test_cache.py @@ -16,7 +16,6 @@ def test_latent_locations(cache_setup: dict[str, Any]): locations = cache.cache.latent_locations["layers.1"] max_values, _ = locations.max(axis=0) # Expected values based on the cache run - print(max_values) assert max_values[0] == 5, "Expected first dimension max value to be 5" assert max_values[1] == 15, "Expected token ids to go up to 15" assert max_values[2] > 32700, "Expected latent dimension around 32768" @@ -28,7 +27,6 @@ def test_split_files_created(cache_setup: dict[str, Any]): """ save_dir = cache_setup["temp_dir"] / "layers.1" cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")] - print(cache_files) assert len(cache_files) == 5, "Expected 5 split files in the cache directory" @@ -60,7 +58,6 @@ def test_split_file_contents(cache_setup: dict[str, Any]): err_msg="Tokens saved do not match the input tokens", ) max_values = locations.max(axis=0) - print(max_values) assert max_values[0] == 5, "Max batch index mismatch in saved file" assert max_values[1] == 15, "Max token value mismatch in saved file" assert max_values[2] > 6499, "Latent dimension mismatch in saved file" @@ -74,7 +71,6 @@ def test_config_file(cache_setup: dict[str, Any]): with open(config_path, "r") as f: config = json.load(f) cache_cfg = cache_setup["cache_cfg"] - print(cache_cfg) assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "ctx_len mismatch" assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py index c64238f9..8f74afdd 100644 --- a/tests/test_latents/test_constructor.py +++ b/tests/test_latents/test_constructor.py @@ -39,9 +39,7 @@ def test_save_load_cache( ) tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens() # type: ignore assert (tokens == cache_setup["tokens"][: len(tokens)]).all() - print(tokens.shape) for record in dataset: - print(len(record.train), len(record.test)) assert len(record.train) <= sampler_cfg.n_examples_train assert len(record.test) <= sampler_cfg.n_examples_test From 7189ce1332d69e066b59b84e9b1734c7c7da941e Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 16:09:50 +0000 Subject: [PATCH 22/22] remove -s --- .github/workflows/build.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3414c4cf..c7c0ecfc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: python -m pip install --upgrade pip pip install -e ".[dev,visualize]" - name: Run tests - run: pytest -s + run: pytest - name: build run: pip wheel --no-deps -w dist . release: