From 6376f8a8f8353c11b2e1f52110cfb7b6acfb71f2 Mon Sep 17 00:00:00 2001
From: Goncalo Paulo <30472805+SrGonao@users.noreply.github.com>
Date: Mon, 2 Jun 2025 14:17:50 +0100
Subject: [PATCH 01/22] Update build.yml

---
 .github/workflows/build.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 5ada2a99..a272de8a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -22,6 +22,8 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -e ".[dev,visualize]"
+      - name: Check space
+        run: du -sh
       - name: Run tests
         run: pytest
       - name: build

From 0cb78b34765c903409e142118df0c4da3f3650cf Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 13:19:53 +0000
Subject: [PATCH 02/22] Debug tests

---
 tests/conftest.py                | 2 +-
 tests/test_latents/test_cache.py | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 34376088..f7b787e8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -98,7 +98,7 @@ def cache_setup(tmp_path_factory, mock_dataset: torch.Tensor, model: PreTrainedM

     # Save the cache config
-    cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-70m")
+    cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-160m")
     hookpoint_firing_counts = torch.load(
         log_path / "hookpoint_firing_counts.pt", weights_only=True
     )
diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py
index c62c1cd6..e4bf5853 100644
--- a/tests/test_latents/test_cache.py
+++ b/tests/test_latents/test_cache.py
@@ -16,6 +16,7 @@ def test_latent_locations(cache_setup: dict[str, Any]):
     locations = cache.cache.latent_locations["layers.1"]
     max_values, _ = locations.max(axis=0)
     # Expected values based on the cache run
+    print(max_values)
     assert max_values[0] == 5, "Expected first dimension max value to be 5"
     assert max_values[1] == 15, "Expected token ids to go up to 15"
     assert max_values[2] > 32700, "Expected latent dimension around 32768"
@@ -27,6 +28,7 @@ def test_split_files_created(cache_setup: dict[str, Any]):
     """
     save_dir = cache_setup["temp_dir"] / "layers.1"
     cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")]
+    print(cache_files)
     assert len(cache_files) == 5, "Expected 5 split files in the cache directory"
@@ -58,6 +60,7 @@ def test_split_file_contents(cache_setup: dict[str, Any]):
         err_msg="Tokens saved do not match the input tokens",
     )
     max_values = locations.max(axis=0)
+    print(max_values)
     assert max_values[0] == 5, "Max batch index mismatch in saved file"
     assert max_values[1] == 15, "Max token value mismatch in saved file"
     assert max_values[2] > 6499, "Latent dimension mismatch in saved file"

From 00fe96f7b64a19ae9a7e0b895e24bbb56ceecac2 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 13:23:29 +0000
Subject: [PATCH 03/22] More prints

---
 tests/test_latents/test_cache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py
index e4bf5853..850928aa 100644
--- a/tests/test_latents/test_cache.py
+++ b/tests/test_latents/test_cache.py
@@ -74,7 +74,7 @@ def test_config_file(cache_setup: dict[str, Any]):
     with open(config_path, "r") as f:
         config = json.load(f)
     cache_cfg = cache_setup["cache_cfg"]
-
+    print(cache_cfg)
     assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch"
     assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "Cache_ctx_len mismatch"
     assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch"

From e7ce605aeec089c5cd6d7b3203b97185f1310518 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 13:35:40 +0000
Subject: [PATCH 04/22] test

---
 tests/test_latents/{test_cache.py => ucache.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/test_latents/{test_cache.py => ucache.py} (100%)

diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/ucache.py
similarity index 100%
rename from tests/test_latents/test_cache.py
rename to tests/test_latents/ucache.py

From 93790c349b481e761f6d41b86b62b67753cf07dd Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 13:57:23 +0000
Subject: [PATCH 05/22] no more firing counts

---
 tests/conftest.py | 108 +++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 54 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index f7b787e8..345a4661 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -99,65 +99,65 @@ def cache_setup(tmp_path_factory, mock_dataset: torch.Tensor, model: PreTrainedM

     # Save the cache config
     cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-160m")
-    hookpoint_firing_counts = torch.load(
-        log_path / "hookpoint_firing_counts.pt", weights_only=True
-    )
+    # hookpoint_firing_counts = torch.load(
+    #     log_path / "hookpoint_firing_counts.pt", weights_only=True
+    # )
     return {
         "cache": cache,
         "tokens": tokens,
         "cache_cfg": cache_cfg,
         "temp_dir": temp_dir,
-        "firing_counts": hookpoint_firing_counts,
+        # "firing_counts": hookpoint_firing_counts,
     }


-def test_hookpoint_firing_counts_initialization(cache_setup):
-    """
-    Ensure that hookpoint_firing_counts is initialized as an empty dictionary.
-    """
-    cache = cache_setup["cache"]
-    assert isinstance(cache.hookpoint_firing_counts, dict)
-    assert len(cache.hookpoint_firing_counts) == 0  # Should be empty before run()
-
-
-def test_hookpoint_firing_counts_updates(cache_setup):
-    """
-    Ensure that hookpoint_firing_counts is properly updated after running the cache.
-    """
-    cache = cache_setup["cache"]
-    tokens = cache_setup["tokens"]
-    cache.run(cache_setup["cache_cfg"].n_tokens, tokens)
-
-    assert (
-        len(cache.hookpoint_firing_counts) > 0
-    ), "hookpoint_firing_counts should not be empty after run()"
-    for hookpoint, counts in cache.hookpoint_firing_counts.items():
-        assert isinstance(
-            counts, torch.Tensor
-        ), f"Counts for {hookpoint} should be a torch.Tensor"
-        assert counts.ndim == 1, f"Counts for {hookpoint} should be a 1D tensor"
-        assert (counts >= 0).all(), f"Counts for {hookpoint} should be non-negative"
-
-
-def test_hookpoint_firing_counts_persistence(cache_setup):
-    """
-    Ensure that hookpoint_firing_counts are correctly saved and loaded.
- """ - cache = cache_setup["cache"] - cache.save_firing_counts() - - firing_counts_path = Path.cwd() / "results" / "log" / "hookpoint_firing_counts.pt" - assert firing_counts_path.exists(), "Firing counts file should exist after saving" - - loaded_counts = torch.load(firing_counts_path, weights_only=True) - assert isinstance( - loaded_counts, dict - ), "Loaded firing counts should be a dictionary" - assert ( - loaded_counts.keys() == cache.hookpoint_firing_counts.keys() - ), "Loaded firing counts keys should match saved keys" - - for hookpoint, counts in loaded_counts.items(): - assert torch.equal( - counts, cache.hookpoint_firing_counts[hookpoint] - ), f"Mismatch in firing counts for {hookpoint}" +# def test_hookpoint_firing_counts_initialization(cache_setup): +# """ +# Ensure that hookpoint_firing_counts is initialized as an empty dictionary. +# """ +# cache = cache_setup["cache"] +# assert isinstance(cache.hookpoint_firing_counts, dict) +# assert len(cache.hookpoint_firing_counts) == 0 # Should be empty before run() + + +# def test_hookpoint_firing_counts_updates(cache_setup): +# """ +# Ensure that hookpoint_firing_counts is properly updated after running the cache. +# """ +# cache = cache_setup["cache"] +# tokens = cache_setup["tokens"] +# cache.run(cache_setup["cache_cfg"].n_tokens, tokens) + +# assert ( +# len(cache.hookpoint_firing_counts) > 0 +# ), "hookpoint_firing_counts should not be empty after run()" +# for hookpoint, counts in cache.hookpoint_firing_counts.items(): +# assert isinstance( +# counts, torch.Tensor +# ), f"Counts for {hookpoint} should be a torch.Tensor" +# assert counts.ndim == 1, f"Counts for {hookpoint} should be a 1D tensor" +# assert (counts >= 0).all(), f"Counts for {hookpoint} should be non-negative" + + +# def test_hookpoint_firing_counts_persistence(cache_setup): +# """ +# Ensure that hookpoint_firing_counts are correctly saved and loaded. 
+# """ +# cache = cache_setup["cache"] +# cache.save_firing_counts() + +# firing_counts_path = Path.cwd() / "results" / "log" / "hookpoint_firing_counts.pt" +# assert firing_counts_path.exists(), "Firing counts file should exist after saving" + +# loaded_counts = torch.load(firing_counts_path, weights_only=True) +# assert isinstance( +# loaded_counts, dict +# ), "Loaded firing counts should be a dictionary" +# assert ( +# loaded_counts.keys() == cache.hookpoint_firing_counts.keys() +# ), "Loaded firing counts keys should match saved keys" + +# for hookpoint, counts in loaded_counts.items(): +# assert torch.equal( +# counts, cache.hookpoint_firing_counts[hookpoint] +# ), f"Mismatch in firing counts for {hookpoint}" From 0423d0b53d1d821e5d4cd485a7c3e9041e527d5a Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 14:03:12 +0000 Subject: [PATCH 06/22] No tests --- tests/test_latents/test_cache.py | 80 ++++++++ tests/test_latents/test_constructor.py | 262 ++++++++++++------------- tests/test_latents/ucache.py | 80 -------- 3 files changed, 211 insertions(+), 211 deletions(-) create mode 100644 tests/test_latents/test_cache.py delete mode 100644 tests/test_latents/ucache.py diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py new file mode 100644 index 00000000..0fe42207 --- /dev/null +++ b/tests/test_latents/test_cache.py @@ -0,0 +1,80 @@ +# import json +# import os +# from pathlib import Path +# from typing import Any + +# import numpy as np +# from safetensors.numpy import load_file + + +# def test_latent_locations(cache_setup: dict[str, Any]): +# """ +# Test that the latent locations generated in memory have the expected +# shape and values. +# """ +# cache = cache_setup["cache"] +# locations = cache.cache.latent_locations["layers.1"] +# max_values, _ = locations.max(axis=0) +# # Expected values based on the cache run +# print(max_values) +# assert max_values[0] == 5, "Expected first dimension max value to be 5" +# assert max_values[1] == 15, "Expected token ids to go up to 15" +# assert max_values[2] > 32700, "Expected latent dimension around 32768" + + +# def test_split_files_created(cache_setup: dict[str, Any]): +# """ +# Test that exactly 5 cache split files have been created. +# """ +# save_dir = cache_setup["temp_dir"] / "layers.1" +# cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")] +# print(cache_files) +# assert len(cache_files) == 5, "Expected 5 split files in the cache directory" + + +# def test_split_file_contents(cache_setup: dict[str, Any]): +# """ +# Test that one of the split files (loaded via safetensors) holds convincing data: +# - latent locations and activations have the same number of entries, +# - tokens were correctly stored and match the input tokens. +# - latent max values are as expected. 
+# """ +# save_dir = cache_setup["temp_dir"] / "layers.1" +# tokens = cache_setup["tokens"] +# # Choose one file to verify +# cache_files = os.listdir(save_dir) +# file_path = Path(save_dir) / cache_files[0] +# saved_cache = load_file(str(file_path)) + +# locations = saved_cache["locations"] +# activations = saved_cache["activations"] +# tokens = saved_cache["tokens"] + +# assert len(locations) == len( +# activations +# ), "Mismatch between locations & activations entries" + +# np.testing.assert_array_equal( +# tokens, +# tokens[:12, :], +# err_msg="Tokens saved do not match the input tokens", +# ) +# max_values = locations.max(axis=0) +# print(max_values) +# assert max_values[0] == 5, "Max batch index mismatch in saved file" +# assert max_values[1] == 15, "Max token value mismatch in saved file" +# assert max_values[2] > 6499, "Latent dimension mismatch in saved file" + + +# def test_config_file(cache_setup: dict[str, Any]): +# """ +# Test that the saved configuration file contains the correct parameters. +# """ +# config_path = cache_setup["temp_dir"] / "layers.1" / "config.json" +# with open(config_path, "r") as f: +# config = json.load(f) +# cache_cfg = cache_setup["cache_cfg"] +# print(cache_cfg) +# assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" +# assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "ctx_len mismatch" +# assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py index 0987edfa..467616cc 100644 --- a/tests/test_latents/test_constructor.py +++ b/tests/test_latents/test_constructor.py @@ -1,139 +1,139 @@ -import random -from itertools import chain -from typing import Any, Literal +# import random +# from itertools import chain +# from typing import Any, Literal -import pytest -import torch -from jaxtyping import Int -from torch import Tensor -from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast +# import pytest +# import torch +# from jaxtyping import Int +# from torch import Tensor +# from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast -from delphi.config import ConstructorConfig, SamplerConfig -from delphi.latents import ( - ActivatingExample, - Latent, - LatentDataset, - LatentRecord, - constructor, - sampler, -) -from delphi.latents.latents import ActivationData +# from delphi.config import ConstructorConfig, SamplerConfig +# from delphi.latents import ( +# ActivatingExample, +# Latent, +# LatentDataset, +# LatentRecord, +# constructor, +# sampler, +# ) +# from delphi.latents.latents import ActivationData -def test_save_load_cache( - cache_setup: dict[str, Any], - tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, -): - sampler_cfg = SamplerConfig( - n_examples_train=3, - n_examples_test=3, - n_quantiles=3, - train_type="quantiles", - test_type="quantiles", - ) - dataset = LatentDataset( - cache_setup["temp_dir"], - sampler_cfg, - ConstructorConfig(min_examples=0), - tokenizer, - ) - tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens() # type: ignore - assert (tokens == cache_setup["tokens"][: len(tokens)]).all() - for record in dataset: - print(record) - assert len(record.train) <= sampler_cfg.n_examples_train - assert len(record.test) <= sampler_cfg.n_examples_test +# def test_save_load_cache( +# cache_setup: dict[str, Any], +# tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, +# ): +# sampler_cfg = SamplerConfig( +# n_examples_train=3, +# 
n_examples_test=3, +# n_quantiles=3, +# train_type="quantiles", +# test_type="quantiles", +# ) +# dataset = LatentDataset( +# cache_setup["temp_dir"], +# sampler_cfg, +# ConstructorConfig(min_examples=0), +# tokenizer, +# ) +# tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens() # type: ignore +# assert (tokens == cache_setup["tokens"][: len(tokens)]).all() +# for record in dataset: +# print(record) +# assert len(record.train) <= sampler_cfg.n_examples_train +# assert len(record.test) <= sampler_cfg.n_examples_test -@pytest.fixture(scope="module") -def seed(): - random.seed(0) - torch.manual_seed(0) +# @pytest.fixture(scope="module") +# def seed(): +# random.seed(0) +# torch.manual_seed(0) -@pytest.mark.parametrize("n_samples", [5, 10, 100, 1000]) -@pytest.mark.parametrize("n_quantiles", [2, 5, 10, 23]) -@pytest.mark.parametrize("n_examples", [0, 2, 5, 10, 20]) -@pytest.mark.parametrize("train_type", ["top", "random", "quantiles"]) -def test_simple_cache( - n_samples: int, - n_quantiles: int, - n_examples: int, - train_type: Literal["top", "random", "quantiles"], - ctx_len: int = 32, - seed: None = None, - *, - tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, -): - torch.manual_seed(0) - tokens = torch.randint( - 0, - 100, - ( - n_samples, - ctx_len, - ), - ) - all_activation_data = [] - all_activations = [] - for feature_idx in range(2): - activations = torch.rand(n_samples, ctx_len, 1) * ( - torch.rand(n_samples)[..., None, None] ** 2 - ) - all_activations.append(activations) - mask = activations > 0.1 - locations = torch.nonzero(mask) - locations[..., 2] = feature_idx - all_activation_data.append(ActivationData(locations, activations[mask])) - activation_data, other_activation_data = all_activation_data - activations, other_activations = all_activations - record = LatentRecord(latent=Latent("test", 0), examples=[]) - constructor( - record, - activation_data, - constructor_cfg=ConstructorConfig( - example_ctx_len=ctx_len, - min_examples=1, - n_non_activating=50, - non_activating_source="neighbours", - ), - tokens=tokens, - tokenizer=tokenizer, - all_data={0: activation_data, 1: other_activation_data}, - ) - for i, j in zip(record.examples[:-1], record.examples[1:]): - assert i.max_activation >= j.max_activation - for i in record.examples: - index = (tokens == i.tokens).all(dim=-1).float().argmax() - assert (tokens[index] == i.tokens).all() - assert activations[index].max() == i.max_activation - sampler( - record, - SamplerConfig( - n_examples_train=n_examples, - n_examples_test=n_examples, - n_quantiles=n_quantiles, - train_type=train_type, - test_type="quantiles", - ), - tokenizer=tokenizer, - ) - assert len(record.train) <= n_examples - assert len(record.test) <= n_examples - for neighbor in record.neighbours: - assert neighbor.latent_index == 1 - for example in chain(record.train, record.test): - assert isinstance(example, ActivatingExample) - assert example.normalized_activations is not None - assert example.normalized_activations.shape == example.activations.shape - assert (example.normalized_activations <= 10).all() - assert (example.normalized_activations >= 0).all() - for quantile_list in (record.test,) + ( # type: ignore - (record.train,) if train_type == "quantiles" else () - ): - quantile_list: list[ActivatingExample] = quantile_list - for k, i in enumerate(quantile_list): - for j in quantile_list[k + 1 :]: - if i.quantile != j.quantile: - assert i.max_activation >= j.max_activation - assert i.quantile < j.quantile +# @pytest.mark.parametrize("n_samples", 
[5, 10, 100, 1000]) +# @pytest.mark.parametrize("n_quantiles", [2, 5, 10, 23]) +# @pytest.mark.parametrize("n_examples", [0, 2, 5, 10, 20]) +# @pytest.mark.parametrize("train_type", ["top", "random", "quantiles"]) +# def test_simple_cache( +# n_samples: int, +# n_quantiles: int, +# n_examples: int, +# train_type: Literal["top", "random", "quantiles"], +# ctx_len: int = 32, +# seed: None = None, +# *, +# tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, +# ): +# torch.manual_seed(0) +# tokens = torch.randint( +# 0, +# 100, +# ( +# n_samples, +# ctx_len, +# ), +# ) +# all_activation_data = [] +# all_activations = [] +# for feature_idx in range(2): +# activations = torch.rand(n_samples, ctx_len, 1) * ( +# torch.rand(n_samples)[..., None, None] ** 2 +# ) +# all_activations.append(activations) +# mask = activations > 0.1 +# locations = torch.nonzero(mask) +# locations[..., 2] = feature_idx +# all_activation_data.append(ActivationData(locations, activations[mask])) +# activation_data, other_activation_data = all_activation_data +# activations, other_activations = all_activations +# record = LatentRecord(latent=Latent("test", 0), examples=[]) +# constructor( +# record, +# activation_data, +# constructor_cfg=ConstructorConfig( +# example_ctx_len=ctx_len, +# min_examples=1, +# n_non_activating=50, +# non_activating_source="neighbours", +# ), +# tokens=tokens, +# tokenizer=tokenizer, +# all_data={0: activation_data, 1: other_activation_data}, +# ) +# for i, j in zip(record.examples[:-1], record.examples[1:]): +# assert i.max_activation >= j.max_activation +# for i in record.examples: +# index = (tokens == i.tokens).all(dim=-1).float().argmax() +# assert (tokens[index] == i.tokens).all() +# assert activations[index].max() == i.max_activation +# sampler( +# record, +# SamplerConfig( +# n_examples_train=n_examples, +# n_examples_test=n_examples, +# n_quantiles=n_quantiles, +# train_type=train_type, +# test_type="quantiles", +# ), +# tokenizer=tokenizer, +# ) +# assert len(record.train) <= n_examples +# assert len(record.test) <= n_examples +# for neighbor in record.neighbours: +# assert neighbor.latent_index == 1 +# for example in chain(record.train, record.test): +# assert isinstance(example, ActivatingExample) +# assert example.normalized_activations is not None +# assert example.normalized_activations.shape == example.activations.shape +# assert (example.normalized_activations <= 10).all() +# assert (example.normalized_activations >= 0).all() +# for quantile_list in (record.test,) + ( # type: ignore +# (record.train,) if train_type == "quantiles" else () +# ): +# quantile_list: list[ActivatingExample] = quantile_list +# for k, i in enumerate(quantile_list): +# for j in quantile_list[k + 1 :]: +# if i.quantile != j.quantile: +# assert i.max_activation >= j.max_activation +# assert i.quantile < j.quantile diff --git a/tests/test_latents/ucache.py b/tests/test_latents/ucache.py deleted file mode 100644 index 850928aa..00000000 --- a/tests/test_latents/ucache.py +++ /dev/null @@ -1,80 +0,0 @@ -import json -import os -from pathlib import Path -from typing import Any - -import numpy as np -from safetensors.numpy import load_file - - -def test_latent_locations(cache_setup: dict[str, Any]): - """ - Test that the latent locations generated in memory have the expected - shape and values. 
- """ - cache = cache_setup["cache"] - locations = cache.cache.latent_locations["layers.1"] - max_values, _ = locations.max(axis=0) - # Expected values based on the cache run - print(max_values) - assert max_values[0] == 5, "Expected first dimension max value to be 5" - assert max_values[1] == 15, "Expected token ids to go up to 15" - assert max_values[2] > 32700, "Expected latent dimension around 32768" - - -def test_split_files_created(cache_setup: dict[str, Any]): - """ - Test that exactly 5 cache split files have been created. - """ - save_dir = cache_setup["temp_dir"] / "layers.1" - cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")] - print(cache_files) - assert len(cache_files) == 5, "Expected 5 split files in the cache directory" - - -def test_split_file_contents(cache_setup: dict[str, Any]): - """ - Test that one of the split files (loaded via safetensors) holds convincing data: - - latent locations and activations have the same number of entries, - - tokens were correctly stored and match the input tokens. - - latent max values are as expected. - """ - save_dir = cache_setup["temp_dir"] / "layers.1" - tokens = cache_setup["tokens"] - # Choose one file to verify - cache_files = os.listdir(save_dir) - file_path = Path(save_dir) / cache_files[0] - saved_cache = load_file(str(file_path)) - - locations = saved_cache["locations"] - activations = saved_cache["activations"] - tokens = saved_cache["tokens"] - - assert len(locations) == len( - activations - ), "Mismatch between locations & activations entries" - - np.testing.assert_array_equal( - tokens, - tokens[:12, :], - err_msg="Tokens saved do not match the input tokens", - ) - max_values = locations.max(axis=0) - print(max_values) - assert max_values[0] == 5, "Max batch index mismatch in saved file" - assert max_values[1] == 15, "Max token value mismatch in saved file" - assert max_values[2] > 6499, "Latent dimension mismatch in saved file" - - -def test_config_file(cache_setup: dict[str, Any]): - """ - Test that the saved configuration file contains the correct parameters. - """ - config_path = cache_setup["temp_dir"] / "layers.1" / "config.json" - with open(config_path, "r") as f: - config = json.load(f) - cache_cfg = cache_setup["cache_cfg"] - print(cache_cfg) - assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" - assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "Cache_ctx_len mismatch" - assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" From 1da8631d4bebd72f435e96405a94659f071868ec Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 14:09:00 +0000 Subject: [PATCH 07/22] Constructor again --- .github/workflows/build.yml | 2 +- tests/test_latents/test_constructor.py | 72 +++++++++++--------------- 2 files changed, 32 insertions(+), 42 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a272de8a..cbe66202 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: python -m pip install --upgrade pip pip install -e ".[dev,visualize]" - name: Check space - run: du -sh + run: du -sh ../. 
       - name: Run tests
         run: pytest
       - name: build
diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py
index 467616cc..4f906d01 100644
--- a/tests/test_latents/test_constructor.py
+++ b/tests/test_latents/test_constructor.py
@@ -1,48 +1,38 @@
-# import random
-# from itertools import chain
-# from typing import Any, Literal
+from typing import Any

-# import pytest
-# import torch
-# from jaxtyping import Int
-# from torch import Tensor
-# from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
+from jaxtyping import Int
+from torch import Tensor
+from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

-# from delphi.config import ConstructorConfig, SamplerConfig
-# from delphi.latents import (
-#     ActivatingExample,
-#     Latent,
-#     LatentDataset,
-#     LatentRecord,
-#     constructor,
-#     sampler,
-# )
-# from delphi.latents.latents import ActivationData
+from delphi.config import ConstructorConfig, SamplerConfig
+from delphi.latents import (
+    LatentDataset,
+)


-# def test_save_load_cache(
-#     cache_setup: dict[str, Any],
-#     tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
-# ):
-#     sampler_cfg = SamplerConfig(
-#         n_examples_train=3,
-#         n_examples_test=3,
-#         n_quantiles=3,
-#         train_type="quantiles",
-#         test_type="quantiles",
-#     )
-#     dataset = LatentDataset(
-#         cache_setup["temp_dir"],
-#         sampler_cfg,
-#         ConstructorConfig(min_examples=0),
-#         tokenizer,
-#     )
-#     tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens()  # type: ignore
-#     assert (tokens == cache_setup["tokens"][: len(tokens)]).all()
-#     for record in dataset:
-#         print(record)
-#         assert len(record.train) <= sampler_cfg.n_examples_train
-#         assert len(record.test) <= sampler_cfg.n_examples_test
+def test_save_load_cache(
+    cache_setup: dict[str, Any],
+    tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
+):
+    sampler_cfg = SamplerConfig(
+        n_examples_train=3,
+        n_examples_test=3,
+        n_quantiles=3,
+        train_type="quantiles",
+        test_type="quantiles",
+    )
+    dataset = LatentDataset(
+        cache_setup["temp_dir"],
+        sampler_cfg,
+        ConstructorConfig(min_examples=0),
+        tokenizer,
+    )
+    tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens()  # type: ignore
+    assert (tokens == cache_setup["tokens"][: len(tokens)]).all()
+    for record in dataset:
+        print(record)
+        assert len(record.train) <= sampler_cfg.n_examples_train
+        assert len(record.test) <= sampler_cfg.n_examples_test

From cbc05e32e1b001543fc54e0073e300b7cbc6ba14 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:13:53 +0000
Subject: [PATCH 08/22] No asserts

---
 tests/test_latents/test_constructor.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py
index 4f906d01..1d8d4102 100644
--- a/tests/test_latents/test_constructor.py
+++ b/tests/test_latents/test_constructor.py
@@ -28,11 +28,12 @@ def test_save_load_cache(
         tokenizer,
     )
     tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens()  # type: ignore
-    assert (tokens == cache_setup["tokens"][: len(tokens)]).all()
+    # assert (tokens == cache_setup["tokens"][: len(tokens)]).all()
+    print(tokens.shape)
     for record in dataset:
         print(record)
-        assert len(record.train) <= sampler_cfg.n_examples_train
-        assert len(record.test) <= sampler_cfg.n_examples_test
+        # assert len(record.train) <= sampler_cfg.n_examples_train
+        # assert len(record.test) <= sampler_cfg.n_examples_test


 # @pytest.fixture(scope="module")

From 9dd154456483c54346ae706a3eee7ad57d47a2c5 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:17:22 +0000
Subject: [PATCH 09/22] add prints

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index cbe66202..99ef07a3 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -25,7 +25,7 @@
       - name: Check space
         run: du -sh ../.
       - name: Run tests
-        run: pytest
+        run: pytest -s
       - name: build
         run: pip wheel --no-deps -w dist .
   release:

From 8df50e4810b57b0089e09045c00bc2db0c1ab148 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:22:09 +0000
Subject: [PATCH 10/22] check space better

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 99ef07a3..13750e75 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -23,7 +23,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install -e ".[dev,visualize]"
       - name: Check space
-        run: du -sh ../.
+        run: du du -sh /*
       - name: Run tests
         run: pytest -s
       - name: build

From a5a5760ad1add00e2c1eab20a5b2417d236d3e78 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:22:37 +0000
Subject: [PATCH 11/22] now correct

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 13750e75..c31b983e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -23,7 +23,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install -e ".[dev,visualize]"
       - name: Check space
-        run: du du -sh /*
+        run: du -sh /*
       - name: Run tests
         run: pytest -s
       - name: build

From eac9d6d39af61095e02e13707b2e386d3e70d093 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:28:40 +0000
Subject: [PATCH 12/22] local

---
 .github/workflows/build.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c31b983e..490cc9f5 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ on:
       - main
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: gpaulo-ord-0
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -22,8 +22,6 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -e ".[dev,visualize]"
-      - name: Check space
-        run: du -sh /*
       - name: Run tests
         run: pytest -s
       - name: build

From 7efb5fc199a15a645a7c76b4b3b150402ffc13c1 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 14:30:10 +0000
Subject: [PATCH 13/22] local

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 490cc9f5..dcd24176 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,7 +12,7 @@ on:
       - main
 jobs:
   build:
-    runs-on: gpaulo-ord-0
+    runs-on: self-hosted
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5

From 84a01a4e08dedd8e61bf9de72e6e399e2d64d824 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 15:10:05 +0000
Subject: [PATCH 14/22] Add back all tests

---
 tests/test_latents/test_cache.py       | 136 +++++++++---------
 tests/test_latents/test_constructor.py | 188 ++++++++++++-------------
 2 files changed, 162 insertions(+), 162 deletions(-)

diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py
index 0fe42207..9b281f97 100644
--- a/tests/test_latents/test_cache.py
+++ b/tests/test_latents/test_cache.py
@@ -1,80 +1,80 @@
-# import json
-# import os
-# from pathlib import Path
-# from typing import Any
+import json
+import os
+from pathlib import Path
+from typing import Any

-# import numpy as np
-# from safetensors.numpy import load_file
+import numpy as np
+from safetensors.numpy import load_file


-# def test_latent_locations(cache_setup: dict[str, Any]):
-#     """
-#     Test that the latent locations generated in memory have the expected
-#     shape and values.
-#     """
-#     cache = cache_setup["cache"]
-#     locations = cache.cache.latent_locations["layers.1"]
-#     max_values, _ = locations.max(axis=0)
-#     # Expected values based on the cache run
-#     print(max_values)
-#     assert max_values[0] == 5, "Expected first dimension max value to be 5"
-#     assert max_values[1] == 15, "Expected token ids to go up to 15"
-#     assert max_values[2] > 32700, "Expected latent dimension around 32768"
+def test_latent_locations(cache_setup: dict[str, Any]):
+    """
+    Test that the latent locations generated in memory have the expected
+    shape and values.
+    """
+    cache = cache_setup["cache"]
+    locations = cache.cache.latent_locations["layers.1"]
+    max_values, _ = locations.max(axis=0)
+    # Expected values based on the cache run
+    print(max_values)
+    assert max_values[0] == 5, "Expected first dimension max value to be 5"
+    assert max_values[1] == 15, "Expected token ids to go up to 15"
+    assert max_values[2] > 32700, "Expected latent dimension around 32768"


-# def test_split_files_created(cache_setup: dict[str, Any]):
-#     """
-#     Test that exactly 5 cache split files have been created.
-#     """
-#     save_dir = cache_setup["temp_dir"] / "layers.1"
-#     cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")]
-#     print(cache_files)
-#     assert len(cache_files) == 5, "Expected 5 split files in the cache directory"
+def test_split_files_created(cache_setup: dict[str, Any]):
+    """
+    Test that exactly 5 cache split files have been created.
+    """
+    save_dir = cache_setup["temp_dir"] / "layers.1"
+    cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")]
+    print(cache_files)
+    assert len(cache_files) == 5, "Expected 5 split files in the cache directory"


-# def test_split_file_contents(cache_setup: dict[str, Any]):
-#     """
-#     Test that one of the split files (loaded via safetensors) holds convincing data:
-#     - latent locations and activations have the same number of entries,
-#     - tokens were correctly stored and match the input tokens.
-#     - latent max values are as expected.
-#     """
-#     save_dir = cache_setup["temp_dir"] / "layers.1"
-#     tokens = cache_setup["tokens"]
-#     # Choose one file to verify
-#     cache_files = os.listdir(save_dir)
-#     file_path = Path(save_dir) / cache_files[0]
-#     saved_cache = load_file(str(file_path))
-
-#     locations = saved_cache["locations"]
-#     activations = saved_cache["activations"]
-#     tokens = saved_cache["tokens"]
-
-#     assert len(locations) == len(
-#         activations
-#     ), "Mismatch between locations & activations entries"
-
-#     np.testing.assert_array_equal(
-#         tokens,
-#         tokens[:12, :],
-#         err_msg="Tokens saved do not match the input tokens",
-#     )
-#     max_values = locations.max(axis=0)
-#     print(max_values)
-#     assert max_values[0] == 5, "Max batch index mismatch in saved file"
-#     assert max_values[1] == 15, "Max token value mismatch in saved file"
-#     assert max_values[2] > 6499, "Latent dimension mismatch in saved file"
+def test_split_file_contents(cache_setup: dict[str, Any]):
+    """
+    Test that one of the split files (loaded via safetensors) holds convincing data:
+    - latent locations and activations have the same number of entries,
+    - tokens were correctly stored and match the input tokens.
+    - latent max values are as expected.
+ """ + save_dir = cache_setup["temp_dir"] / "layers.1" + tokens = cache_setup["tokens"] + # Choose one file to verify + cache_files = os.listdir(save_dir) + file_path = Path(save_dir) / cache_files[0] + saved_cache = load_file(str(file_path)) -# locations = saved_cache["locations"] -# activations = saved_cache["activations"] -# tokens = saved_cache["tokens"] + locations = saved_cache["locations"] + activations = saved_cache["activations"] + tokens = saved_cache["tokens"] -# assert len(locations) == len( -# activations -# ), "Mismatch between locations & activations entries" + assert len(locations) == len( + activations + ), "Mismatch between locations & activations entries" -# np.testing.assert_array_equal( -# tokens, -# tokens[:12, :], -# err_msg="Tokens saved do not match the input tokens", -# ) -# max_values = locations.max(axis=0) -# print(max_values) -# assert max_values[0] == 5, "Max batch index mismatch in saved file" -# assert max_values[1] == 15, "Max token value mismatch in saved file" -# assert max_values[2] > 6499, "Latent dimension mismatch in saved file" + np.testing.assert_array_equal( + tokens, + tokens[:12, :], + err_msg="Tokens saved do not match the input tokens", + ) + max_values = locations.max(axis=0) + print(max_values) + assert max_values[0] == 5, "Max batch index mismatch in saved file" + assert max_values[1] == 15, "Max token value mismatch in saved file" + assert max_values[2] > 6499, "Latent dimension mismatch in saved file" -# def test_config_file(cache_setup: dict[str, Any]): -# """ -# Test that the saved configuration file contains the correct parameters. -# """ -# config_path = cache_setup["temp_dir"] / "layers.1" / "config.json" -# with open(config_path, "r") as f: -# config = json.load(f) -# cache_cfg = cache_setup["cache_cfg"] -# print(cache_cfg) -# assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" -# assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "ctx_len mismatch" -# assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" +def test_config_file(cache_setup: dict[str, Any]): + """ + Test that the saved configuration file contains the correct parameters. 
+ """ + config_path = cache_setup["temp_dir"] / "layers.1" / "config.json" + with open(config_path, "r") as f: + config = json.load(f) + cache_cfg = cache_setup["cache_cfg"] + print(cache_cfg) + assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" + assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "ctx_len mismatch" + assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py index 1d8d4102..a0034d33 100644 --- a/tests/test_latents/test_constructor.py +++ b/tests/test_latents/test_constructor.py @@ -28,103 +28,103 @@ def test_save_load_cache( tokenizer, ) tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens() # type: ignore - # assert (tokens == cache_setup["tokens"][: len(tokens)]).all() + assert (tokens == cache_setup["tokens"][: len(tokens)]).all() print(tokens.shape) for record in dataset: - print(record) - # assert len(record.train) <= sampler_cfg.n_examples_train - # assert len(record.test) <= sampler_cfg.n_examples_test + print(len(record.train), len(record.test)) + assert len(record.train) <= sampler_cfg.n_examples_train + assert len(record.test) <= sampler_cfg.n_examples_test -# @pytest.fixture(scope="module") -# def seed(): -# random.seed(0) -# torch.manual_seed(0) +@pytest.fixture(scope="module") +def seed(): + random.seed(0) + torch.manual_seed(0) -# @pytest.mark.parametrize("n_samples", [5, 10, 100, 1000]) -# @pytest.mark.parametrize("n_quantiles", [2, 5, 10, 23]) -# @pytest.mark.parametrize("n_examples", [0, 2, 5, 10, 20]) -# @pytest.mark.parametrize("train_type", ["top", "random", "quantiles"]) -# def test_simple_cache( -# n_samples: int, -# n_quantiles: int, -# n_examples: int, -# train_type: Literal["top", "random", "quantiles"], -# ctx_len: int = 32, -# seed: None = None, -# *, -# tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, -# ): -# torch.manual_seed(0) -# tokens = torch.randint( -# 0, -# 100, -# ( -# n_samples, -# ctx_len, -# ), -# ) -# all_activation_data = [] -# all_activations = [] -# for feature_idx in range(2): -# activations = torch.rand(n_samples, ctx_len, 1) * ( -# torch.rand(n_samples)[..., None, None] ** 2 -# ) -# all_activations.append(activations) -# mask = activations > 0.1 -# locations = torch.nonzero(mask) -# locations[..., 2] = feature_idx -# all_activation_data.append(ActivationData(locations, activations[mask])) -# activation_data, other_activation_data = all_activation_data -# activations, other_activations = all_activations -# record = LatentRecord(latent=Latent("test", 0), examples=[]) -# constructor( -# record, -# activation_data, -# constructor_cfg=ConstructorConfig( -# example_ctx_len=ctx_len, -# min_examples=1, -# n_non_activating=50, -# non_activating_source="neighbours", -# ), -# tokens=tokens, -# tokenizer=tokenizer, -# all_data={0: activation_data, 1: other_activation_data}, -# ) -# for i, j in zip(record.examples[:-1], record.examples[1:]): -# assert i.max_activation >= j.max_activation -# for i in record.examples: -# index = (tokens == i.tokens).all(dim=-1).float().argmax() -# assert (tokens[index] == i.tokens).all() -# assert activations[index].max() == i.max_activation -# sampler( -# record, -# SamplerConfig( -# n_examples_train=n_examples, -# n_examples_test=n_examples, -# n_quantiles=n_quantiles, -# train_type=train_type, -# test_type="quantiles", -# ), -# tokenizer=tokenizer, -# ) -# assert len(record.train) <= n_examples -# assert len(record.test) <= n_examples -# 
for neighbor in record.neighbours: -# assert neighbor.latent_index == 1 -# for example in chain(record.train, record.test): -# assert isinstance(example, ActivatingExample) -# assert example.normalized_activations is not None -# assert example.normalized_activations.shape == example.activations.shape -# assert (example.normalized_activations <= 10).all() -# assert (example.normalized_activations >= 0).all() -# for quantile_list in (record.test,) + ( # type: ignore -# (record.train,) if train_type == "quantiles" else () -# ): -# quantile_list: list[ActivatingExample] = quantile_list -# for k, i in enumerate(quantile_list): -# for j in quantile_list[k + 1 :]: -# if i.quantile != j.quantile: -# assert i.max_activation >= j.max_activation -# assert i.quantile < j.quantile +@pytest.mark.parametrize("n_samples", [5, 10, 100, 1000]) +@pytest.mark.parametrize("n_quantiles", [2, 5, 10, 23]) +@pytest.mark.parametrize("n_examples", [0, 2, 5, 10, 20]) +@pytest.mark.parametrize("train_type", ["top", "random", "quantiles"]) +def test_simple_cache( + n_samples: int, + n_quantiles: int, + n_examples: int, + train_type: Literal["top", "random", "quantiles"], + ctx_len: int = 32, + seed: None = None, + *, + tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, +): + torch.manual_seed(0) + tokens = torch.randint( + 0, + 100, + ( + n_samples, + ctx_len, + ), + ) + all_activation_data = [] + all_activations = [] + for feature_idx in range(2): + activations = torch.rand(n_samples, ctx_len, 1) * ( + torch.rand(n_samples)[..., None, None] ** 2 + ) + all_activations.append(activations) + mask = activations > 0.1 + locations = torch.nonzero(mask) + locations[..., 2] = feature_idx + all_activation_data.append(ActivationData(locations, activations[mask])) + activation_data, other_activation_data = all_activation_data + activations, other_activations = all_activations + record = LatentRecord(latent=Latent("test", 0), examples=[]) + constructor( + record, + activation_data, + constructor_cfg=ConstructorConfig( + example_ctx_len=ctx_len, + min_examples=1, + n_non_activating=50, + non_activating_source="neighbours", + ), + tokens=tokens, + tokenizer=tokenizer, + all_data={0: activation_data, 1: other_activation_data}, + ) + for i, j in zip(record.examples[:-1], record.examples[1:]): + assert i.max_activation >= j.max_activation + for i in record.examples: + index = (tokens == i.tokens).all(dim=-1).float().argmax() + assert (tokens[index] == i.tokens).all() + assert activations[index].max() == i.max_activation + sampler( + record, + SamplerConfig( + n_examples_train=n_examples, + n_examples_test=n_examples, + n_quantiles=n_quantiles, + train_type=train_type, + test_type="quantiles", + ), + tokenizer=tokenizer, + ) + assert len(record.train) <= n_examples + assert len(record.test) <= n_examples + for neighbor in record.neighbours: + assert neighbor.latent_index == 1 + for example in chain(record.train, record.test): + assert isinstance(example, ActivatingExample) + assert example.normalized_activations is not None + assert example.normalized_activations.shape == example.activations.shape + assert (example.normalized_activations <= 10).all() + assert (example.normalized_activations >= 0).all() + for quantile_list in (record.test,) + ( # type: ignore + (record.train,) if train_type == "quantiles" else () + ): + quantile_list: list[ActivatingExample] = quantile_list + for k, i in enumerate(quantile_list): + for j in quantile_list[k + 1 :]: + if i.quantile != j.quantile: + assert i.max_activation >= j.max_activation + 
assert i.quantile < j.quantile From dfdcc7027d381304654bcca69b7099bacab4c4cd Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 15:20:07 +0000 Subject: [PATCH 15/22] cloud --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dcd24176..908921b3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ on: - main jobs: build: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e ".[dev,visualize]" + pip install -e ".[dev]" - name: Run tests run: pytest -s - name: build From 061b21d29f818c6929a6e020efa70ce804b82f59 Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 15:27:40 +0000 Subject: [PATCH 16/22] why --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 908921b3..796a6dcb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e ".[dev]" + pip install -e ".[dev,visualize]" - name: Run tests run: pytest -s - name: build From 19afccda6473e5606db190ae4b0fb049f77756b8 Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 15:38:12 +0000 Subject: [PATCH 17/22] python --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 796a6dcb..3414c4cf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.12" - name: Install dependencies run: | python -m pip install --upgrade pip @@ -40,7 +40,7 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.12" - name: Install dependencies run: pip install build twine - name: Semantic Release From 261bdbcbc058e127daedfa8fbb19f26dfbb0692e Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 15:47:05 +0000 Subject: [PATCH 18/22] import pytest? 
---
 tests/test_latents/test_constructor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py
index a0034d33..a024d858 100644
--- a/tests/test_latents/test_constructor.py
+++ b/tests/test_latents/test_constructor.py
@@ -1,5 +1,6 @@
 from typing import Any

+import pytest
 from jaxtyping import Int
 from torch import Tensor
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

From ed9edeaf81a907700bdaea17187c9d2a6f1bbd02 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 15:52:02 +0000
Subject: [PATCH 19/22] fix imports

---
 tests/test_latents/test_constructor.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py
index a024d858..c64238f9 100644
--- a/tests/test_latents/test_constructor.py
+++ b/tests/test_latents/test_constructor.py
@@ -1,14 +1,23 @@
-from typing import Any
+import random
+from itertools import chain
+from typing import Any, Literal

 import pytest
+import torch
 from jaxtyping import Int
 from torch import Tensor
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

 from delphi.config import ConstructorConfig, SamplerConfig
 from delphi.latents import (
+    ActivatingExample,
+    Latent,
     LatentDataset,
+    LatentRecord,
+    constructor,
+    sampler,
 )
+from delphi.latents.latents import ActivationData

From 965cf3e2e21de3d63a553f30824e352ff199b0a7 Mon Sep 17 00:00:00 2001
From: SrGonao
Date: Mon, 2 Jun 2025 15:58:42 +0000
Subject: [PATCH 20/22] smaller pythia

---
 tests/conftest.py | 116 +++++++++++++++++++++++-----------------------
 1 file changed, 58 insertions(+), 58 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 345a4661..64e1fd94 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -35,14 +35,14 @@
 @pytest.fixture(scope="module")
 def tokenizer() -> PreTrainedTokenizer | PreTrainedTokenizerFast:
-    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-160m")
+    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-70m")
     tokenizer.pad_token = tokenizer.eos_token
     return tokenizer


 @pytest.fixture(scope="module")
 def model() -> PreTrainedModel:
-    model = AutoModel.from_pretrained("EleutherAI/pythia-160m")
+    model = AutoModel.from_pretrained("EleutherAI/pythia-70m")
     return model
@@ -73,7 +73,7 @@ def cache_setup(tmp_path_factory, mock_dataset: torch.Tensor, model: PreTrainedM
         sampler_cfg=SamplerConfig(),
         cache_cfg=cache_cfg,
         model="EleutherAI/pythia-160m",
-        sparse_model="EleutherAI/sae-pythia-160m-32k",
+        sparse_model="EleutherAI/sae-pythia-70m-32k",
         hookpoints=["layers.1"],
     )
     hookpoint_to_sparse_encode, _ = load_hooks_sparse_coders(model, run_cfg_gemma)
@@ -98,66 +98,66 @@ def cache_setup(tmp_path_factory, mock_dataset: torch.Tensor, model: PreTrainedM

     # Save the cache config
-    cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-160m")
+    cache.save_config(temp_dir, cache_cfg, "EleutherAI/pythia-70m")
-    # hookpoint_firing_counts = torch.load(
-    #     log_path / "hookpoint_firing_counts.pt", weights_only=True
-    # )
+    hookpoint_firing_counts = torch.load(
+        log_path / "hookpoint_firing_counts.pt", weights_only=True
+    )
     return {
         "cache": cache,
         "tokens": tokens,
         "cache_cfg": cache_cfg,
         "temp_dir": temp_dir,
-        # "firing_counts": hookpoint_firing_counts,
+        "firing_counts": hookpoint_firing_counts,
     }


-# def test_hookpoint_firing_counts_initialization(cache_setup):
-#     """
-#     Ensure that hookpoint_firing_counts is initialized as an empty dictionary.
-#     """
-#     cache = cache_setup["cache"]
-#     assert isinstance(cache.hookpoint_firing_counts, dict)
-#     assert len(cache.hookpoint_firing_counts) == 0  # Should be empty before run()
-
-
-# def test_hookpoint_firing_counts_updates(cache_setup):
-#     """
-#     Ensure that hookpoint_firing_counts is properly updated after running the cache.
-#     """
-#     cache = cache_setup["cache"]
-#     tokens = cache_setup["tokens"]
-#     cache.run(cache_setup["cache_cfg"].n_tokens, tokens)
-
-#     assert (
-#         len(cache.hookpoint_firing_counts) > 0
-#     ), "hookpoint_firing_counts should not be empty after run()"
-#     for hookpoint, counts in cache.hookpoint_firing_counts.items():
-#         assert isinstance(
-#             counts, torch.Tensor
-#         ), f"Counts for {hookpoint} should be a torch.Tensor"
-#         assert counts.ndim == 1, f"Counts for {hookpoint} should be a 1D tensor"
-#         assert (counts >= 0).all(), f"Counts for {hookpoint} should be non-negative"
-
-
-# def test_hookpoint_firing_counts_persistence(cache_setup):
-#     """
-#     Ensure that hookpoint_firing_counts are correctly saved and loaded.
-#     """
-#     cache = cache_setup["cache"]
-#     cache.save_firing_counts()
-
-#     firing_counts_path = Path.cwd() / "results" / "log" / "hookpoint_firing_counts.pt"
-#     assert firing_counts_path.exists(), "Firing counts file should exist after saving"
-
-#     loaded_counts = torch.load(firing_counts_path, weights_only=True)
-#     assert isinstance(
-#         loaded_counts, dict
-#     ), "Loaded firing counts should be a dictionary"
-#     assert (
-#         loaded_counts.keys() == cache.hookpoint_firing_counts.keys()
-#     ), "Loaded firing counts keys should match saved keys"
-
-#     for hookpoint, counts in loaded_counts.items():
-#         assert torch.equal(
-#             counts, cache.hookpoint_firing_counts[hookpoint]
-#         ), f"Mismatch in firing counts for {hookpoint}"
+def test_hookpoint_firing_counts_initialization(cache_setup):
+    """
+    Ensure that hookpoint_firing_counts is initialized as an empty dictionary.
+    """
+    cache = cache_setup["cache"]
+    assert isinstance(cache.hookpoint_firing_counts, dict)
+    assert len(cache.hookpoint_firing_counts) == 0  # Should be empty before run()
+
+
+def test_hookpoint_firing_counts_updates(cache_setup):
+    """
+    Ensure that hookpoint_firing_counts is properly updated after running the cache.
+    """
+    cache = cache_setup["cache"]
+    tokens = cache_setup["tokens"]
+    cache.run(cache_setup["cache_cfg"].n_tokens, tokens)
+
+    assert (
+        len(cache.hookpoint_firing_counts) > 0
+    ), "hookpoint_firing_counts should not be empty after run()"
+    for hookpoint, counts in cache.hookpoint_firing_counts.items():
+        assert isinstance(
+            counts, torch.Tensor
+        ), f"Counts for {hookpoint} should be a torch.Tensor"
+        assert counts.ndim == 1, f"Counts for {hookpoint} should be a 1D tensor"
+        assert (counts >= 0).all(), f"Counts for {hookpoint} should be non-negative"
+
+
+def test_hookpoint_firing_counts_persistence(cache_setup):
+    """
+    Ensure that hookpoint_firing_counts are correctly saved and loaded.
+ """ + cache = cache_setup["cache"] + cache.save_firing_counts() + + firing_counts_path = Path.cwd() / "results" / "log" / "hookpoint_firing_counts.pt" + assert firing_counts_path.exists(), "Firing counts file should exist after saving" + + loaded_counts = torch.load(firing_counts_path, weights_only=True) + assert isinstance( + loaded_counts, dict + ), "Loaded firing counts should be a dictionary" + assert ( + loaded_counts.keys() == cache.hookpoint_firing_counts.keys() + ), "Loaded firing counts keys should match saved keys" + + for hookpoint, counts in loaded_counts.items(): + assert torch.equal( + counts, cache.hookpoint_firing_counts[hookpoint] + ), f"Mismatch in firing counts for {hookpoint}" From 96247ffd32300ff38ee776e0f0703e8ae4d275ed Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 16:07:30 +0000 Subject: [PATCH 21/22] remove print --- tests/test_latents/test_cache.py | 4 ---- tests/test_latents/test_constructor.py | 2 -- 2 files changed, 6 deletions(-) diff --git a/tests/test_latents/test_cache.py b/tests/test_latents/test_cache.py index 9b281f97..82a93647 100644 --- a/tests/test_latents/test_cache.py +++ b/tests/test_latents/test_cache.py @@ -16,7 +16,6 @@ def test_latent_locations(cache_setup: dict[str, Any]): locations = cache.cache.latent_locations["layers.1"] max_values, _ = locations.max(axis=0) # Expected values based on the cache run - print(max_values) assert max_values[0] == 5, "Expected first dimension max value to be 5" assert max_values[1] == 15, "Expected token ids to go up to 15" assert max_values[2] > 32700, "Expected latent dimension around 32768" @@ -28,7 +27,6 @@ def test_split_files_created(cache_setup: dict[str, Any]): """ save_dir = cache_setup["temp_dir"] / "layers.1" cache_files = [f for f in os.listdir(save_dir) if f.endswith(".safetensors")] - print(cache_files) assert len(cache_files) == 5, "Expected 5 split files in the cache directory" @@ -60,7 +58,6 @@ def test_split_file_contents(cache_setup: dict[str, Any]): err_msg="Tokens saved do not match the input tokens", ) max_values = locations.max(axis=0) - print(max_values) assert max_values[0] == 5, "Max batch index mismatch in saved file" assert max_values[1] == 15, "Max token value mismatch in saved file" assert max_values[2] > 6499, "Latent dimension mismatch in saved file" @@ -74,7 +71,6 @@ def test_config_file(cache_setup: dict[str, Any]): with open(config_path, "r") as f: config = json.load(f) cache_cfg = cache_setup["cache_cfg"] - print(cache_cfg) assert config["batch_size"] == cache_cfg.batch_size, "Config batch_size mismatch" assert config["cache_ctx_len"] == cache_cfg.cache_ctx_len, "ctx_len mismatch" assert config["n_tokens"] == cache_cfg.n_tokens, "Config n_tokens mismatch" diff --git a/tests/test_latents/test_constructor.py b/tests/test_latents/test_constructor.py index c64238f9..8f74afdd 100644 --- a/tests/test_latents/test_constructor.py +++ b/tests/test_latents/test_constructor.py @@ -39,9 +39,7 @@ def test_save_load_cache( ) tokens: Int[Tensor, "examples ctx_len"] = dataset.load_tokens() # type: ignore assert (tokens == cache_setup["tokens"][: len(tokens)]).all() - print(tokens.shape) for record in dataset: - print(len(record.train), len(record.test)) assert len(record.train) <= sampler_cfg.n_examples_train assert len(record.test) <= sampler_cfg.n_examples_test From 7189ce1332d69e066b59b84e9b1734c7c7da941e Mon Sep 17 00:00:00 2001 From: SrGonao Date: Mon, 2 Jun 2025 16:09:50 +0000 Subject: [PATCH 22/22] remove -s --- .github/workflows/build.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3414c4cf..c7c0ecfc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: python -m pip install --upgrade pip pip install -e ".[dev,visualize]" - name: Run tests - run: pytest -s + run: pytest - name: build run: pip wheel --no-deps -w dist . release: