In [1]:
from subspace_partition.subspace_partition import run_subspace_partition, SubspacePartitionConfig

import transformer_lens
from pathlib import Path

In [15]:
import shutil

# Transformer dimensions used when the model config is built further down.
# NOTE: "EMBEDDNING_DIM" is misspelled, but later cells reference this exact
# name, so it is kept as-is here.
EMBEDDNING_DIM = 64
NUM_HEADS = 8
CONTEXT_LENGTH = 32

# Character vocabulary: one single-character string per uppercase ASCII letter.
VOCABULARY = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# Experiment name and the directory its outputs are written to.
EXPERIMENT_NAME = "test_experiment"
OUTPUT_DIR = Path("test_outputs")

# Remove the output directory left by any earlier run; errors (for example the
# directory not existing yet) are ignored.
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)

In [10]:
from transformers import PreTrainedTokenizer
from typing import List, Optional

class SimpleCharTokenizer(PreTrainedTokenizer):
    """Character-level tokenizer whose vocabulary is a caller-supplied alphabet.

    Every character in ``alphabet`` gets the id of its position in that list,
    and text is tokenized by splitting it into individual characters. Special
    tokens that are set after the parent initializer has run are mirrored into
    the same lookup tables, so ``vocab_size`` and ``get_vocab()`` count them.
    """

    def __init__(self, alphabet: List[str], **kwargs):
        """
        Args:
            alphabet: List of characters to use as vocabulary. The id of a
                character is its index in this list.
            **kwargs: Forwarded unchanged to the parent initializer (for
                example ``bos_token``, ``eos_token``, ``unk_token``,
                ``pad_token``).
        """
        # Store alphabet
        self.alphabet = alphabet

        # Create vocab mapping: char -> id, and its inverse id -> char.
        # Both tables are built before the parent initializer is called.
        # NOTE(review): presumably the parent needs get_vocab() during its own
        # init -- confirm against the installed transformers version.
        self.char_to_id = {char: idx for idx, char in enumerate(alphabet)}
        self.id_to_char = {idx: char for char, idx in self.char_to_id.items()}

        super().__init__(**kwargs)

        # Add special tokens to vocab after parent init: read each special
        # token and its id back from the tokenizer and mirror them locally.
        # NOTE(review): the ids come from the parent class; presumably they are
        # assigned after the alphabet ids -- confirm.
        special_tokens = [
            (self.bos_token, self.bos_token_id),
            (self.eos_token, self.eos_token_id),
            (self.unk_token, self.unk_token_id),
            (self.pad_token, self.pad_token_id),
        ]
        for token, token_id in special_tokens:
            # Skip special tokens that are unset or have no id.
            if token and token_id is not None:
                self.char_to_id[token] = token_id
                self.id_to_char[token_id] = token

    @property
    def vocab_size(self) -> int:
        """Number of entries in the char -> id table (special tokens included once registered)."""
        return len(self.char_to_id)

    def get_vocab(self):
        """Return a shallow copy of the char -> id table."""
        return self.char_to_id.copy()

    def _tokenize(self, text: str) -> List[str]:
        """Split text into individual characters"""
        return list(text)

    def _convert_token_to_id(self, token: str) -> int:
        """Convert character to ID.

        Characters missing from the table map to the id stored for
        ``unk_token``; if that is missing too, the result is 0.
        """
        return self.char_to_id.get(token, self.char_to_id.get(self.unk_token, 0))

    def _convert_id_to_token(self, index: int) -> str:
        """Convert ID to character.

        Ids missing from the table map to ``unk_token``, or to the empty
        string when no ``unk_token`` is set.
        """
        return self.id_to_char.get(index, self.unk_token or "")

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        """Join characters back into string"""
        return "".join(tokens)

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None):
        """Save vocabulary to file.

        Args:
            save_directory: Directory to write into; created if it does not exist.
            filename_prefix: Optional prefix; when given, the file is named
                ``"<prefix>-vocab.json"`` instead of ``"vocab.json"``.

        Returns:
            A one-element tuple containing the path of the written JSON file.
        """
        import json
        import os

        # exist_ok=True creates the directory when needed without the
        # check-then-create race of a separate isdir() test.
        os.makedirs(save_directory, exist_ok=True)

        vocab_file = os.path.join(
            save_directory,
            (filename_prefix + "-" if filename_prefix else "") + "vocab.json"
        )

        # ensure_ascii=False writes non-ASCII characters verbatim (file is UTF-8).
        with open(vocab_file, "w", encoding="utf-8") as f:
            json.dump(self.char_to_id, f, ensure_ascii=False, indent=2)

        return (vocab_file,)

In [None]:
# Character tokenizer over VOCABULARY with four single-character special tokens.
# Each keyword sits on its own line with a trailing comma; the previous layout
# was missing the comma after pad_token, which made this cell a SyntaxError.
tokenizer = SimpleCharTokenizer(
    alphabet=VOCABULARY,
    bos_token=">",
    eos_token="<",
    unk_token="?",
    pad_token="_",
    name_or_path="custom",
    add_bos_token=True,
)

# Two-layer, attention-only transformer; the per-head width is the embedding
# width divided evenly across the heads.
# NOTE(review): the load error recorded in this notebook shows the checkpoint
# was saved with 30 vocabulary rows while the model had 26. Confirm that
# tokenizer.vocab_size is 30 (26 letters + 4 special tokens) before loading.
model_config = transformer_lens.HookedTransformerConfig(
    d_model=EMBEDDNING_DIM,
    d_head=EMBEDDNING_DIM // NUM_HEADS,
    n_layers=2,
    n_ctx=CONTEXT_LENGTH,
    n_heads=NUM_HEADS,
    d_vocab=tokenizer.vocab_size,
    attn_only=True,
)

# Path to the saved model weights (a state dict, per the variable name).
model_state_dict_path = Path("copy_transformer.pt")

# Experiment configuration: which model to load, where to write results, and
# which activation sites (the residual stream after each block) to use.
subspace_partition_config = SubspacePartitionConfig(
    exp_name=EXPERIMENT_NAME,
    output_dir=OUTPUT_DIR,
    model_config=model_config,
    model_weights_path=model_state_dict_path,
    act_sites=["blocks.0.hook_resid_post", "blocks.1.hook_resid_post"],
    tokenizer=tokenizer,
)

In [17]:
# Run the subspace-partition experiment with the config built in the previous cell.
# NOTE(review): the output recorded below shows this call loading the checkpoint
# into a HookedTransformer and failing on a vocabulary size mismatch
# (checkpoint: 30 rows, model: 26 rows) -- re-run after confirming d_vocab.
run_subspace_partition(subspace_partition_config)

Moving model to device:  cpu


RuntimeError: Error(s) in loading state_dict for HookedTransformer:
	size mismatch for embed.W_E: copying a param with shape torch.Size([30, 64]) from checkpoint, the shape in current model is torch.Size([26, 64]).
	size mismatch for unembed.W_U: copying a param with shape torch.Size([64, 30]) from checkpoint, the shape in current model is torch.Size([64, 26]).
	size mismatch for unembed.b_U: copying a param with shape torch.Size([30]) from checkpoint, the shape in current model is torch.Size([26]).

In [None]:
# Build a HookedTransformer directly from model_config with the custom tokenizer attached.
# NOTE(review): no checkpoint is loaded in this cell, so the weights are
# presumably freshly initialized -- confirm.
model = transformer_lens.HookedTransformer(model_config, tokenizer=tokenizer)