In [1]:
from IPython import get_ipython

ipython = get_ipython()
if ipython is not None:
    ipython.magic("%load_ext autoreload")
    ipython.magic("%autoreload 2")

# Sad, really annoying to have to remember this
# import os
# os.environ["TRANSFORMERS_CACHE"] = "/workspace/cache"

  ipython.magic("%load_ext autoreload")
  ipython.magic("%autoreload 2")


In [2]:
from collections import defaultdict

import einops
import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

from transformer_lens import HookedTransformer, utils

In [3]:
model_name = "gpt2"  # ??? Why so slow
tl_model = HookedTransformer.from_pretrained_no_processing(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)
hf_model.eval()

Loaded pretrained model gpt2 into HookedTransformer


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
# tl_model = tl_model.to(torch.float64)
# hf_model = hf_model.to(torch.float64)

In [5]:
string = "Hello, world!"
tokens = tl_model.to_tokens(string)
logits, cache = tl_model.run_with_cache(tokens, prepend_bos=False)

done
done
done
done
done
done
done
done
done
done
done
done


In [6]:
class ActivationCacher:
    def __init__(self):
        self.activations = defaultdict(list)

    def cache_activations(self, module, module_name):
        def hook(module, input, output):
            self.activations[module_name].append(output)

        return hook

In [7]:
# Create an ActivationCacher instance
activation_cacher = ActivationCacher()

# Register hooks for caching activations
for name, module in hf_model.named_modules():
    module.register_forward_hook(activation_cacher.cache_activations(module, name))

In [8]:
hf_logits = hf_model(tokens).logits

In [9]:
torch.testing.assert_close(logits, hf_logits, atol=1e-9, rtol=1e-9)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (0, 4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (0, 4, 11201) (up to 1e-09 allowed)

In [None]:
MODE = "ln_f"

if MODE == "attn":
    tl_activation_name = "blocks.{layer_idx}.hook_attn_out"
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")

elif MODE == "pattern":
    tl_activation_name = utils.get_act_name("pattern", "{layer_idx}")  # lol
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn.attn_dropout"
    elif "pythia" in model_name:
        hf_activation_name = "gpt_neox.layers.{layer_idx}.attention.attention_dropout"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "mlp":
    tl_activation_name = utils.get_act_name("mlp_out", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = (
        "transformer.h.{layer_idx}.mlp.dropout"  # Can try .mlp.c_fc... etc
    )
elif MODE == "mlp_pre":
    tl_activation_name = utils.get_act_name("mlp_pre", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_fc"
elif MODE == "embed":
    assert "gpt2" in model_name
    tl_activation_name = utils.get_act_name("resid_pre", 0)
    hf_activation_name = "transformer.drop"
elif MODE == "ln":
    tl_activation_name = "blocks.{layer_idx}.ln1.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_1"
elif MODE == "ln2":
    tl_activation_name = "blocks.{layer_idx}.ln2.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_2"
elif MODE == "qkv":
    tl_activation_name = utils.get_act_name("q", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.attn.c_attn"
elif MODE == "ln_f":
    tl_activation_name = "ln_final.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.ln_f"

else:
    raise ValueError(f"Add this please! {MODE=}")

saved_preln = activation_cacher.activations["transformer.drop"][0]
# hook_normalized

for i in range(tl_model.cfg.n_layers):
    print(i)
    tl_act = cache[
        tl_activation_name.format(layer_idx=i)
        if "{layer_idx}" in tl_activation_name
        else tl_activation_name
    ]

    hf_act = activation_cacher.activations[
        hf_activation_name.format(layer_idx=i)
        if "{layer_idx}" in hf_activation_name
        else hf_activation_name
    ][0]

    if MODE in "qkv":
        query, key, value = hf_act.split(tl_model.cfg.d_model, dim=2)
        if MODE == "q":
            hf_act = einops.rearrange(
                query, "b s (h d) -> b s h d", h=tl_model.cfg.n_heads
            )
        else:
            raise NotImplementedError()
    elif MODE == "attn":
        hf_act = hf_act[0]

    try:  # Suppress
        torch.testing.assert_close(
            tl_act.to(torch.float32),
            hf_act.to(torch.float32),
            atol=1e-9,  # Wow, embed is super close
            rtol=1e-9,  # Better, but still failing a lot!
            # msg="Hello",
        )
    except Exception as e:
        a = str(e)
    else:
        a = None

    if a is not None:
        print(a)
        assert False

else:
    print("All good!")

: 

In [None]:
torch.testing.assert_close(
    # tl_model.W_Q[0, 0],
    # hf_model.transformer.h[0]
    # .attn.c_attn.weight.split(tl_model.cfg.d_model, dim=-1)[0]
    # .split(tl_model.cfg.d_head, dim=-1)[0],
    tl_model.W_U,
    hf_model.lm_head.weight.T,  # Gah!
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

: 

In [10]:
hf_model.lm_head.weight.T.dtype

torch.float32

In [11]:
tl_model.W_U.dtype

torch.float32

In [12]:
MODE = "resid_post"

if MODE == "resid_post":
    tl_activation_name = utils.get_act_name("resid_post", 11)
    hf_activation_name = "transformer"

if MODE == "attn":
    tl_activation_name = "blocks.{layer_idx}.hook_attn_out"
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")

elif MODE == "pattern":
    tl_activation_name = utils.get_act_name("pattern", "{layer_idx}")  # lol
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn.attn_dropout"
    elif "pythia" in model_name:
        hf_activation_name = "gpt_neox.layers.{layer_idx}.attention.attention_dropout"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "mlp":
    tl_activation_name = utils.get_act_name("mlp_out", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = (
        "transformer.h.{layer_idx}.mlp.dropout"  # Can try .mlp.c_fc... etc
    )
elif MODE == "mlp_pre":
    tl_activation_name = utils.get_act_name("mlp_pre", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_fc"
elif MODE == "mlp_post":
    tl_activation_name = utils.get_act_name("mlp_post", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_proj"
elif MODE == "embed":
    assert "gpt2" in model_name
    tl_activation_name = utils.get_act_name("resid_pre", 0)
    hf_activation_name = "transformer.drop"
elif MODE == "ln":
    tl_activation_name = "blocks.{layer_idx}.ln1.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_1"
elif MODE == "ln2":
    tl_activation_name = "blocks.{layer_idx}.ln2.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_2"
elif MODE == "qkv":
    tl_activation_name = utils.get_act_name("q", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.attn.c_attn"
elif MODE == "ln_f":
    tl_activation_name = "ln_final.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.ln_f"

else:
    raise ValueError(f"Add this please! {MODE=}")

saved_preln = activation_cacher.activations["transformer.drop"][0]
# hook_normalized

for i in range(tl_model.cfg.n_layers):
    print(i)
    tl_act = cache[
        tl_activation_name.format(layer_idx=i)
        if "{layer_idx}" in tl_activation_name
        else tl_activation_name
    ]

    hf_act = activation_cacher.activations[
        hf_activation_name.format(layer_idx=i)
        if "{layer_idx}" in hf_activation_name
        else hf_activation_name
    ][0]

    if MODE in "qkv":
        query, key, value = hf_act.split(tl_model.cfg.d_model, dim=2)
        if MODE == "q":
            hf_act = einops.rearrange(
                query, "b s (h d) -> b s h d", h=tl_model.cfg.n_heads
            )
        else:
            raise NotImplementedError()
    elif MODE == "attn":
        hf_act = hf_act[0]

    try:  # Suppress
        torch.testing.assert_close(
            tl_act.to(torch.float32),
            hf_act.to(torch.float32),
            atol=1e-9,  # Wow, embed is super close
            rtol=1e-9,  # Better, but still failing a lot!
            # msg="Hello",
        )
    except Exception as e:
        a = str(e)
    else:
        a = None

    if a is not None:
        print(a)
        assert False

else:
    print("All good!")

ValueError: Add this please! MODE='resid_post'

In [13]:
MODE = "resid_post"

if MODE == "resid_post":
    tl_activation_name = utils.get_act_name("resid_post", 11)
    hf_activation_name = "transformer"

elif MODE == "attn":
    tl_activation_name = "blocks.{layer_idx}.hook_attn_out"
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")

elif MODE == "pattern":
    tl_activation_name = utils.get_act_name("pattern", "{layer_idx}")  # lol
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn.attn_dropout"
    elif "pythia" in model_name:
        hf_activation_name = "gpt_neox.layers.{layer_idx}.attention.attention_dropout"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "mlp":
    tl_activation_name = utils.get_act_name("mlp_out", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = (
        "transformer.h.{layer_idx}.mlp.dropout"  # Can try .mlp.c_fc... etc
    )
elif MODE == "mlp_pre":
    tl_activation_name = utils.get_act_name("mlp_pre", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_fc"
elif MODE == "mlp_post":
    tl_activation_name = utils.get_act_name("mlp_post", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_proj"
elif MODE == "embed":
    assert "gpt2" in model_name
    tl_activation_name = utils.get_act_name("resid_pre", 0)
    hf_activation_name = "transformer.drop"
elif MODE == "ln":
    tl_activation_name = "blocks.{layer_idx}.ln1.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_1"
elif MODE == "ln2":
    tl_activation_name = "blocks.{layer_idx}.ln2.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_2"
elif MODE == "qkv":
    tl_activation_name = utils.get_act_name("q", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.attn.c_attn"
elif MODE == "ln_f":
    tl_activation_name = "ln_final.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.ln_f"

else:
    raise ValueError(f"Add this please! {MODE=}")

saved_preln = activation_cacher.activations["transformer.drop"][0]
# hook_normalized

for i in range(tl_model.cfg.n_layers):
    print(i)
    tl_act = cache[
        tl_activation_name.format(layer_idx=i)
        if "{layer_idx}" in tl_activation_name
        else tl_activation_name
    ]

    hf_act = activation_cacher.activations[
        hf_activation_name.format(layer_idx=i)
        if "{layer_idx}" in hf_activation_name
        else hf_activation_name
    ][0]

    if MODE in "qkv":
        query, key, value = hf_act.split(tl_model.cfg.d_model, dim=2)
        if MODE == "q":
            hf_act = einops.rearrange(
                query, "b s (h d) -> b s h d", h=tl_model.cfg.n_heads
            )
        else:
            raise NotImplementedError()
    elif MODE == "attn":
        hf_act = hf_act[0]

    try:  # Suppress
        torch.testing.assert_close(
            tl_act.to(torch.float32),
            hf_act.to(torch.float32),
            atol=1e-9,  # Wow, embed is super close
            rtol=1e-9,  # Better, but still failing a lot!
            # msg="Hello",
        )
    except Exception as e:
        a = str(e)
    else:
        a = None

    if a is not None:
        print(a)
        assert False

else:
    print("All good!")

0
'BaseModelOutputWithPastAndCrossAttentions' object has no attribute 'to'


AssertionError: 

In [14]:
(hf_act)

BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=tensor([[[-0.0667,  0.0881, -0.3085,  ...,  0.0307,  0.0512, -0.0019],
         [-0.0479,  0.1277,  0.3274,  ..., -0.0607,  0.0706,  0.1926],
         [ 0.1930,  0.3386,  0.5010,  ..., -0.0966,  0.3523,  0.2225],
         [-0.3847,  0.1479, -0.7603,  ..., -0.0685, -0.1016,  0.3400],
         [ 0.1005, -0.0756, -0.1506,  ..., -0.1956, -0.0159,  0.0063]]],
       grad_fn=<ViewBackward0>), past_key_values=((tensor([[[[-7.7840e-01,  1.4246e+00,  9.9271e-01,  ..., -1.5725e+00,
           -2.5365e-01,  1.2906e+00],
          [-1.8792e+00,  2.7844e+00,  6.9171e-01,  ..., -8.1772e-01,
           -5.5522e-01,  1.5365e+00],
          [-1.7495e+00,  2.9933e+00,  1.4383e+00,  ..., -9.3925e-01,
           -2.0331e+00,  2.5157e+00],
          [-2.8229e+00,  3.9091e+00,  1.5496e+00,  ..., -1.2042e-01,
           -2.9000e+00,  1.4480e+00],
          [-2.1030e+00,  2.6515e+00,  2.0054e+00,  ..., -1.4825e+00,
           -1.9819e+00,  2.2163e+00

In [15]:
dict(hf_act)

{'last_hidden_state': tensor([[[-0.0667,  0.0881, -0.3085,  ...,  0.0307,  0.0512, -0.0019],
          [-0.0479,  0.1277,  0.3274,  ..., -0.0607,  0.0706,  0.1926],
          [ 0.1930,  0.3386,  0.5010,  ..., -0.0966,  0.3523,  0.2225],
          [-0.3847,  0.1479, -0.7603,  ..., -0.0685, -0.1016,  0.3400],
          [ 0.1005, -0.0756, -0.1506,  ..., -0.1956, -0.0159,  0.0063]]],
        grad_fn=<ViewBackward0>),
 'past_key_values': ((tensor([[[[-7.7840e-01,  1.4246e+00,  9.9271e-01,  ..., -1.5725e+00,
              -2.5365e-01,  1.2906e+00],
             [-1.8792e+00,  2.7844e+00,  6.9171e-01,  ..., -8.1772e-01,
              -5.5522e-01,  1.5365e+00],
             [-1.7495e+00,  2.9933e+00,  1.4383e+00,  ..., -9.3925e-01,
              -2.0331e+00,  2.5157e+00],
             [-2.8229e+00,  3.9091e+00,  1.5496e+00,  ..., -1.2042e-01,
              -2.9000e+00,  1.4480e+00],
             [-2.1030e+00,  2.6515e+00,  2.0054e+00,  ..., -1.4825e+00,
              -1.9819e+00,  2.2163e+00]]

In [16]:
dict(hf_act).keys()

dict_keys(['last_hidden_state', 'past_key_values'])

In [17]:
hf_act["last_hidden_state"].shape

torch.Size([1, 5, 768])

In [1]:
from IPython import get_ipython

ipython = get_ipython()
if ipython is not None:
    ipython.magic("%load_ext autoreload")
    ipython.magic("%autoreload 2")

# Sad, really annoying to have to remember this
# import os
# os.environ["TRANSFORMERS_CACHE"] = "/workspace/cache"

  ipython.magic("%load_ext autoreload")
  ipython.magic("%autoreload 2")


In [2]:
from collections import defaultdict

import einops
import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

from transformer_lens import HookedTransformer, utils

In [3]:
model_name = "gpt2"  # ??? Why so slow
tl_model = HookedTransformer.from_pretrained_no_processing(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)
hf_model.eval()

Loaded pretrained model gpt2 into HookedTransformer


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
# tl_model = tl_model.to(torch.float64)
# hf_model = hf_model.to(torch.float64)

In [5]:
string = "Hello, world!"
tokens = tl_model.to_tokens(string)
logits, cache = tl_model.run_with_cache(tokens, prepend_bos=False)

done
done
done
done
done
done
done
done
done
done
done
done


In [6]:
class ActivationCacher:
    def __init__(self):
        self.activations = defaultdict(list)

    def cache_activations(self, module, module_name):
        def hook(module, input, output):
            self.activations[module_name].append(output)

        return hook

In [7]:
# Create an ActivationCacher instance
activation_cacher = ActivationCacher()

# Register hooks for caching activations
for name, module in hf_model.named_modules():
    module.register_forward_hook(activation_cacher.cache_activations(module, name))

In [8]:
hf_logits = hf_model(tokens).logits

In [9]:
torch.testing.assert_close(logits, hf_logits, atol=1e-9, rtol=1e-9)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (0, 4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (0, 4, 11201) (up to 1e-09 allowed)

In [None]:
MODE = "resid_post"

if MODE == "resid_post":
    tl_activation_name = utils.get_act_name("resid_post", 11)
    hf_activation_name = "transformer"

elif MODE == "attn":
    tl_activation_name = "blocks.{layer_idx}.hook_attn_out"
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "pattern":
    tl_activation_name = utils.get_act_name("pattern", "{layer_idx}")  # lol
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn.attn_dropout"
    elif "pythia" in model_name:
        hf_activation_name = "gpt_neox.layers.{layer_idx}.attention.attention_dropout"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "mlp":
    tl_activation_name = utils.get_act_name("mlp_out", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = (
        "transformer.h.{layer_idx}.mlp.dropout"  # Can try .mlp.c_fc... etc
    )
elif MODE == "mlp_pre":
    tl_activation_name = utils.get_act_name("mlp_pre", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_fc"
elif MODE == "mlp_post":
    tl_activation_name = utils.get_act_name("mlp_post", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_proj"
elif MODE == "embed":
    assert "gpt2" in model_name
    tl_activation_name = utils.get_act_name("resid_pre", 0)
    hf_activation_name = "transformer.drop"
elif MODE == "ln":
    tl_activation_name = "blocks.{layer_idx}.ln1.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_1"
elif MODE == "ln2":
    tl_activation_name = "blocks.{layer_idx}.ln2.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_2"
elif MODE == "qkv":
    tl_activation_name = utils.get_act_name("q", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.attn.c_attn"
elif MODE == "ln_f":
    tl_activation_name = "ln_final.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.ln_f"

else:
    raise ValueError(f"Add this please! {MODE=}")

saved_preln = activation_cacher.activations["transformer.drop"][0]
# hook_normalized

for i in range(tl_model.cfg.n_layers):
    print(i)
    tl_act = cache[
        tl_activation_name.format(layer_idx=i)
        if "{layer_idx}" in tl_activation_name
        else tl_activation_name
    ]

    hf_act = activation_cacher.activations[
        hf_activation_name.format(layer_idx=i)
        if "{layer_idx}" in hf_activation_name
        else hf_activation_name
    ][0]

    if MODE in "qkv":
        query, key, value = hf_act.split(tl_model.cfg.d_model, dim=2)
        if MODE == "q":
            hf_act = einops.rearrange(
                query, "b s (h d) -> b s h d", h=tl_model.cfg.n_heads
            )
        else:
            raise NotImplementedError()
    elif MODE == "attn":
        hf_act = hf_act[0]
    elif MODE == "last_hidden_state":
        hf_act = hf_act["last_hidden_state"]

    try:  # Suppress
        torch.testing.assert_close(
            tl_act.to(torch.float32),
            hf_act.to(torch.float32),
            atol=1e-9,  # Wow, embed is super close
            rtol=1e-9,  # Better, but still failing a lot!
            # msg="Hello",
        )
    except Exception as e:
        a = str(e)
    else:
        a = None

    if a is not None:
        print(a)
        assert False

else:
    print("All good!")

: 

In [None]:
torch.testing.assert_close(
    # tl_model.W_Q[0, 0],
    # hf_model.transformer.h[0]
    # .attn.c_attn.weight.split(tl_model.cfg.d_model, dim=-1)[0]
    # .split(tl_model.cfg.d_head, dim=-1)[0],
    tl_model.W_U,
    hf_model.lm_head.weight.T,  # Gah!
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

: 

In [10]:
MODE = "resid_post"

if MODE == "resid_post":
    tl_activation_name = utils.get_act_name("resid_post", 11)
    hf_activation_name = "transformer"

elif MODE == "attn":
    tl_activation_name = "blocks.{layer_idx}.hook_attn_out"
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "pattern":
    tl_activation_name = utils.get_act_name("pattern", "{layer_idx}")  # lol
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn.attn_dropout"
    elif "pythia" in model_name:
        hf_activation_name = "gpt_neox.layers.{layer_idx}.attention.attention_dropout"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "mlp":
    tl_activation_name = utils.get_act_name("mlp_out", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = (
        "transformer.h.{layer_idx}.mlp.dropout"  # Can try .mlp.c_fc... etc
    )
elif MODE == "mlp_pre":
    tl_activation_name = utils.get_act_name("mlp_pre", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_fc"
elif MODE == "mlp_post":
    tl_activation_name = utils.get_act_name("mlp_post", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_proj"
elif MODE == "embed":
    assert "gpt2" in model_name
    tl_activation_name = utils.get_act_name("resid_pre", 0)
    hf_activation_name = "transformer.drop"
elif MODE == "ln":
    tl_activation_name = "blocks.{layer_idx}.ln1.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_1"
elif MODE == "ln2":
    tl_activation_name = "blocks.{layer_idx}.ln2.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_2"
elif MODE == "qkv":
    tl_activation_name = utils.get_act_name("q", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.attn.c_attn"
elif MODE == "ln_f":
    tl_activation_name = "ln_final.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.ln_f"

else:
    raise ValueError(f"Add this please! {MODE=}")

saved_preln = activation_cacher.activations["transformer.drop"][0]
# hook_normalized

for i in range(tl_model.cfg.n_layers):
    print(i)
    tl_act = cache[
        tl_activation_name.format(layer_idx=i)
        if "{layer_idx}" in tl_activation_name
        else tl_activation_name
    ]

    hf_act = activation_cacher.activations[
        hf_activation_name.format(layer_idx=i)
        if "{layer_idx}" in hf_activation_name
        else hf_activation_name
    ][0]

    if MODE in "qkv":
        query, key, value = hf_act.split(tl_model.cfg.d_model, dim=2)
        if MODE == "q":
            hf_act = einops.rearrange(
                query, "b s (h d) -> b s h d", h=tl_model.cfg.n_heads
            )
        else:
            raise NotImplementedError()
    elif MODE == "attn":
        hf_act = hf_act[0]
    elif MODE == "last_hidden_state":
        hf_act = hf_act["last_hidden_state"]

    try:  # Suppress
        torch.testing.assert_close(
            tl_act.to(torch.float32),
            hf_act.to(torch.float32),
            atol=1e-9,  # Wow, embed is super close
            rtol=1e-9,  # Better, but still failing a lot!
            # msg="Hello",
        )
    except Exception as e:
        a = str(e)
    else:
        a = None

    if a is not None:
        print(a)
        assert False

else:
    print("All good!")

0
'BaseModelOutputWithPastAndCrossAttentions' object has no attribute 'to'


AssertionError: 

In [11]:
torch.testing.assert_close(logits, hf_logits, atol=1e-9, rtol=1e-9)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (0, 4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (0, 4, 11201) (up to 1e-09 allowed)

In [12]:
torch.testing.assert_close(logits, hf_logits, atol=1e-9, rtol=1e-9)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (0, 4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (0, 4, 11201) (up to 1e-09 allowed)

In [13]:
MODE = "resid_post"

if MODE == "resid_post":
    tl_activation_name = utils.get_act_name("resid_post", 11)
    hf_activation_name = "transformer"

elif MODE == "attn":
    tl_activation_name = "blocks.{layer_idx}.hook_attn_out"
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "pattern":
    tl_activation_name = utils.get_act_name("pattern", "{layer_idx}")  # lol
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn.attn_dropout"
    elif "pythia" in model_name:
        hf_activation_name = "gpt_neox.layers.{layer_idx}.attention.attention_dropout"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "mlp":
    tl_activation_name = utils.get_act_name("mlp_out", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = (
        "transformer.h.{layer_idx}.mlp.dropout"  # Can try .mlp.c_fc... etc
    )
elif MODE == "mlp_pre":
    tl_activation_name = utils.get_act_name("mlp_pre", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_fc"
elif MODE == "mlp_post":
    tl_activation_name = utils.get_act_name("mlp_post", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_proj"
elif MODE == "embed":
    assert "gpt2" in model_name
    tl_activation_name = utils.get_act_name("resid_pre", 0)
    hf_activation_name = "transformer.drop"
elif MODE == "ln":
    tl_activation_name = "blocks.{layer_idx}.ln1.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_1"
elif MODE == "ln2":
    tl_activation_name = "blocks.{layer_idx}.ln2.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_2"
elif MODE == "qkv":
    tl_activation_name = utils.get_act_name("q", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.attn.c_attn"
elif MODE == "ln_f":
    tl_activation_name = "ln_final.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.ln_f"

else:
    raise ValueError(f"Add this please! {MODE=}")

saved_preln = activation_cacher.activations["transformer.drop"][0]
# hook_normalized

for i in range(tl_model.cfg.n_layers):
    print(i)
    tl_act = cache[
        tl_activation_name.format(layer_idx=i)
        if "{layer_idx}" in tl_activation_name
        else tl_activation_name
    ]

    hf_act = activation_cacher.activations[
        hf_activation_name.format(layer_idx=i)
        if "{layer_idx}" in hf_activation_name
        else hf_activation_name
    ][0]

    if MODE in "qkv":
        query, key, value = hf_act.split(tl_model.cfg.d_model, dim=2)
        if MODE == "q":
            hf_act = einops.rearrange(
                query, "b s (h d) -> b s h d", h=tl_model.cfg.n_heads
            )
        else:
            raise NotImplementedError()
    elif MODE == "attn":
        hf_act = hf_act[0]
    elif MODE == "last_hidden_state":
        hf_act = hf_act["last_hidden_state"]

    try:  # Suppress
        torch.testing.assert_close(
            tl_act.to(torch.float32),
            hf_act.to(torch.float32),
            atol=1e-9,  # Wow, embed is super close
            rtol=1e-9,  # Better, but still failing a lot!
            # msg="Hello",
        )
    except Exception as e:
        a = str(e)
    else:
        a = None

    if a is not None:
        print(a)
        assert False

else:
    print("All good!")

0
'BaseModelOutputWithPastAndCrossAttentions' object has no attribute 'to'


AssertionError: 

In [14]:
hf_act

BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=tensor([[[-0.0667,  0.0881, -0.3085,  ...,  0.0307,  0.0512, -0.0019],
         [-0.0479,  0.1277,  0.3274,  ..., -0.0607,  0.0706,  0.1926],
         [ 0.1930,  0.3386,  0.5010,  ..., -0.0966,  0.3523,  0.2225],
         [-0.3847,  0.1479, -0.7603,  ..., -0.0685, -0.1016,  0.3400],
         [ 0.1005, -0.0756, -0.1506,  ..., -0.1956, -0.0159,  0.0063]]],
       grad_fn=<ViewBackward0>), past_key_values=((tensor([[[[-7.7840e-01,  1.4246e+00,  9.9271e-01,  ..., -1.5725e+00,
           -2.5365e-01,  1.2906e+00],
          [-1.8792e+00,  2.7844e+00,  6.9171e-01,  ..., -8.1772e-01,
           -5.5522e-01,  1.5365e+00],
          [-1.7495e+00,  2.9933e+00,  1.4383e+00,  ..., -9.3925e-01,
           -2.0331e+00,  2.5157e+00],
          [-2.8229e+00,  3.9091e+00,  1.5496e+00,  ..., -1.2042e-01,
           -2.9000e+00,  1.4480e+00],
          [-2.1030e+00,  2.6515e+00,  2.0054e+00,  ..., -1.4825e+00,
           -1.9819e+00,  2.2163e+00

In [15]:
hf_act.shape

AttributeError: 'BaseModelOutputWithPastAndCrossAttentions' object has no attribute 'shape'

In [16]:
MODE = "resid_post"

if MODE == "resid_post":
    tl_activation_name = utils.get_act_name("resid_post", 11)
    hf_activation_name = "transformer"

elif MODE == "attn":
    tl_activation_name = "blocks.{layer_idx}.hook_attn_out"
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "pattern":
    tl_activation_name = utils.get_act_name("pattern", "{layer_idx}")  # lol
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn.attn_dropout"
    elif "pythia" in model_name:
        hf_activation_name = "gpt_neox.layers.{layer_idx}.attention.attention_dropout"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "mlp":
    tl_activation_name = utils.get_act_name("mlp_out", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = (
        "transformer.h.{layer_idx}.mlp.dropout"  # Can try .mlp.c_fc... etc
    )
elif MODE == "mlp_pre":
    tl_activation_name = utils.get_act_name("mlp_pre", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_fc"
elif MODE == "mlp_post":
    tl_activation_name = utils.get_act_name("mlp_post", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_proj"
elif MODE == "embed":
    assert "gpt2" in model_name
    tl_activation_name = utils.get_act_name("resid_pre", 0)
    hf_activation_name = "transformer.drop"
elif MODE == "ln":
    tl_activation_name = "blocks.{layer_idx}.ln1.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_1"
elif MODE == "ln2":
    tl_activation_name = "blocks.{layer_idx}.ln2.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_2"
elif MODE == "qkv":
    tl_activation_name = utils.get_act_name("q", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.attn.c_attn"
elif MODE == "ln_f":
    tl_activation_name = "ln_final.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.ln_f"

else:
    raise ValueError(f"Add this please! {MODE=}")

saved_preln = activation_cacher.activations["transformer.drop"][0]
# hook_normalized

for i in range(tl_model.cfg.n_layers):
    print(i)
    tl_act = cache[
        tl_activation_name.format(layer_idx=i)
        if "{layer_idx}" in tl_activation_name
        else tl_activation_name
    ]

    hf_act = activation_cacher.activations[
        hf_activation_name.format(layer_idx=i)
        if "{layer_idx}" in hf_activation_name
        else hf_activation_name
    ][0]

    if MODE in "qkv":
        query, key, value = hf_act.split(tl_model.cfg.d_model, dim=2)
        if MODE == "q":
            hf_act = einops.rearrange(
                query, "b s (h d) -> b s h d", h=tl_model.cfg.n_heads
            )
        else:
            raise NotImplementedError()
    elif MODE == "attn":
        hf_act = hf_act[0]
    elif MODE == "resid_post":
        hf_act = hf_act["last_hidden_state"]

    try:  # Suppress
        torch.testing.assert_close(
            tl_act.to(torch.float32),
            hf_act.to(torch.float32),
            atol=1e-9,  # Wow, embed is super close
            rtol=1e-9,  # Better, but still failing a lot!
            # msg="Hello",
        )
    except Exception as e:
        a = str(e)
    else:
        a = None

    if a is not None:
        print(a)
        assert False

else:
    print("All good!")

0
Tensor-likes are not close!

Mismatched elements: 3840 / 3840 (100.0%)
Greatest absolute difference: 404.2613220214844 at index (0, 0, 447) (up to 1e-09 allowed)
Greatest relative difference: 10851.552734375 at index (0, 4, 555) (up to 1e-09 allowed)


AssertionError: 

In [17]:
utils.get_act_name("resid_post", 11)

'blocks.11.hook_resid_post'

In [18]:
MODE = "ln_f"

if MODE == "resid_post":
    raise NotImplementedError()
    tl_activation_name = utils.get_act_name("resid_post", 11)
    hf_activation_name = "transformer"  # oh lol not really

elif MODE == "attn":
    tl_activation_name = "blocks.{layer_idx}.hook_attn_out"
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "pattern":
    tl_activation_name = utils.get_act_name("pattern", "{layer_idx}")  # lol
    if "gpt2" in model_name:
        hf_activation_name = "transformer.h.{layer_idx}.attn.attn_dropout"
    elif "pythia" in model_name:
        hf_activation_name = "gpt_neox.layers.{layer_idx}.attention.attention_dropout"
    else:
        raise ValueError(f"Add this please! {activation_cacher.activations.keys()=}")
elif MODE == "mlp":
    tl_activation_name = utils.get_act_name("mlp_out", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = (
        "transformer.h.{layer_idx}.mlp.dropout"  # Can try .mlp.c_fc... etc
    )
elif MODE == "mlp_pre":
    tl_activation_name = utils.get_act_name("mlp_pre", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_fc"
elif MODE == "mlp_post":
    tl_activation_name = utils.get_act_name("mlp_post", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.mlp.c_proj"
elif MODE == "embed":
    assert "gpt2" in model_name
    tl_activation_name = utils.get_act_name("resid_pre", 0)
    hf_activation_name = "transformer.drop"
elif MODE == "ln":
    tl_activation_name = "blocks.{layer_idx}.ln1.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_1"
elif MODE == "ln2":
    tl_activation_name = "blocks.{layer_idx}.ln2.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.ln_2"
elif MODE == "qkv":
    tl_activation_name = utils.get_act_name("q", "{layer_idx}")
    assert "gpt2" in model_name
    hf_activation_name = "transformer.h.{layer_idx}.attn.c_attn"
elif MODE == "ln_f":
    tl_activation_name = "ln_final.hook_normalized"
    assert "gpt2" in model_name
    hf_activation_name = "transformer.ln_f"

else:
    raise ValueError(f"Add this please! {MODE=}")

saved_preln = activation_cacher.activations["transformer.drop"][0]
# hook_normalized

for i in range(tl_model.cfg.n_layers):
    print(i)
    tl_act = cache[
        tl_activation_name.format(layer_idx=i)
        if "{layer_idx}" in tl_activation_name
        else tl_activation_name
    ]

    hf_act = activation_cacher.activations[
        hf_activation_name.format(layer_idx=i)
        if "{layer_idx}" in hf_activation_name
        else hf_activation_name
    ][0]

    if MODE in "qkv":
        query, key, value = hf_act.split(tl_model.cfg.d_model, dim=2)
        if MODE == "q":
            hf_act = einops.rearrange(
                query, "b s (h d) -> b s h d", h=tl_model.cfg.n_heads
            )
        else:
            raise NotImplementedError()
    elif MODE == "attn":
        hf_act = hf_act[0]
    elif MODE == "resid_post":
        hf_act = hf_act["last_hidden_state"]

    try:  # Suppress
        torch.testing.assert_close(
            tl_act.to(torch.float32),
            hf_act.to(torch.float32),
            atol=1e-9,  # Wow, embed is super close
            rtol=1e-9,  # Better, but still failing a lot!
            # msg="Hello",
        )
    except Exception as e:
        a = str(e)
    else:
        a = None

    if a is not None:
        print(a)
        assert False

else:
    print("All good!")

0
1
2
3
4
5
6
7
8
9
10
11
All good!


In [19]:
hf_act.shape

torch.Size([1, 5, 768])

In [20]:
tl_act.shape

torch.Size([1, 5, 768])

In [21]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act,
    tl_model.W_U,
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act,
    hf_model.lm_head.weight.T,
    bias=None,
)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x768 and 50257x768)

In [22]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act,
    tl_model.W_U.T,
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act,
    hf_model.lm_head.weight.T,
    bias=None,
)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x768 and 50257x768)

In [23]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act.T,
    tl_model.W_U,
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act,
    hf_model.lm_head.weight.T,
    bias=None,
)

  tl_act.T,


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3840x1 and 50257x768)

In [24]:
tl_act.shape

torch.Size([1, 5, 768])

In [25]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act[0],
    tl_model.W_U,
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act[0],
    hf_model.lm_head.weight.T,
    bias=None,
)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x768 and 50257x768)

In [26]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act[0],
    tl_model.W_U.T,
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act[0],
    hf_model.lm_head.weight.T,
    bias=None,
)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x768 and 50257x768)

In [27]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act[0],
    tl_model.W_U.T,
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act[0],
    hf_model.lm_head.weight,
    bias=None,
)

In [28]:
torch.testing.assert_close(
    manual_tl_logits,
    manual_hf_logits,
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (4, 11201) (up to 1e-09 allowed)

In [29]:
torch.testing.assert_close(
    # manual_tl_logits,
    # manual_hf_logits,
    tl_act[0],
    hf_act[0],
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

In [30]:
torch.testing.assert_close(
    # manual_tl_logits,
    # manual_hf_logits,
    # tl_act[0],
    # hf_act[0],
    tl_model.W_U.T,
    hf_model.lm_head.weight,
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

In [31]:
torch.testing.assert_close(
    manual_tl_logits,
    manual_hf_logits,
    # tl_act[0],
    # hf_act[0],
    # tl_model.W_U.T,
    # hf_model.lm_head.weight,
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (4, 11201) (up to 1e-09 allowed)

In [32]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act[0],
    tl_model.W_U.T.clone(),
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act[0],
    hf_model.lm_head.weight,
    bias=None,
)

In [33]:
torch.testing.assert_close(
    manual_tl_logits,
    manual_hf_logits,
    # tl_act[0],
    # hf_act[0],
    # tl_model.W_U.T,
    # hf_model.lm_head.weight,
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (4, 11201) (up to 1e-09 allowed)

In [34]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act[0],
    tl_model.W_U.clone(),
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act[0],
    hf_model.lm_head.weight,
    bias=None,
)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x768 and 50257x768)

In [35]:
torch.testing.assert_close(
    manual_tl_logits,
    manual_hf_logits,
    # tl_act[0],
    # hf_act[0],
    # tl_model.W_U.T,
    # hf_model.lm_head.weight,
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (4, 11201) (up to 1e-09 allowed)

In [36]:
manual_tl_logits[4, 11201].item()

-113.87483215332031

In [37]:
manual_hf_logits[4, 11201].item()

-113.8746566772461

In [38]:
tl_act[0, 4]

tensor([ 1.0053e-01, -7.5591e-02, -1.5063e-01,  2.0151e-01,  4.4182e-02,
        -2.9735e-01,  1.9473e+01, -1.6222e-01, -1.1520e-01, -1.1817e-01,
         2.3299e-02, -5.9292e-01, -5.1516e-01, -1.9742e-01,  1.7883e-01,
        -1.6713e-01,  2.2477e-01, -1.6698e-01,  2.0329e-02, -1.7445e+00,
         2.8544e-01,  1.0833e-01, -1.5417e-01, -4.0604e-01,  2.3619e-01,
        -3.2902e-01, -4.7379e-01,  1.1488e-01, -2.5185e-01,  9.1692e-02,
        -2.2042e-01, -3.1660e-01, -2.5787e-01,  3.4094e-02, -2.5952e-01,
         2.8074e-02,  7.1093e+01,  3.3310e-01,  7.3868e-02,  2.9652e-01,
        -5.3501e-01, -1.5532e-01, -6.8177e-02, -2.7937e-01,  1.5716e-01,
        -3.2104e-01,  1.2069e-01, -8.5994e-02, -9.2824e-03,  2.6996e-01,
         2.0370e-01, -2.2428e-01, -3.1979e-01, -5.0943e-01,  5.6925e-02,
         2.5088e-01,  2.5540e-01,  8.7769e-02, -2.3890e-01, -1.8421e-01,
        -9.5509e-03, -8.2135e-01,  3.5407e-01, -2.9266e-01, -1.1075e+00,
        -2.1076e-01,  3.2335e-01, -3.3997e-01,  8.6

In [39]:
(tl_act[0, 4] - hf_act[0, 4])

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [40]:
tl_model.W_U.clone()[:, 11201]

tensor([-1.5161e-02, -2.6007e-01,  8.5028e-03,  6.9489e-02, -6.7217e-02,
        -3.5594e-02, -3.0318e-01, -1.1172e-01, -2.8577e-01,  6.7894e-02,
        -1.2570e-01,  1.2884e-02, -1.2871e-01,  4.9278e-02, -1.4791e-01,
        -9.3818e-02, -1.2795e-01,  6.1652e-02,  4.9877e-02,  8.1221e-02,
         1.1072e-02, -3.7089e-02,  2.2089e-01, -1.4787e-01, -9.5130e-03,
        -7.0748e-02, -9.6098e-02,  5.5057e-02,  7.7552e-02, -2.2902e-01,
         7.1572e-02,  5.6672e-02,  2.7491e-03,  3.3281e-01,  1.8593e-01,
         1.2634e-01, -3.1934e-01, -7.0840e-02,  2.9342e-02, -6.9033e-02,
        -8.7288e-03, -3.9902e-01, -4.1028e-02,  1.2468e-01, -5.9874e-02,
         7.5226e-04, -1.2274e-01,  4.6405e-02, -3.1357e-02, -1.2324e-01,
         4.4154e-02, -6.8064e-02,  2.2917e-01, -1.1462e-01,  3.9697e-02,
        -2.9766e-01, -7.9124e-02,  6.3836e-02, -4.7934e-02,  7.4962e-02,
        -1.0668e-01, -1.1675e-01,  8.7037e-03,  9.3109e-02,  1.8249e-01,
        -2.1335e-01,  7.9851e-02,  2.1493e-02,  2.7

In [41]:
tl_model.W_U.clone()[:, 11201] - hf_model.lm_head.weight[:, 11201]

IndexError: index 11201 is out of bounds for dimension 1 with size 768

In [42]:
tl_model.W_U.clone()[:, 11201] - hf_model.lm_head.weight[11201]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [43]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act[0],
    tl_model.W_U.clone(),
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act[0],
    hf_model.lm_head.weight,
    bias=None,
)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5x768 and 50257x768)

In [44]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act[0],
    tl_model.W_U.clone().T,
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act[0],
    hf_model.lm_head.weight,
    bias=None,
)

In [45]:
assert MODE == "ln_f"
manual_tl_logits = torch.nn.functional.linear(
    tl_act[0],
    tl_model.W_U.clone().T,
    bias=None,
)
manual_hf_logits = torch.nn.functional.linear(
    hf_act[0],
    hf_model.lm_head.weight,
    bias=None,
)

In [46]:
torch.testing.assert_close(
    manual_tl_logits,
    manual_hf_logits,
    # tl_act[0],
    # hf_act[0],
    # tl_model.W_U.T,
    # hf_model.lm_head.weight,
    atol=1e-9,
    rtol=1e-9,  # Weights *are* close.
)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.5409581237690873e-06 at index (4, 11201) (up to 1e-09 allowed)

In [47]:
(tl_act[0, 4] - hf_act[0, 4])

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [48]:
(tl_model.W_U.clone()[:, 11201] - hf_model.lm_head.weight[11201]).abs()

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [49]:
(tl_model.W_U.clone()[:, 11201] - hf_model.lm_head.weight[11201]).abs().max()

tensor(0., grad_fn=<MaxBackward1>)

In [50]:
(tl_model.W_U.clone()[:, 11201] - hf_model.lm_head.weight[11201]).abs().max().item()

0.0

In [51]:
(tl_act[0, 4] - hf_act[0, 4]).abs().max().item()

0.0

In [52]:
import torch


def gen_matrix(shape, seed):
    torch.manual_seed(seed)
    return torch.rand(shape)


ays = [gen_matrix((2, 1000), 0) for _ in range(3)]
bees = [gen_matrix((1000, 4), 1) for _ in range(3)]

assert (ays[0] - ays[1]).abs().max() < 1e-9
assert (bees[0] - bees[1]).abs().max() < 1e-9

prods = [torch.nn.functional.linear(a, b) for a, b in zip(ays, bees)]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x1000 and 4x1000)

In [53]:
import torch


def gen_matrix(shape, seed):
    torch.manual_seed(seed)
    return torch.rand(shape)


ays = [gen_matrix((2, 1000), 0) for _ in range(2)]
bees = [gen_matrix((1000, 4), 1) for _ in range(2)]

assert (ays[0] - ays[1]).abs().max() < 1e-9
assert (bees[0] - bees[1]).abs().max() < 1e-9

prods = [torch.nn.functional.linear(a, b) for a, b in zip(ays, bees)]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x1000 and 4x1000)

In [54]:
bees[0].shape

torch.Size([1000, 4])

In [55]:
ays[0].shape

torch.Size([2, 1000])

In [56]:
ays[0] @ bees[0]

tensor([[248.0671, 245.6817, 243.8322, 248.9217],
        [237.3750, 243.0664, 241.9444, 244.3960]])

In [57]:
help(torch.nn.functional.linear)

Help on built-in function linear in module torch._C._nn:

linear(...)
    linear(input, weight, bias=None) -> Tensor
    
    Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.
    
    This operation supports 2-D :attr:`weight` with :ref:`sparse layout<sparse-docs>`
    
    
        Sparse support is a beta feature and some layout(s)/dtype/device combinations may not be supported,
        or may not have autograd support. If you notice missing functionality please
        open a feature request.
    
    This operator supports :ref:`TensorFloat32<tf32_on_ampere>`.
    
    Shape:
    
        - Input: :math:`(*, in\_features)` where `*` means any number of
          additional dimensions, including none
        - Weight: :math:`(out\_features, in\_features)` or :math:`(in\_features)`
        - Bias: :math:`(out\_features)` or :math:`()`
        - Output: :math:`(*, out\_features)` or :math:`(*)`, based on the shape of the weight



In [58]:
import torch


def gen_matrix(shape, seed):
    torch.manual_seed(seed)
    return torch.rand(shape)


ays = [gen_matrix((2, 1000), 0) for _ in range(2)]
bees = [gen_matrix((1000, 4), 1) for _ in range(2)]

assert (ays[0] - ays[1]).abs().max() < 1e-9
assert (bees[0] - bees[1]).abs().max() < 1e-9

prods = [torch.nn.functional.linear(a, b.T) for a, b in zip(ays, bees)]

In [59]:
tl_act[0].shape

torch.Size([5, 768])

In [60]:
tl_model.W_U.clone().T.shape

torch.Size([50257, 768])

In [61]:
import torch


def gen_matrix(shape, seed):
    torch.manual_seed(seed)
    return torch.rand(shape)


ays = [gen_matrix((2, 1000), 0) for _ in range(2)]
bees = [gen_matrix((1000, 4), 1) for _ in range(2)]

assert (ays[0] - ays[1]).abs().max() < 1e-9
assert (bees[0] - bees[1]).abs().max() < 1e-9

prods = [torch.nn.functional.linear(a, b.T) for a, b in zip(ays, bees)]

assert (prods[0] - prods[1]).abs().max() < 1e-9

In [62]:
(prods[0] - prods[1]).abs().max()

tensor(0.)

In [63]:
import torch


def gen_matrix(shape, seed):
    torch.manual_seed(seed)
    return torch.rand(shape)


ays = [gen_matrix((2, 1000), 0) for _ in range(2)]
bees = [gen_matrix((1000, 4), 1) for _ in range(2)]

ays[1] += 1e-9
bees[1] += 1e-9

assert (ays[0] - ays[1]).abs().max() < 1e-9
assert (bees[0] - bees[1]).abs().max() < 1e-9

prods = [torch.nn.functional.linear(a, b.T) for a, b in zip(ays, bees)]

assert (prods[0] - prods[1]).abs().max() < 1e-9

AssertionError: 

In [64]:
import torch


def gen_matrix(shape, seed):
    torch.manual_seed(seed)
    return torch.rand(shape)


ays = [gen_matrix((2, 1000), 0) for _ in range(2)]
bees = [gen_matrix((1000, 4), 1) for _ in range(2)]

ays[1] += 1e-10
bees[1] += 1e-10

assert (ays[0] - ays[1]).abs().max() < 1e-9
assert (bees[0] - bees[1]).abs().max() < 1e-9

prods = [torch.nn.functional.linear(a, b.T) for a, b in zip(ays, bees)]

assert (prods[0] - prods[1]).abs().max() < 1e-9

In [65]:
import torch


def gen_matrix(shape, seed):
    torch.manual_seed(seed)
    return torch.rand(shape)


ays = [gen_matrix((2, 1000), 0) for _ in range(2)]
bees = [gen_matrix((1000, 4), 1) for _ in range(2)]

ays[1] += 1e-10
bees[1] -= 1e-10

assert (ays[0] - ays[1]).abs().max() < 1e-9
assert (bees[0] - bees[1]).abs().max() < 1e-9

prods = [torch.nn.functional.linear(a, b.T) for a, b in zip(ays, bees)]

assert (prods[0] - prods[1]).abs().max() < 1e-9

In [66]:
import torch


def gen_matrix(shape, seed):
    torch.manual_seed(seed)
    return torch.rand(shape)


ays = [gen_matrix((2, 1000), 0) for _ in range(2)]
bees = [gen_matrix((1000, 4), 1) for _ in range(2)]

ays[1] += 1e-10
bees[1] -= 1e-10

assert (ays[0] - ays[1]).abs().max() < 1e-9
assert (bees[0] - bees[1]).abs().max() < 1e-9

prods = [torch.nn.functional.linear(a, b.T) for a, b in zip(ays, bees)]

assert (prods[0] - prods[1]).abs().max() < 1e-9

In [67]:
(ays[0] - ays[1]).abs().max()

tensor(1.1642e-10)

In [1]:
from IPython import get_ipython

ipython = get_ipython()
if ipython is not None:
    ipython.magic("%load_ext autoreload")
    ipython.magic("%autoreload 2")

# Sad, really annoying to have to remember this
# import os
# os.environ["TRANSFORMERS_CACHE"] = "/workspace/cache"

  ipython.magic("%load_ext autoreload")
  ipython.magic("%autoreload 2")


In [2]:
from collections import defaultdict

import einops
import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

from transformer_lens import HookedTransformer, utils

In [3]:
model_name = "gpt2"  # ??? Why so slow
tl_model = HookedTransformer.from_pretrained_no_processing(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)
hf_model.eval()

Loaded pretrained model gpt2 into HookedTransformer


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
# tl_model = tl_model.to(torch.float64)
# hf_model = hf_model.to(torch.float64)

In [5]:
string = "Hello, world!"
tokens = tl_model.to_tokens(string)
logits, cache = tl_model.run_with_cache(tokens, prepend_bos=False)

done
done
done
done
done
done
done
done
done
done
done
done


In [6]:
class ActivationCacher:
    def __init__(self):
        self.activations = defaultdict(list)

    def cache_activations(self, module, module_name):
        def hook(module, input, output):
            self.activations[module_name].append(output)

        return hook

In [7]:
# Create an ActivationCacher instance
activation_cacher = ActivationCacher()

# Register hooks for caching activations
for name, module in hf_model.named_modules():
    module.register_forward_hook(activation_cacher.cache_activations(module, name))

In [8]:
hf_logits = hf_model(tokens).logits

In [9]:
model_name = "gpt2"  # ??? Why so slow
tl_model = HookedTransformer.from_pretrained_no_processing(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)
hf_model.eval()

Loaded pretrained model gpt2 into HookedTransformer


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [10]:
class ActivationCacher:
    def __init__(self):
        self.activations = defaultdict(list)

    def cache_activations(self, module, module_name):
        def hook(module, input, output):
            self.activations[module_name].append(output)

        return hook

In [11]:
# Create an ActivationCacher instance
activation_cacher = ActivationCacher()

# Register hooks for caching activations
for name, module in hf_model.named_modules():
    module.register_forward_hook(activation_cacher.cache_activations(module, name))

In [12]:
hf_logits = hf_model(tokens).logits

In [13]:
model_name = "gpt2"  # ??? Why so slow
tl_model = HookedTransformer.from_pretrained_no_processing(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)
hf_model.eval()

Loaded pretrained model gpt2 into HookedTransformer


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [14]:
class ActivationCacher:
    def __init__(self):
        self.activations = defaultdict(list)

    def cache_activations(self, module, module_name):
        def hook(module, input, output):
            self.activations[module_name].append(output)

        return hook

In [15]:
# Create an ActivationCacher instance
activation_cacher = ActivationCacher()

# Register hooks for caching activations
for name, module in hf_model.named_modules():
    module.register_forward_hook(activation_cacher.cache_activations(module, name))

In [16]:
hf_logits = hf_model(tokens).logits

In [17]:
help(hf_model)

Help on GPT2LMHeadModel in module transformers.models.gpt2.modeling_gpt2 object:

class GPT2LMHeadModel(GPT2PreTrainedModel)
 |  GPT2LMHeadModel(config)
 |  
 |  The GPT2 Model transformer with a language modeling head on top (linear layer with weights tied to the input
 |  embeddings).
 |  
 |  
 |  This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
 |  library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
 |  etc.)
 |  
 |  This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
 |  Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
 |  and behavior.
 |  
 |  Parameters:
 |      config ([`GPT2Config`]): Model configuration class with all the parameters of the model.
 |          Initializing with a config file does not load the weights associated wit

In [18]:
type(hf_model)

transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel

In [19]:
model_name = "gpt2"  # ??? Why so slow
tl_model = HookedTransformer.from_pretrained_no_processing(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)
hf_model.eval()

Loaded pretrained model gpt2 into HookedTransformer


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [20]:
class ActivationCacher:
    def __init__(self):
        self.activations = defaultdict(list)

    def cache_activations(self, module, module_name):
        def hook(module, input, output):
            self.activations[module_name].append(output)

        return hook

In [21]:
# Create an ActivationCacher instance
activation_cacher = ActivationCacher()

# Register hooks for caching activations
for name, module in hf_model.named_modules():
    module.register_forward_hook(activation_cacher.cache_activations(module, name))

In [22]:
hf_logits = hf_model(tokens).logits

Hello!


In [23]:
model_name = "gpt2"  # ??? Why so slow
tl_model = HookedTransformer.from_pretrained_no_processing(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)
hf_model.eval()

Loaded pretrained model gpt2 into HookedTransformer


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [24]:
# tl_model = tl_model.to(torch.float64)
# hf_model = hf_model.to(torch.float64)

In [25]:
class ActivationCacher:
    def __init__(self):
        self.activations = defaultdict(list)

    def cache_activations(self, module, module_name):
        def hook(module, input, output):
            self.activations[module_name].append(output)

        return hook

In [26]:
# Create an ActivationCacher instance
activation_cacher = ActivationCacher()

# Register hooks for caching activations
for name, module in hf_model.named_modules():
    module.register_forward_hook(activation_cacher.cache_activations(module, name))

In [27]:
hf_logits = hf_model(tokens).logits

Lineah!


In [28]:
model_name = "gpt2"  # ??? Why so slow
tl_model = HookedTransformer.from_pretrained_no_processing(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)
hf_model.eval()

Loaded pretrained model gpt2 into HookedTransformer


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [29]:
# tl_model = tl_model.to(torch.float64)
# hf_model = hf_model.to(torch.float64)

In [30]:
class ActivationCacher:
    def __init__(self):
        self.activations = defaultdict(list)

    def cache_activations(self, module, module_name):
        def hook(module, input, output):
            self.activations[module_name].append(output)

        return hook

In [31]:
# Create an ActivationCacher instance
activation_cacher = ActivationCacher()

# Register hooks for caching activations
for name, module in hf_model.named_modules():
    module.register_forward_hook(activation_cacher.cache_activations(module, name))

In [32]:
hf_logits = hf_model(tokens).logits

Lineah! None


In [33]:
# ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

In [34]:
print()




In [35]:
# ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

torch.testing.assert_close(wus[0], wus[1], atol=1e-9, rtol=1e-9)

In [36]:
# ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

torch.testing.assert_close(wus[0], wus[1], atol=1e-9, rtol=1e-9)
torch.testing.assert_close(residuals[0], residuals[1], atol=1e-9, rtol=1e-9)

In [37]:
# ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

torch.testing.assert_close(wus[0], wus[1], atol=1e-9, rtol=1e-9)
torch.testing.assert_close(residuals[0], residuals[1], atol=1e-9, rtol=1e-9)

linears = [
    torch.nn.functional.linear(
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

In [38]:
# ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

torch.testing.assert_close(wus[0], wus[1], atol=1e-9, rtol=1e-9)
torch.testing.assert_close(residuals[0], residuals[1], atol=1e-9, rtol=1e-9)

linears = [
    torch.nn.functional.linear(
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

torch.testing.assert_close(
    linears[0],
    linears[1],
    atol=1e-9,
    rtol=1e-9,
)

AssertionError: Tensor-likes are not close!

Mismatched elements: 219480 / 251285 (87.3%)
Greatest absolute difference: 0.00017547607421875 at index (0, 4, 11201) (up to 1e-09 allowed)
Greatest relative difference: 1.540955736345495e-06 at index (0, 4, 11201) (up to 1e-09 allowed)

In [39]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-9
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9
print(wus[0].abs().max(), residuals[1].abs().max())

linears = [
    torch.nn.functional.linear(
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

assert torch.abs(linears[0] - linears[1]).max().item() < 1e-9

tensor(1.7852, grad_fn=<MaxBackward1>) tensor(225.6747, grad_fn=<MaxBackward1>)


AssertionError: 

In [40]:
residuals[1].abs().mean()

tensor(0.6899, grad_fn=<MeanBackward0>)

In [41]:
residuals[1].abs().min()

tensor(1.5672e-06, grad_fn=<MinBackward1>)

In [42]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-9
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9
print(wus[0].abs().max(), residuals[1].abs().max())

linears = [
    torch.nn.functional.linear(
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

assert torch.abs(linears[0] - linears[1]).max().item() < 1e-4

tensor(1.7852, grad_fn=<MaxBackward1>) tensor(225.6747, grad_fn=<MaxBackward1>)


AssertionError: 

In [43]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-9
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9
print(wus[0].abs().max(), residuals[1].abs().max())

linears = [
    torch.nn.functional.linear(
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4

tensor(1.7852, grad_fn=<MaxBackward1>) tensor(225.6747, grad_fn=<MaxBackward1>)


AssertionError: 

In [44]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-9
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9
print(wus[0].abs().max(), residuals[1].abs().max())

linears = [
    torch.nn.functional.linear(
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max

tensor(1.7852, grad_fn=<MaxBackward1>) tensor(225.6747, grad_fn=<MaxBackward1>)


AssertionError: 0.00017547607421875

In [45]:
residual.shape

NameError: name 'residual' is not defined

In [46]:
residuals[0].shape

torch.Size([1, 5, 768])

In [47]:
linears[0].shape

torch.Size([1, 5, 50257])

In [48]:
768 * 224

172032

In [49]:
torch.abs(linears[0] - linears[1]).argmax()

tensor(212229)

In [50]:
(linears[0] - linears[1])[21222 % 5, 212229 // 5]

IndexError: index 2 is out of bounds for dimension 0 with size 1

In [51]:
(linears[0] - linears[1])[0, 21222 % 5, 212229 // 5]

tensor(6.1035e-05, grad_fn=<SelectBackward0>)

In [52]:
(linears[0] - linears[1])[0, 212229 % 5, 212229 // 5]

tensor(-5.3406e-05, grad_fn=<SelectBackward0>)

In [53]:
(linears[0] - linears[1])[0, :, 212229 // 5]

tensor([ 2.2888e-05, -3.8147e-05,  6.1035e-05, -7.6294e-06, -5.3406e-05],
       grad_fn=<SelectBackward0>)

In [54]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

wus = [wu / wu.norm(dim=-1) for wu in wus]
residuals = [residual / residual.norm(dim=-1) for residual in residuals]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-9
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9
print(
    wus[0].abs().max(), residuals[1].abs().max()
)  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

RuntimeError: The size of tensor a (768) must match the size of tensor b (50257) at non-singleton dimension 1

In [55]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-9
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9
print(
    wus[0].abs().max(), residuals[1].abs().max()
)  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

AssertionError: 

In [56]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-7
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-7
print(
    wus[0].abs().max(), residuals[1].abs().max()
)  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

AssertionError: 

In [57]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-5
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-5
print(
    wus[0].abs().max(), residuals[1].abs().max()
)  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

tensor(0.5800, grad_fn=<MaxBackward1>) tensor(0.9197, grad_fn=<MaxBackward1>)


In [58]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-5
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-7
print(
    wus[0].abs().max(), residuals[1].abs().max()
)  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

tensor(0.5800, grad_fn=<MaxBackward1>) tensor(0.9197, grad_fn=<MaxBackward1>)


In [59]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-7
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-7
print(
    wus[0].abs().max(), residuals[1].abs().max()
)  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

AssertionError: 

In [60]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert torch.abs(wus[0] - wus[1]).max().item() < 1e-6
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-7
print(
    wus[0].abs().max(), residuals[1].abs().max()
)  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

tensor(0.5800, grad_fn=<MaxBackward1>) tensor(0.9197, grad_fn=<MaxBackward1>)


In [61]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-6
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-6

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

AssertionError: 0.00017547607421875

In [62]:
from safetensors.torch import save_file

tensors = {"weight1": torch.zeros((1024, 1024)), "weight2": torch.zeros((1024, 1024))}
save_file(tensors, "model.safetensors")

In [63]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

for prefix, wu in zip(prefixes, wus):
    save_file(wu, "wu.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-6
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-6

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

ValueError: Expected a dict of [str, torch.Tensor] but received <class 'torch.nn.parameter.Parameter'>

In [64]:
wu

Parameter containing:
tensor([[-0.1101, -0.0393,  0.0331,  ..., -0.1364,  0.0151,  0.0453],
        [ 0.0403, -0.0486,  0.0462,  ...,  0.0861,  0.0025,  0.0432],
        [-0.1275,  0.0479,  0.1841,  ...,  0.0899, -0.1297, -0.0879],
        ...,
        [-0.0445, -0.0548,  0.0123,  ...,  0.1044,  0.0978, -0.0695],
        [ 0.1860,  0.0167,  0.0461,  ..., -0.0963,  0.0785, -0.0225],
        [ 0.0514, -0.0277,  0.0499,  ...,  0.0070,  0.1552,  0.1207]],
       requires_grad=True)

In [65]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

for prefix, wu in zip(prefixes, wus):
    save_file(wu, {"wu: f"{prefix}wu.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-6
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-6

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

SyntaxError: unterminated string literal (detected at line 9) (<ipython-input-65-eb911f1bca60>, line 9)

In [66]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

for prefix, wu in zip(prefixes, wus):
    save_file(wu, {"wu": f"{prefix}wu.safetensors"})

for prefix, residual in zip(prefixes, residuals):
    save_file(residual, {"residual": f"{prefix}residual.safetensors"})

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-6
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-6

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

ValueError: Expected a dict of [str, torch.Tensor] but received <class 'torch.nn.parameter.Parameter'>

In [67]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt") for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt") for prefix in prefixes]

for prefix, wu in zip(prefixes, wus):
    save_file({"wu": wu}, f"{prefix}wu.safetensors")

for prefix, residual in zip(prefixes, residuals):
    save_file({"residual": residual}, f"{prefix}residual.safetensors")
# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-6
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-6

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

ValueError: You are trying to save a non contiguous tensor: `wu` which is not allowed. It either means you are trying to save tensors which are reference of each other in which case it's recommended to save only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to pack it before saving.

In [68]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone() for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt").clone() for prefix in prefixes]

for prefix, wu in zip(prefixes, wus):
    save_file({"wu": wu}, f"{prefix}wu.safetensors")

for prefix, residual in zip(prefixes, residuals):
    save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-6
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-6

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

ValueError: You are trying to save a non contiguous tensor: `wu` which is not allowed. It either means you are trying to save tensors which are reference of each other in which case it's recommended to save only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to pack it before saving.

In [69]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt").clone() for prefix in prefixes]

for prefix, wu in zip(prefixes, wus):
    save_file({"wu": wu}, f"{prefix}wu.safetensors")

for prefix, residual in zip(prefixes, residuals):
    save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-6
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-6

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

In [70]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [
    torch.load(f"{prefix}residual.pt").clone().contiguous() for prefix in prefixes
]

for prefix, wu in zip(prefixes, wus):
    save_file({"wu": wu}, f"{prefix}wu.safetensors")

for prefix, residual in zip(prefixes, residuals):
    save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-9
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-4, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

In [71]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [
    torch.load(f"{prefix}residual.pt").clone().contiguous() for prefix in prefixes
]

for prefix, wu in zip(prefixes, wus):
    save_file({"wu": wu}, f"{prefix}wu.safetensors")

for prefix, residual in zip(prefixes, residuals):
    save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-9
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-5, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

In [72]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [
    torch.load(f"{prefix}residual.pt").clone().contiguous() for prefix in prefixes
]

for prefix, wu in zip(prefixes, wus):
    save_file({"wu": wu}, f"{prefix}wu.safetensors")

for prefix, residual in zip(prefixes, residuals):
    save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-9
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-6, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

In [73]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [
    torch.load(f"{prefix}residual.pt").clone().contiguous() for prefix in prefixes
]

for prefix, wu in zip(prefixes, wus):
    save_file({"wu": wu}, f"{prefix}wu.safetensors")

for prefix, residual in zip(prefixes, residuals):
    save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-9
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-7, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

In [74]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [
    torch.load(f"{prefix}residual.pt").clone() for prefix in prefixes
]

for prefix, wu in zip(prefixes, wus):
    save_file({"wu": wu}, f"{prefix}wu.safetensors")

for prefix, residual in zip(prefixes, residuals):
    save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-9
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-7, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

In [75]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt").clone() for prefix in prefixes]

# for prefix, wu in zip(prefixes, wus):
#     save_file({"wu": wu}, f"{prefix}wu.safetensors")

# for prefix, residual in zip(prefixes, residuals):
#     save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-9
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-7, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

In [76]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt").clone() for prefix in prefixes]

# for prefix, wu in zip(prefixes, wus):
#     save_file({"wu": wu}, f"{prefix}wu.safetensors")

# for prefix, residual in zip(prefixes, residuals):
#     save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-9
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-7, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5

In [77]:
# Ah
prefixes = ["hf_", "tl_"]
wus = [torch.load(f"{prefix}wu.pt").clone().contiguous() for prefix in prefixes]
residuals = [torch.load(f"{prefix}residual.pt").clone() for prefix in prefixes]

# for prefix, wu in zip(prefixes, wus):
#     save_file({"wu": wu}, f"{prefix}wu.safetensors")

# for prefix, residual in zip(prefixes, residuals):
#     save_file({"residual": residual}, f"{prefix}residual.safetensors")

# wus = [wu / wu.norm(dim=-1, keepdim=True) for wu in wus]
# residuals = [residual / residual.norm(dim=-1, keepdim=True) for residual in residuals]

assert (
    torch.abs(wus[0] - wus[1]).max().item() < 1e-9
)  # Lots some by normalizing: 1e-9 used to work!
assert torch.abs(residuals[0] - residuals[1]).max().item() < 1e-9

# print(
#     wus[0].abs().max(), residuals[1].abs().max()
# )  # There are 1.752 and 225.6747. They go as low as 1e-6 too...

linears = [
    torch.nn.functional.linear(  # The linear dimension is 768
        residual,
        wu,
        bias=None,
    )
    for residual, wu in zip(residuals, wus)
]

abs_max = torch.abs(linears[0] - linears[1]).max().item()
assert abs_max < 1e-9, abs_max  # Fails! How

# Ohhh ...768*224 is >1e5