In [1]:
import os
import sys
sys.path.append('../Automatic-Circuit-Discovery/')
sys.path.append('../tracr/')
sys.path.append('..')
import re

import acdc
from acdc.TLACDCExperiment import TLACDCExperiment
from acdc.acdc_utils import TorchIndex, EdgeType
import numpy as np
import torch as t
from torch import Tensor
import einops
import itertools

from transformer_lens import HookedTransformer, ActivationCache

import tqdm.notebook as tqdm
import plotly
from rich import print as rprint
from rich.table import Table

from jaxtyping import Float, Bool
from typing import Callable, Tuple, Union, Dict, Optional

# Prefer the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
print(f'Device: {device}')

Device: cuda


# Model Setup

In [2]:
# Load GPT-2 small through TransformerLens with writing-weight centering,
# unembed centering and LayerNorm folding all switched off, placed on `device`.
model = HookedTransformer.from_pretrained(
    'gpt2-small',
    center_writing_weights=False,
    center_unembed=False,
    fold_ln=False,
    device=device,
)
# Turn on the optional hook points (MLP input, separate q/k/v inputs, per-head
# attention result). These must be set after the model object exists.
# NOTE(review): presumably required so ACDC can patch individual edges between
# components -- confirm against the ACDC setup instructions.
model.set_use_hook_mlp_in(True)
model.set_use_split_qkv_input(True)
model.set_use_attn_result(True)

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2-small into HookedTransformer


from acdc.ioi.utils import get_ioi_true_edges
ioi_true_edges = get_ioi_true_edges(model)
# Keys are 4-tuples mapped to a flag. Only entries whose flag is truthy are
# reported, and only the first and third key fields are printed.
# NOTE(review): those two fields are presumably the hook names at either end of
# the edge -- confirm against get_ioi_true_edges.
for (b1, i1, b2, i2), val in ioi_true_edges.items():
    if not val:
        continue
    print(b1, b2)

# Dataset Setup

In [3]:
from ioi_dataset import IOIDataset, format_prompt, make_table
# N is forwarded to IOIDataset (presumably the number of prompts -- confirm).
N = 25
# Clean IOI prompts, tokenized with the model's own tokenizer, without a
# prepended BOS token, and with a fixed seed so the prompts are reproducible.
clean_dataset = IOIDataset(
    prompt_type='mixed',
    N=N,
    tokenizer=model.tokenizer,
    prepend_bos=False,
    seed=1,
    device=device
)
# Corrupted counterpart derived from the clean prompts.
# NOTE(review): the flip spec presumably replaces the names with unrelated
# ones -- confirm against IOIDataset.gen_flipped_prompts.
corr_dataset = clean_dataset.gen_flipped_prompts('ABC->XYZ, BAB->XYZ')

# Side-by-side preview: each clean IOI prompt, its subject and indirect-object
# tokens, and the matching corrupted ("ABC") prompt.
make_table(
  colnames = ["IOI prompt", "IOI subj", "IOI indirect obj", "ABC prompt"],
  cols = [
    map(format_prompt, clean_dataset.sentences),
    model.to_string(clean_dataset.s_tokenIDs).split(),
    model.to_string(clean_dataset.io_tokenIDs).split(),
    # The "ABC prompt" column has to come from the corrupted dataset; it used
    # to repeat the clean sentences, so the table never showed the corruption.
    map(format_prompt, corr_dataset.sentences),
  ],
  title = "Sentences from IOI vs ABC distribution",
)

# Metric Setup

In [4]:
def ave_logit_diff(
    logits: Float[Tensor, 'batch seq d_vocab'],
    ioi_dataset: IOIDataset,
    per_prompt: bool = False
):
    '''
    Logit difference between the indirect-object and the subject token.

    For every prompt, the logits at the dataset's 'end' position are read and
    the subject-token logit is subtracted from the indirect-object-token logit
    (correct answer minus incorrect answer).

    Args:
        logits: model output, indexed as [prompt, position, vocab token].
        ioi_dataset: supplies word_idx['end'], io_tokenIDs and s_tokenIDs,
            each with one entry per prompt.
        per_prompt: when True, return one difference per prompt; otherwise
            return the mean over prompts.
    '''
    prompt_rows = range(logits.shape[0])
    end_positions = ioi_dataset.word_idx['end']
    # Logit given to the indirect object (the correct completion)
    logit_io = logits[prompt_rows, end_positions, ioi_dataset.io_tokenIDs]
    # Logit given to the subject (the incorrect completion)
    logit_s = logits[prompt_rows, end_positions, ioi_dataset.s_tokenIDs]
    per_prompt_diff = logit_io - logit_s
    if per_prompt:
        return per_prompt_diff
    return per_prompt_diff.mean()

# Baseline forward passes (gradients are not needed here): logits and mean
# logit difference for the clean prompts and for the corrupted prompts.
with t.no_grad():
    clean_logits = model(clean_dataset.toks)
    corrupt_logits = model(corr_dataset.toks)
    # .item() converts the scalar tensors to plain Python floats; these two
    # values are used as default arguments of ioi_metric.
    clean_logit_diff = ave_logit_diff(clean_logits, clean_dataset).item()
    corrupt_logit_diff = ave_logit_diff(corrupt_logits, corr_dataset).item()

def ioi_metric(
    logits: Float[Tensor, "batch seq_len d_vocab"],
    corrupted_logit_diff: float = corrupt_logit_diff,
    clean_logit_diff: float = clean_logit_diff,
    ioi_dataset: IOIDataset = clean_dataset
):
    '''
    Mean logit difference rescaled against the two baselines.

    The result is 0 when the logits reproduce the corrupted baseline and 1
    when they reproduce the clean baseline.

    Args:
        logits: model output to score.
        corrupted_logit_diff: mean logit difference of the corrupted run.
        clean_logit_diff: mean logit difference of the clean run.
        ioi_dataset: dataset used to locate the answer tokens.

    Note: the defaults are captured when this cell is executed, so the cell
    must be re-run after the baselines or the dataset are recomputed.
    '''
    baseline_gap = clean_logit_diff - corrupted_logit_diff
    shift_from_corrupted = ave_logit_diff(logits, ioi_dataset) - corrupted_logit_diff
    return shift_from_corrupted / baseline_gap

def abs_ioi_metric(logits: Float[Tensor, "batch seq_len d_vocab"]):
    '''Absolute value of ioi_metric evaluated with its default baselines.'''
    normalized = ioi_metric(logits)
    return abs(normalized)

def negative_ioi_metric(logits: Float[Tensor, "batch seq_len d_vocab"]):
    '''Sign-flipped ioi_metric evaluated with its default baselines.'''
    normalized = ioi_metric(logits)
    return -normalized

def negative_abs_ioi_metric(logits: Float[Tensor, "batch seq_len d_vocab"]):
    '''Sign-flipped abs_ioi_metric, so the result is never positive.'''
    magnitude = abs_ioi_metric(logits)
    return -magnitude

# Sanity check of the normalization: score each baseline run on its own
# dataset. By construction the clean run must give 1.0 and the corrupted run 0.0.
with t.no_grad():
    clean_metric = ioi_metric(clean_logits, corrupt_logit_diff, clean_logit_diff, clean_dataset)
    corrupt_metric = ioi_metric(corrupt_logits, corrupt_logit_diff, clean_logit_diff, corr_dataset)

# Report the raw mean logit differences, then the normalized metrics.
print(f'Clean direction: {clean_logit_diff}, Corrupt direction: {corrupt_logit_diff}')
print(f'Clean metric: {clean_metric}, Corrupt metric: {corrupt_metric}')

Clean direction: 2.805180072784424, Corrupt direction: 2.0037853717803955
Clean metric: 1.0, Corrupt metric: 0.0


# Run Experiment

In [5]:
from ACDCPPExperiment import ACDCPPExperiment
# Thresholds to sweep, one experiment pass per value.
# NOTE(review): np.arange with a fractional step can gain or lose the last
# element through rounding; the recorded run swept 30 values. Confirm that is
# the intended count before changing the bounds.
THRESHOLDS = np.arange(0.005, 0.155, 0.005)
RUN_NAME = 'abs_edge'
# NOTE(review): the ACDC stage is scored with the negated absolute metric while
# the ACDC++ stage receives the raw metric -- presumably because ACDC treats
# lower values as better; confirm against ACDCPPExperiment.
acdcpp_exp = ACDCPPExperiment(model,
                              clean_dataset.toks,
                              corr_dataset.toks,
                              acdc_metric=negative_abs_ioi_metric,
                              acdcpp_metric=ioi_metric,
                              thresholds=THRESHOLDS,
                              run_name=RUN_NAME,
                              verbose=False,
                              attr_absolute_val=True,
                              save_graphs_after=0,
                              pruning_mode = "edge",
                              no_pruned_nodes_attr = 1,
                             )
# Long-running cell: the recorded progress bar shows 11 of 30 thresholds done
# after roughly three and a half hours.
pruned_heads, num_passes, pruned_attrs = acdcpp_exp.run()



ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13831/69420 [00:00<00:00, 138239.60it/s][A
 40%|███▉      | 27655/69420 [00:00<00:00, 56131.26it/s] [A
 51%|█████▏    | 35680/69420 [00:00<00:00, 39487.11it/s][A
 59%|█████▉    | 41162/69420 [00:01<00:00, 31349.57it/s][A
 65%|██████▌   | 45233/69420 [00:01<00:00, 26766.17it/s][A
 70%|██████▉   | 48451/69420 [00:01<00:00, 23731.55it/s][A
 74%|███████▎  | 51116/69420 [00:01<00:00, 21219.96it/s][A
 77%|███████▋  | 53380/69420 [00:01<00:00, 19310.03it/s][A
 80%|███████▉  | 55360/69420 [00:02<00:00, 17425.66it/s][A
 82%|████████▏ | 57101/69420 [00:02<00:00, 16212.85it/s][A
 85%|████████▍ | 58690/69420 [00:02<00:00, 15197.63it/s][A
 87%|████████▋ | 60163/69420 [00:02<00:00, 14264.99it/s][A
 89%|████████▊ | 61539/69420 [00:02<00:00, 13434.08it/s][A
 91%|█████████ | 62835/69420 [00:02<00:00, 12699.99it/s][A
 92%|█████████▏| 64062/69420 [00:02<00:00, 12397.76it/s][A
 94%|█████████▍| 65266/69420 [00:02<00:00, 11716.48i

Saving ACDC++ Graph





Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13830/69420 [00:00<00:00, 138175.28it/s][A
 40%|███▉      | 27648/69420 [00:00<00:00, 56480.99it/s] [A
 51%|█████▏    | 35687/69420 [00:00<00:00, 39945.96it/s][A
 59%|█████▉    | 41196/69420 [00:01<00:00, 31788.68it/s][A
 65%|██████▌   | 45298/69420 [00:01<00:00, 27107.42it/s][A
 70%|██████▉   | 48544/69420 [00:01<00:00, 23893.43it/s][A
 74%|███████▍  | 51224/69420 [00:01<00:00, 21381.34it/s][A
 77%|███████▋  | 53503/69420 [00:01<00:00, 19395.75it/s][A
 80%|███████▉  | 55491/69420 [00:02<00:00, 17562.43it/s][A
 82%|████████▏ | 57245/69420 [00:02<00:00, 16392.73it/s][A
 85%|████████▍ | 58851/69420 [00:02<00:00, 15477.63it/s][A
 87%|████████▋ | 60352/69420 [00:02<00:00, 14326.29it/s][A
 89%|████████▉ | 61736/69420 [00:02<00:00, 13491.77it/s][A
 91%|█████████ | 63038/69420 [00:02<00:00, 12841.55it/s][A
 93%|█████████▎| 64280/69420 [00:02<00:00, 12478.06it/s][A
 94%|█████████▍| 65492/69420 [00:02<00:00, 11785.85i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13566/69420 [00:00<00:00, 135656.57it/s][A
 39%|███▉      | 27132/69420 [00:00<00:00, 55858.38it/s] [A
 50%|█████     | 35046/69420 [00:00<00:00, 39293.79it/s][A
 58%|█████▊    | 40464/69420 [00:01<00:00, 31802.21it/s][A
 64%|██████▍   | 44532/69420 [00:01<00:00, 26863.71it/s][A
 69%|██████▉   | 47741/69420 [00:01<00:00, 23419.74it/s][A
 73%|███████▎  | 50373/69420 [00:01<00:00, 20978.80it/s][A
 76%|███████▌  | 52611/69420 [00:01<00:00, 19229.14it/s][A
 79%|███████▊  | 54581/69420 [00:01<00:00, 17386.05it/s][A
 81%|████████  | 56317/69420 [00:02<00:00, 16109.38it/s][A
 83%|████████▎ | 57896/69420 [00:02<00:00, 15201.22it/s][A
 86%|████████▌ | 59370/69420 [00:02<00:00, 14114.89it/s][A
 87%|████████▋ | 60733/69420 [00:02<00:00, 13270.96it/s][A
 89%|████████▉ | 62014/69420 [00:02<00:00, 12455.23it/s][A
 91%|█████████ | 63218/69420 [00:02<00:00, 12077.89it/s][A
 93%|█████████▎| 64390/69420 [00:02<00:00, 11625.23i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13801/69420 [00:00<00:00, 137914.12it/s][A
 40%|███▉      | 27593/69420 [00:00<00:00, 55371.02it/s] [A
 51%|█████     | 35567/69420 [00:00<00:00, 39263.43it/s][A
 59%|█████▉    | 41022/69420 [00:01<00:00, 31410.96it/s][A
 65%|██████▍   | 45086/69420 [00:01<00:00, 26572.28it/s][A
 70%|██████▉   | 48284/69420 [00:01<00:00, 23454.32it/s][A
 73%|███████▎  | 50923/69420 [00:01<00:00, 21064.26it/s][A
 77%|███████▋  | 53171/69420 [00:01<00:00, 19179.40it/s][A
 79%|███████▉  | 55137/69420 [00:02<00:00, 17612.05it/s][A
 82%|████████▏ | 56893/69420 [00:02<00:00, 16220.14it/s][A
 84%|████████▍ | 58482/69420 [00:02<00:00, 15346.09it/s][A
 86%|████████▋ | 59970/69420 [00:02<00:00, 14083.73it/s][A
 88%|████████▊ | 61332/69420 [00:02<00:00, 13271.47it/s][A
 90%|█████████ | 62613/69420 [00:02<00:00, 12794.69it/s][A
 92%|█████████▏| 63851/69420 [00:02<00:00, 12492.09it/s][A
 94%|█████████▎| 65066/69420 [00:02<00:00, 11852.09i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13812/69420 [00:00<00:00, 138067.13it/s][A
 40%|███▉      | 27619/69420 [00:00<00:00, 56326.94it/s] [A
 51%|█████▏    | 35647/69420 [00:00<00:00, 39788.75it/s][A
 59%|█████▉    | 41144/69420 [00:01<00:00, 31520.62it/s][A
 65%|██████▌   | 45227/69420 [00:01<00:00, 26443.79it/s][A
 70%|██████▉   | 48425/69420 [00:01<00:00, 23554.87it/s][A
 74%|███████▎  | 51078/69420 [00:01<00:00, 21208.98it/s][A
 77%|███████▋  | 53342/69420 [00:01<00:00, 19394.50it/s][A
 80%|███████▉  | 55330/69420 [00:02<00:00, 17733.22it/s][A
 82%|████████▏ | 57099/69420 [00:02<00:00, 16370.40it/s][A
 85%|████████▍ | 58703/69420 [00:02<00:00, 15165.56it/s][A
 87%|████████▋ | 60174/69420 [00:02<00:00, 14193.48it/s][A
 89%|████████▊ | 61544/69420 [00:02<00:00, 13153.75it/s][A
 90%|█████████ | 62813/69420 [00:02<00:00, 12550.84it/s][A
 92%|█████████▏| 64026/69420 [00:02<00:00, 12012.49it/s][A
 94%|█████████▍| 65190/69420 [00:02<00:00, 11405.16i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13832/69420 [00:00<00:00, 138193.61it/s][A
 40%|███▉      | 27652/69420 [00:00<00:00, 56518.66it/s] [A
 51%|█████▏    | 35694/69420 [00:00<00:00, 39892.06it/s][A
 59%|█████▉    | 41201/69420 [00:01<00:00, 31847.99it/s][A
 65%|██████▌   | 45307/69420 [00:01<00:00, 27235.22it/s][A
 70%|██████▉   | 48562/69420 [00:01<00:00, 23949.59it/s][A
 74%|███████▍  | 51247/69420 [00:01<00:00, 21474.12it/s][A
 77%|███████▋  | 53534/69420 [00:01<00:00, 19561.79it/s][A
 80%|████████  | 55537/69420 [00:01<00:00, 17830.96it/s][A
 83%|████████▎ | 57315/69420 [00:02<00:00, 16587.90it/s][A
 85%|████████▍ | 58939/69420 [00:02<00:00, 15411.30it/s][A
 87%|████████▋ | 60433/69420 [00:02<00:00, 14133.72it/s][A
 89%|████████▉ | 61799/69420 [00:02<00:00, 13318.74it/s][A
 91%|█████████ | 63084/69420 [00:02<00:00, 12636.85it/s][A
 93%|█████████▎| 64305/69420 [00:02<00:00, 12193.11it/s][A
 94%|█████████▍| 65487/69420 [00:02<00:00, 11623.19i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13811/69420 [00:00<00:00, 138061.74it/s][A
 40%|███▉      | 27618/69420 [00:00<00:00, 56080.47it/s] [A
 51%|█████▏    | 35633/69420 [00:00<00:00, 39770.45it/s][A
 59%|█████▉    | 41126/69420 [00:01<00:00, 31761.47it/s][A
 65%|██████▌   | 45221/69420 [00:01<00:00, 26873.28it/s][A
 70%|██████▉   | 48447/69420 [00:01<00:00, 23870.48it/s][A
 74%|███████▎  | 51124/69420 [00:01<00:00, 21422.00it/s][A
 77%|███████▋  | 53405/69420 [00:01<00:00, 19491.77it/s][A
 80%|███████▉  | 55402/69420 [00:02<00:00, 17660.10it/s][A
 82%|████████▏ | 57165/69420 [00:02<00:00, 16486.41it/s][A
 85%|████████▍ | 58780/69420 [00:02<00:00, 15526.48it/s][A
 87%|████████▋ | 60286/69420 [00:02<00:00, 14337.80it/s][A
 89%|████████▉ | 61672/69420 [00:02<00:00, 13497.06it/s][A
 91%|█████████ | 62975/69420 [00:02<00:00, 12921.99it/s][A
 93%|█████████▎| 64225/69420 [00:02<00:00, 12493.23it/s][A
 94%|█████████▍| 65438/69420 [00:02<00:00, 11802.79i

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13820/69420 [00:00<00:00, 138149.74it/s][A
 40%|███▉      | 27635/69420 [00:00<00:00, 56409.09it/s] [A
 51%|█████▏    | 35670/69420 [00:00<00:00, 39804.81it/s][A
 59%|█████▉    | 41171/69420 [00:01<00:00, 31508.11it/s][A
 65%|██████▌   | 45255/69420 [00:01<00:00, 26854.54it/s][A
 70%|██████▉   | 48482/69420 [00:01<00:00, 23875.00it/s][A
 74%|███████▎  | 51160/69420 [00:01<00:00, 21424.89it/s][A
 77%|███████▋  | 53442/69420 [00:01<00:00, 19347.08it/s][A
 80%|███████▉  | 55427/69420 [00:02<00:00, 17445.15it/s][A
 82%|████████▏ | 57171/69420 [00:02<00:00, 16368.75it/s][A
 85%|████████▍ | 58776/69420 [00:02<00:00, 15415.53it/s][A
 87%|████████▋ | 60271/69420 [00:02<00:00, 14278.32it/s][A
 89%|████████▉ | 61651/69420 [00:02<00:00, 13225.60it/s][A
 91%|█████████ | 62929/69420 [00:02<00:00, 12561.00it/s][A
 92%|█████████▏| 64144/69420 [00:02<00:00, 12142.43it/s][A
 94%|█████████▍| 65322/69420 [00:02<00:00, 11408.57i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13815/69420 [00:00<00:00, 138044.15it/s][A
 40%|███▉      | 27620/69420 [00:00<00:00, 56081.79it/s] [A
 51%|█████▏    | 35635/69420 [00:00<00:00, 39726.08it/s][A
 59%|█████▉    | 41125/69420 [00:01<00:00, 31697.23it/s][A
 65%|██████▌   | 45216/69420 [00:01<00:00, 26753.26it/s][A
 70%|██████▉   | 48434/69420 [00:01<00:00, 23771.90it/s][A
 74%|███████▎  | 51103/69420 [00:01<00:00, 21394.32it/s][A
 77%|███████▋  | 53382/69420 [00:01<00:00, 19541.57it/s][A
 80%|███████▉  | 55383/69420 [00:02<00:00, 17754.64it/s][A
 82%|████████▏ | 57155/69420 [00:02<00:00, 16498.78it/s][A
 85%|████████▍ | 58771/69420 [00:02<00:00, 15513.82it/s][A
 87%|████████▋ | 60275/69420 [00:02<00:00, 14308.60it/s][A
 89%|████████▉ | 61658/69420 [00:02<00:00, 13491.15it/s][A
 91%|█████████ | 62960/69420 [00:02<00:00, 12917.69it/s][A
 92%|█████████▏| 64209/69420 [00:02<00:00, 12367.68it/s][A
 94%|█████████▍| 65409/69420 [00:02<00:00, 11669.66i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13806/69420 [00:00<00:00, 137937.79it/s][A
 40%|███▉      | 27600/69420 [00:00<00:00, 56189.66it/s] [A
 51%|█████▏    | 35616/69420 [00:00<00:00, 39259.33it/s][A
 59%|█████▉    | 41080/69420 [00:01<00:00, 31419.73it/s][A
 65%|██████▌   | 45151/69420 [00:01<00:00, 26668.81it/s][A
 70%|██████▉   | 48360/69420 [00:01<00:00, 23432.08it/s][A
 73%|███████▎  | 51000/69420 [00:01<00:00, 20983.20it/s][A
 77%|███████▋  | 53242/69420 [00:01<00:00, 19260.42it/s][A
 80%|███████▉  | 55217/69420 [00:02<00:00, 17800.41it/s][A
 82%|████████▏ | 56991/69420 [00:02<00:00, 16554.81it/s][A
 84%|████████▍ | 58612/69420 [00:02<00:00, 15346.23it/s][A
 87%|████████▋ | 60101/69420 [00:02<00:00, 14423.34it/s][A
 89%|████████▊ | 61494/69420 [00:02<00:00, 13585.48it/s][A
 90%|█████████ | 62805/69420 [00:02<00:00, 12996.03it/s][A
 92%|█████████▏| 64062/69420 [00:02<00:00, 12275.99it/s][A
 94%|█████████▍| 65254/69420 [00:02<00:00, 11647.81i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13389/69420 [00:00<00:00, 133843.54it/s][A
 39%|███▊      | 26774/69420 [00:00<00:00, 56425.57it/s] [A
 50%|████▉     | 34648/69420 [00:00<00:00, 40500.62it/s][A
 58%|█████▊    | 40104/69420 [00:01<00:00, 32348.62it/s][A
 64%|██████▎   | 44197/69420 [00:01<00:00, 27304.86it/s][A
 68%|██████▊   | 47434/69420 [00:01<00:00, 23991.95it/s][A
 72%|███████▏  | 50109/69420 [00:01<00:00, 21261.65it/s][A
 75%|███████▌  | 52372/69420 [00:01<00:00, 19670.25it/s][A
 78%|███████▊  | 54383/69420 [00:01<00:00, 18080.47it/s][A
 81%|████████  | 56184/69420 [00:02<00:00, 16759.69it/s][A
 83%|████████▎ | 57826/69420 [00:02<00:00, 15651.63it/s][A
 85%|████████▌ | 59345/69420 [00:02<00:00, 14577.37it/s][A
 88%|████████▊ | 60755/69420 [00:02<00:00, 13667.12it/s][A
 89%|████████▉ | 62076/69420 [00:02<00:00, 13029.00it/s][A
 91%|█████████ | 63337/69420 [00:02<00:00, 12569.16it/s][A
 93%|█████████▎| 64558/69420 [00:02<00:00, 12120.14i

Saving ACDC++ Graph


 37%|███▋      | 11/30 [3:31:03<1:18:09, 246.81s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13828/69420 [00:00<00:00, 138178.00it/s][A
 40%|███▉      | 27646/69420 [00:00<00:00, 56067.41it/s] [A
 51%|█████▏    | 35665/69420 [00:00<00:00, 39750.42it/s][A
 59%|█████▉    | 41159/69420 [00:01<00:00, 31739.67it/s][A
 65%|██████▌   | 45254/69420 [00:01<00:00, 26801.68it/s][A
 70%|██████▉   | 48476/69420 [00:01<00:00, 23564.20it/s][A
 74%|███████▎  | 51129/69420 [00:01<00:00, 21048.19it/s][A
 77%|███████▋  | 53378/69420 [00:01<00:00, 19249.39it/s][A
 80%|███████▉  | 55352/69420 [00:02<00:00, 17335.67it/s][A
 82%|████████▏ | 57085/69420 [00:02<00:00, 16219.97it/s][A
 85%|████████▍ | 58675/69420 [00:02<00:00, 15056.74it/s][A
 87%|████████▋ | 60135/69420 [00:02<00:00, 14233.32it/s][A
 89%|████████▊ | 61508/69420 [00:02<00:00, 13325.45it/s][A
 90%|█████████ | 62793/69420 [00:02<00:00, 12698.15it/s][A
 92%|█████████▏| 64020/69420 [00:02<00:00, 12134.13it/s][A
 94%|█████████▍| 65197/69420 [00:02<00:00, 11458.21i

Saving ACDC++ Graph


 40%|████      | 12/30 [3:32:56<1:01:48, 206.04s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13419/69420 [00:00<00:00, 134080.16it/s][A
 39%|███▊      | 26828/69420 [00:00<00:00, 56309.31it/s] [A
 50%|████▉     | 34705/69420 [00:00<00:00, 41018.75it/s][A
 58%|█████▊    | 40193/69420 [00:01<00:00, 32446.09it/s][A
 64%|██████▍   | 44298/69420 [00:01<00:00, 27448.40it/s][A
 68%|██████▊   | 47548/69420 [00:01<00:00, 24247.39it/s][A
 72%|███████▏  | 50246/69420 [00:01<00:00, 21714.59it/s][A
 76%|███████▌  | 52548/69420 [00:01<00:00, 19809.54it/s][A
 79%|███████▊  | 54571/69420 [00:01<00:00, 17652.41it/s][A
 81%|████████  | 56337/69420 [00:02<00:00, 16446.10it/s][A
 83%|████████▎ | 57951/69420 [00:02<00:00, 15576.04it/s][A
 86%|████████▌ | 59464/69420 [00:02<00:00, 14416.00it/s][A
 88%|████████▊ | 60859/69420 [00:02<00:00, 13469.60it/s][A
 90%|████████▉ | 62162/69420 [00:02<00:00, 12847.87it/s][A
 91%|█████████▏| 63406/69420 [00:02<00:00, 12462.71it/s][A
 93%|█████████▎| 64617/69420 [00:02<00:00, 12019.89i

Saving ACDC++ Graph


 43%|████▎     | 13/30 [3:34:48<50:18, 177.54s/it]  

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13809/69420 [00:00<00:00, 137966.78it/s][A
 40%|███▉      | 27606/69420 [00:00<00:00, 56549.07it/s] [A
 51%|█████▏    | 35641/69420 [00:00<00:00, 39688.16it/s][A
 59%|█████▉    | 41133/69420 [00:01<00:00, 31529.06it/s][A
 65%|██████▌   | 45216/69420 [00:01<00:00, 25035.18it/s][A
 70%|██████▉   | 48329/69420 [00:01<00:00, 22403.96it/s][A
 73%|███████▎  | 50901/69420 [00:01<00:00, 20570.71it/s][A
 77%|███████▋  | 53120/69420 [00:01<00:00, 18900.41it/s][A
 79%|███████▉  | 55071/69420 [00:02<00:00, 17261.32it/s][A
 82%|████████▏ | 56802/69420 [00:02<00:00, 16082.74it/s][A
 84%|████████▍ | 58383/69420 [00:02<00:00, 15377.46it/s][A
 86%|████████▋ | 59880/69420 [00:02<00:00, 14226.67it/s][A
 88%|████████▊ | 61260/69420 [00:02<00:00, 13440.69it/s][A
 90%|█████████ | 62562/69420 [00:02<00:00, 12873.72it/s][A
 92%|█████████▏| 63811/69420 [00:02<00:00, 12372.69it/s][A
 94%|█████████▎| 65015/69420 [00:02<00:00, 11772.62i

Saving ACDC++ Graph


 47%|████▋     | 14/30 [3:36:40<42:03, 157.70s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13397/69420 [00:00<00:00, 133935.32it/s][A
 39%|███▊      | 26791/69420 [00:00<00:00, 57722.11it/s] [A
 50%|█████     | 34735/69420 [00:00<00:00, 41496.62it/s][A
 58%|█████▊    | 40263/69420 [00:01<00:00, 32480.95it/s][A
 64%|██████▍   | 44383/69420 [00:01<00:00, 27652.48it/s][A
 69%|██████▊   | 47656/69420 [00:01<00:00, 24429.33it/s][A
 73%|███████▎  | 50373/69420 [00:01<00:00, 21744.67it/s][A
 76%|███████▌  | 52681/69420 [00:01<00:00, 19815.27it/s][A
 79%|███████▉  | 54707/69420 [00:01<00:00, 17915.02it/s][A
 81%|████████▏ | 56495/69420 [00:02<00:00, 16558.99it/s][A
 84%|████████▎ | 58118/69420 [00:02<00:00, 15396.19it/s][A
 86%|████████▌ | 59612/69420 [00:02<00:00, 14406.44it/s][A
 88%|████████▊ | 61003/69420 [00:02<00:00, 13550.39it/s][A
 90%|████████▉ | 62310/69420 [00:02<00:00, 12964.86it/s][A
 92%|█████████▏| 63563/69420 [00:02<00:00, 12337.15it/s][A
 93%|█████████▎| 64760/69420 [00:02<00:00, 11980.47i

Saving ACDC++ Graph


 50%|█████     | 15/30 [3:38:31<35:55, 143.69s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13818/69420 [00:00<00:00, 138168.27it/s][A
 40%|███▉      | 27635/69420 [00:00<00:00, 56282.92it/s] [A
 51%|█████▏    | 35665/69420 [00:00<00:00, 39628.32it/s][A
 59%|█████▉    | 41155/69420 [00:01<00:00, 31729.47it/s][A
 65%|██████▌   | 45250/69420 [00:01<00:00, 26982.75it/s][A
 70%|██████▉   | 48485/69420 [00:01<00:00, 23987.53it/s][A
 74%|███████▎  | 51172/69420 [00:01<00:00, 21552.13it/s][A
 77%|███████▋  | 53465/69420 [00:01<00:00, 19485.44it/s][A
 80%|███████▉  | 55462/69420 [00:02<00:00, 17656.89it/s][A
 82%|████████▏ | 57225/69420 [00:02<00:00, 16491.07it/s][A
 85%|████████▍ | 58841/69420 [00:02<00:00, 15518.88it/s][A
 87%|████████▋ | 60346/69420 [00:02<00:00, 14164.32it/s][A
 89%|████████▉ | 61717/69420 [00:02<00:00, 13394.08it/s][A
 91%|█████████ | 63011/69420 [00:02<00:00, 12833.14it/s][A
 93%|█████████▎| 64253/69420 [00:02<00:00, 12386.43it/s][A
 94%|█████████▍| 65456/69420 [00:02<00:00, 11713.30i

Saving ACDC++ Graph


 53%|█████▎    | 16/30 [3:40:15<30:46, 131.86s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13814/69420 [00:00<00:00, 138130.58it/s][A
 40%|███▉      | 27628/69420 [00:00<00:00, 55914.39it/s] [A
 51%|█████▏    | 35638/69420 [00:00<00:00, 39644.74it/s][A
 59%|█████▉    | 41124/69420 [00:01<00:00, 31500.04it/s][A
 65%|██████▌   | 45202/69420 [00:01<00:00, 26639.88it/s][A
 70%|██████▉   | 48411/69420 [00:01<00:00, 23591.97it/s][A
 74%|███████▎  | 51065/69420 [00:01<00:00, 21203.55it/s][A
 77%|███████▋  | 53327/69420 [00:01<00:00, 19384.14it/s][A
 80%|███████▉  | 55313/69420 [00:02<00:00, 17774.02it/s][A
 82%|████████▏ | 57085/69420 [00:02<00:00, 16531.60it/s][A
 85%|████████▍ | 58704/69420 [00:02<00:00, 15371.82it/s][A
 87%|████████▋ | 60194/69420 [00:02<00:00, 14345.40it/s][A
 89%|████████▊ | 61579/69420 [00:02<00:00, 13516.72it/s][A
 91%|█████████ | 62883/69420 [00:02<00:00, 12915.19it/s][A
 92%|█████████▏| 64132/69420 [00:02<00:00, 12437.66it/s][A
 94%|█████████▍| 65339/69420 [00:02<00:00, 11626.70i

Saving ACDC++ Graph


 57%|█████▋    | 17/30 [3:41:56<26:31, 122.45s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13812/69420 [00:00<00:00, 138033.58it/s][A
 40%|███▉      | 27616/69420 [00:00<00:00, 56286.72it/s] [A
 51%|█████▏    | 35640/69420 [00:00<00:00, 39589.49it/s][A
 59%|█████▉    | 41125/69420 [00:01<00:00, 31423.93it/s][A
 65%|██████▌   | 45200/69420 [00:01<00:00, 26473.40it/s][A
 70%|██████▉   | 48398/69420 [00:01<00:00, 23478.53it/s][A
 74%|███████▎  | 51043/69420 [00:01<00:00, 21133.34it/s][A
 77%|███████▋  | 53299/69420 [00:01<00:00, 19325.70it/s][A
 80%|███████▉  | 55280/69420 [00:02<00:00, 17837.94it/s][A
 82%|████████▏ | 57058/69420 [00:02<00:00, 16582.58it/s][A
 85%|████████▍ | 58682/69420 [00:02<00:00, 15408.68it/s][A
 87%|████████▋ | 60177/69420 [00:02<00:00, 14398.24it/s][A
 89%|████████▊ | 61568/69420 [00:02<00:00, 13444.24it/s][A
 91%|█████████ | 62866/69420 [00:02<00:00, 12705.96it/s][A
 92%|█████████▏| 64095/69420 [00:02<00:00, 12239.64it/s][A
 94%|█████████▍| 65283/69420 [00:02<00:00, 11419.88i

Saving ACDC++ Graph


 60%|██████    | 18/30 [3:43:35<23:06, 115.52s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13419/69420 [00:00<00:00, 134125.53it/s][A
 39%|███▊      | 26832/69420 [00:00<00:00, 56708.34it/s] [A
 50%|█████     | 34731/69420 [00:00<00:00, 40926.52it/s][A
 58%|█████▊    | 40219/69420 [00:01<00:00, 31610.10it/s][A
 64%|██████▍   | 44278/69420 [00:01<00:00, 26861.72it/s][A
 68%|██████▊   | 47489/69420 [00:01<00:00, 23825.35it/s][A
 72%|███████▏  | 50154/69420 [00:01<00:00, 21319.35it/s][A
 76%|███████▌  | 52423/69420 [00:01<00:00, 19659.51it/s][A
 78%|███████▊  | 54434/69420 [00:01<00:00, 17924.79it/s][A
 81%|████████  | 56222/69420 [00:02<00:00, 16426.48it/s][A
 83%|████████▎ | 57834/69420 [00:02<00:00, 15343.79it/s][A
 85%|████████▌ | 59324/69420 [00:02<00:00, 14180.14it/s][A
 87%|████████▋ | 60695/69420 [00:02<00:00, 13236.89it/s][A
 89%|████████▉ | 61973/69420 [00:02<00:00, 12575.55it/s][A
 91%|█████████ | 63189/69420 [00:02<00:00, 12149.71it/s][A
 93%|█████████▎| 64368/69420 [00:02<00:00, 11670.27i

Saving ACDC++ Graph


 63%|██████▎   | 19/30 [3:45:14<20:16, 110.64s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13800/69420 [00:00<00:00, 137953.43it/s][A
 40%|███▉      | 27596/69420 [00:00<00:00, 56260.90it/s] [A
 51%|█████▏    | 35616/69420 [00:00<00:00, 39555.61it/s][A
 59%|█████▉    | 41098/69420 [00:01<00:00, 31576.81it/s][A
 65%|██████▌   | 45181/69420 [00:01<00:00, 26760.06it/s][A
 70%|██████▉   | 48398/69420 [00:01<00:00, 23612.52it/s][A
 74%|███████▎  | 51054/69420 [00:01<00:00, 21261.62it/s][A
 77%|███████▋  | 53321/69420 [00:01<00:00, 19432.33it/s][A
 80%|███████▉  | 55312/69420 [00:02<00:00, 17813.75it/s][A
 82%|████████▏ | 57088/69420 [00:02<00:00, 16417.49it/s][A
 85%|████████▍ | 58696/69420 [00:02<00:00, 15330.27it/s][A
 87%|████████▋ | 60182/69420 [00:02<00:00, 14335.59it/s][A
 89%|████████▊ | 61566/69420 [00:02<00:00, 13257.29it/s][A
 91%|█████████ | 62846/69420 [00:02<00:00, 12768.68it/s][A
 92%|█████████▏| 64081/69420 [00:02<00:00, 12370.62it/s][A
 94%|█████████▍| 65282/69420 [00:02<00:00, 11656.56i

Saving ACDC++ Graph


 67%|██████▋   | 20/30 [3:46:51<17:45, 106.50s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13814/69420 [00:00<00:00, 138053.57it/s][A
 40%|███▉      | 27620/69420 [00:00<00:00, 56591.47it/s] [A
 51%|█████▏    | 35660/69420 [00:00<00:00, 39836.15it/s][A
 59%|█████▉    | 41162/69420 [00:01<00:00, 31719.41it/s][A
 65%|██████▌   | 45259/69420 [00:01<00:00, 27083.38it/s][A
 70%|██████▉   | 48502/69420 [00:01<00:00, 24039.60it/s][A
 74%|███████▎  | 51194/69420 [00:01<00:00, 21580.97it/s][A
 77%|███████▋  | 53490/69420 [00:01<00:00, 19514.29it/s][A
 80%|███████▉  | 55490/69420 [00:02<00:00, 17620.91it/s][A
 82%|████████▏ | 57250/69420 [00:02<00:00, 16470.16it/s][A
 85%|████████▍ | 58864/69420 [00:02<00:00, 15340.41it/s][A
 87%|████████▋ | 60352/69420 [00:02<00:00, 14211.04it/s][A
 89%|████████▉ | 61724/69420 [00:02<00:00, 13158.39it/s][A
 91%|█████████ | 62994/69420 [00:02<00:00, 12589.52it/s][A
 92%|█████████▏| 64211/69420 [00:02<00:00, 12222.72it/s][A
 94%|█████████▍| 65397/69420 [00:02<00:00, 11417.24i

Saving ACDC++ Graph


 70%|███████   | 21/30 [3:48:28<15:33, 103.69s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13821/69420 [00:00<00:00, 138181.47it/s][A
 40%|███▉      | 27640/69420 [00:00<00:00, 56210.38it/s] [A
 51%|█████▏    | 35667/69420 [00:00<00:00, 39959.98it/s][A
 59%|█████▉    | 41175/69420 [00:01<00:00, 31642.27it/s][A
 65%|██████▌   | 45267/69420 [00:01<00:00, 26839.54it/s][A
 70%|██████▉   | 48493/69420 [00:01<00:00, 23792.63it/s][A
 74%|███████▎  | 51165/69420 [00:01<00:00, 21218.93it/s][A
 77%|███████▋  | 53430/69420 [00:01<00:00, 19027.85it/s][A
 80%|███████▉  | 55386/69420 [00:02<00:00, 17361.11it/s][A
 82%|████████▏ | 57120/69420 [00:02<00:00, 16301.86it/s][A
 85%|████████▍ | 58717/69420 [00:02<00:00, 15324.79it/s][A
 87%|████████▋ | 60203/69420 [00:02<00:00, 14309.57it/s][A
 89%|████████▊ | 61585/69420 [00:02<00:00, 13496.49it/s][A
 91%|█████████ | 62887/69420 [00:02<00:00, 12891.50it/s][A
 92%|█████████▏| 64134/69420 [00:02<00:00, 12420.49it/s][A
 94%|█████████▍| 65340/69420 [00:02<00:00, 11659.99i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13400/69420 [00:00<00:00, 133894.14it/s][A
 39%|███▊      | 26790/69420 [00:00<00:00, 56805.65it/s] [A
 50%|████▉     | 34685/69420 [00:00<00:00, 41129.69it/s][A
 58%|█████▊    | 40181/69420 [00:01<00:00, 32636.38it/s][A
 64%|██████▍   | 44300/69420 [00:01<00:00, 27291.60it/s][A
 68%|██████▊   | 47542/69420 [00:01<00:00, 24295.21it/s][A
 72%|███████▏  | 50246/69420 [00:01<00:00, 21749.92it/s][A
 76%|███████▌  | 52553/69420 [00:01<00:00, 19994.73it/s][A
 79%|███████▊  | 54594/69420 [00:01<00:00, 17912.70it/s][A
 81%|████████  | 56384/69420 [00:02<00:00, 16655.16it/s][A
 84%|████████▎ | 58018/69420 [00:02<00:00, 15879.97it/s][A
 86%|████████▌ | 59562/69420 [00:02<00:00, 14572.93it/s][A
 88%|████████▊ | 60976/69420 [00:02<00:00, 13858.83it/s][A
 90%|████████▉ | 62319/69420 [00:02<00:00, 13153.58it/s][A
 92%|█████████▏| 63595/69420 [00:02<00:00, 12408.83it/s][A
 93%|█████████▎| 64803/69420 [00:02<00:00, 11767.58i

Saving ACDC++ Graph


 77%|███████▋  | 23/30 [3:51:40<11:37, 99.66s/it] 

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13410/69420 [00:00<00:00, 134079.99it/s][A
 39%|███▊      | 26818/69420 [00:00<00:00, 56804.51it/s] [A
 50%|█████     | 34720/69420 [00:00<00:00, 41316.82it/s][A
 58%|█████▊    | 40231/69420 [00:01<00:00, 32532.67it/s][A
 64%|██████▍   | 44348/69420 [00:01<00:00, 27320.04it/s][A
 69%|██████▊   | 47594/69420 [00:01<00:00, 24470.21it/s][A
 72%|███████▏  | 50313/69420 [00:01<00:00, 21859.84it/s][A
 76%|███████▌  | 52631/69420 [00:01<00:00, 19965.97it/s][A
 79%|███████▉  | 54671/69420 [00:01<00:00, 17819.90it/s][A
 81%|████████▏ | 56454/69420 [00:02<00:00, 16560.59it/s][A
 84%|████████▎ | 58080/69420 [00:02<00:00, 15618.78it/s][A
 86%|████████▌ | 59597/69420 [00:02<00:00, 14371.90it/s][A
 88%|████████▊ | 60989/69420 [00:02<00:00, 13603.27it/s][A
 90%|████████▉ | 62304/69420 [00:02<00:00, 12909.74it/s][A
 92%|█████████▏| 63554/69420 [00:02<00:00, 12289.44it/s][A
 93%|█████████▎| 64747/69420 [00:02<00:00, 11758.73i

Saving ACDC++ Graph
Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13806/69420 [00:00<00:00, 138054.54it/s][A
 40%|███▉      | 27612/69420 [00:00<00:00, 56275.79it/s] [A
 51%|█████▏    | 35637/69420 [00:00<00:00, 39420.67it/s][A
 59%|█████▉    | 41114/69420 [00:01<00:00, 31642.83it/s][A
 65%|██████▌   | 45202/69420 [00:01<00:00, 26625.43it/s][A
 70%|██████▉   | 48411/69420 [00:01<00:00, 23515.77it/s][A
 74%|███████▎  | 51059/69420 [00:01<00:00, 21116.24it/s][A
 77%|███████▋  | 53314/69420 [00:01<00:00, 19253.49it/s][A
 80%|███████▉  | 55289/69420 [00:02<00:00, 17628.40it/s][A
 82%|████████▏ | 57048/69420 [00:02<00:00, 16417.62it/s][A
 84%|████████▍ | 58656/69420 [00:02<00:00, 15083.52it/s][A
 87%|████████▋ | 60119/69420 [00:02<00:00, 14095.57it/s][A
 89%|████████▊ | 61479/69420 [00:02<00:00, 13336.54it/s][A
 90%|█████████ | 62765/69420 [00:02<00:00, 12636.66it/s][A
 92%|█████████▏| 63986/69420 [00:02<00:00, 11965.69it/s][A
 94%|█████████▍| 65146/69420 [00:02<00:00, 11480.49i

Saving ACDC++ Graph


 83%|████████▎ | 25/30 [3:54:52<08:07, 97.54s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13402/69420 [00:00<00:00, 133950.83it/s][A
 39%|███▊      | 26798/69420 [00:00<00:00, 56986.22it/s] [A
 50%|████▉     | 34704/69420 [00:00<00:00, 40921.79it/s][A
 58%|█████▊    | 40192/69420 [00:01<00:00, 32373.02it/s][A
 64%|██████▍   | 44295/69420 [00:01<00:00, 27359.18it/s][A
 68%|██████▊   | 47541/69420 [00:01<00:00, 24333.26it/s][A
 72%|███████▏  | 50247/69420 [00:01<00:00, 21601.35it/s][A
 76%|███████▌  | 52541/69420 [00:01<00:00, 19822.05it/s][A
 79%|███████▊  | 54567/69420 [00:01<00:00, 17785.16it/s][A
 81%|████████  | 56345/69420 [00:02<00:00, 16573.28it/s][A
 84%|████████▎ | 57971/69420 [00:02<00:00, 15570.49it/s][A
 86%|████████▌ | 59483/69420 [00:02<00:00, 14383.43it/s][A
 88%|████████▊ | 60875/69420 [00:02<00:00, 13486.53it/s][A
 90%|████████▉ | 62178/69420 [00:02<00:00, 12873.61it/s][A
 91%|█████████▏| 63424/69420 [00:02<00:00, 12444.13it/s][A
 93%|█████████▎| 64633/69420 [00:02<00:00, 11943.96i

Saving ACDC++ Graph


 87%|████████▋ | 26/30 [3:56:26<06:26, 96.63s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13402/69420 [00:00<00:00, 134013.10it/s][A
 39%|███▊      | 26804/69420 [00:00<00:00, 57238.67it/s] [A
 50%|█████     | 34726/69420 [00:00<00:00, 41268.79it/s][A
 58%|█████▊    | 40237/69420 [00:01<00:00, 32213.26it/s][A
 64%|██████▍   | 44336/69420 [00:01<00:00, 27245.45it/s][A
 69%|██████▊   | 47577/69420 [00:01<00:00, 23974.61it/s][A
 72%|███████▏  | 50256/69420 [00:01<00:00, 21342.07it/s][A
 76%|███████▌  | 52529/69420 [00:01<00:00, 19497.03it/s][A
 79%|███████▊  | 54525/69420 [00:01<00:00, 17449.50it/s][A
 81%|████████  | 56271/69420 [00:02<00:00, 16211.84it/s][A
 83%|████████▎ | 57862/69420 [00:02<00:00, 15460.64it/s][A
 86%|████████▌ | 59364/69420 [00:02<00:00, 14235.05it/s][A
 88%|████████▊ | 60743/69420 [00:02<00:00, 13436.74it/s][A
 89%|████████▉ | 62043/69420 [00:02<00:00, 12896.62it/s][A
 91%|█████████ | 63292/69420 [00:02<00:00, 12518.97it/s][A
 93%|█████████▎| 64510/69420 [00:02<00:00, 11980.14i

Saving ACDC++ Graph


 90%|█████████ | 27/30 [3:58:00<04:47, 95.72s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13822/69420 [00:00<00:00, 138161.17it/s][A
 40%|███▉      | 27639/69420 [00:00<00:00, 56203.32it/s] [A
 51%|█████▏    | 35664/69420 [00:00<00:00, 39502.28it/s][A
 59%|█████▉    | 41146/69420 [00:01<00:00, 31659.10it/s][A
 65%|██████▌   | 45235/69420 [00:01<00:00, 26958.53it/s][A
 70%|██████▉   | 48467/69420 [00:01<00:00, 23956.28it/s][A
 74%|███████▎  | 51151/69420 [00:01<00:00, 21527.69it/s][A
 77%|███████▋  | 53441/69420 [00:01<00:00, 19556.00it/s][A
 80%|███████▉  | 55443/69420 [00:02<00:00, 17673.82it/s][A
 82%|████████▏ | 57208/69420 [00:02<00:00, 16495.41it/s][A
 85%|████████▍ | 58824/69420 [00:02<00:00, 15413.18it/s][A
 87%|████████▋ | 60319/69420 [00:02<00:00, 14271.70it/s][A
 89%|████████▉ | 61698/69420 [00:02<00:00, 13436.50it/s][A
 91%|█████████ | 62994/69420 [00:02<00:00, 12893.23it/s][A
 93%|█████████▎| 64241/69420 [00:02<00:00, 12460.23it/s][A
 94%|█████████▍| 65451/69420 [00:02<00:00, 11756.94i

Saving ACDC++ Graph


 93%|█████████▎| 28/30 [3:59:33<03:09, 94.94s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 20%|█▉        | 13823/69420 [00:00<00:00, 138162.93it/s][A
 40%|███▉      | 27640/69420 [00:00<00:00, 56279.97it/s] [A
 51%|█████▏    | 35669/69420 [00:00<00:00, 39858.57it/s][A
 59%|█████▉    | 41171/69420 [00:01<00:00, 31783.36it/s][A
 65%|██████▌   | 45271/69420 [00:01<00:00, 27118.89it/s][A
 70%|██████▉   | 48516/69420 [00:01<00:00, 24032.83it/s][A
 74%|███████▍  | 51207/69420 [00:01<00:00, 21537.58it/s][A
 77%|███████▋  | 53499/69420 [00:01<00:00, 19478.98it/s][A
 80%|███████▉  | 55495/69420 [00:02<00:00, 17663.29it/s][A
 82%|████████▏ | 57258/69420 [00:02<00:00, 16399.99it/s][A
 85%|████████▍ | 58864/69420 [00:02<00:00, 15431.06it/s][A
 87%|████████▋ | 60360/69420 [00:02<00:00, 14277.62it/s][A
 89%|████████▉ | 61739/69420 [00:02<00:00, 13485.17it/s][A
 91%|█████████ | 63040/69420 [00:02<00:00, 12956.47it/s][A
 93%|█████████▎| 64293/69420 [00:02<00:00, 12511.53it/s][A
 94%|█████████▍| 65508/69420 [00:02<00:00, 11837.57i

Saving ACDC++ Graph


 97%|█████████▋| 29/30 [4:01:06<01:34, 94.59s/it]

Saving ACDC Graph




ln_final.hook_normalized
ln_final.hook_scale
blocks.11.hook_resid_post
blocks.11.hook_mlp_out
blocks.11.mlp.hook_post
blocks.11.mlp.hook_pre
blocks.11.ln2.hook_normalized
blocks.11.ln2.hook_scale
blocks.11.hook_mlp_in
blocks.11.hook_resid_mid
blocks.11.hook_attn_out
blocks.11.attn.hook_result
blocks.11.attn.hook_z
blocks.11.attn.hook_pattern
blocks.11.attn.hook_attn_scores
blocks.11.attn.hook_v
blocks.11.attn.hook_k
blocks.11.attn.hook_q
blocks.11.ln1.hook_normalized
blocks.11.ln1.hook_scale
blocks.11.hook_v_input
blocks.11.hook_k_input
blocks.11.hook_q_input
blocks.11.hook_resid_pre
blocks.10.hook_resid_post
blocks.10.hook_mlp_out
blocks.10.mlp.hook_post
blocks.10.mlp.hook_pre
blocks.10.ln2.hook_normalized
blocks.10.ln2.hook_scale
blocks.10.hook_mlp_in
blocks.10.hook_resid_mid
blocks.10.hook_attn_out
blocks.10.attn.hook_result
blocks.10.attn.hook_z
blocks.10.attn.hook_pattern
blocks.10.attn.hook_attn_scores
blocks.10.attn.hook_v
blocks.10.attn.hook_k
blocks.10.attn.hook_q
blocks.10.ln


  0%|          | 0/69420 [00:00<?, ?it/s][A
 19%|█▉        | 13397/69420 [00:00<00:00, 133932.13it/s][A
 39%|███▊      | 26791/69420 [00:00<00:00, 57570.90it/s] [A
 50%|█████     | 34727/69420 [00:00<00:00, 41400.07it/s][A
 58%|█████▊    | 40248/69420 [00:01<00:00, 32325.82it/s][A
 64%|██████▍   | 44357/69420 [00:01<00:00, 27359.77it/s][A
 69%|██████▊   | 47608/69420 [00:01<00:00, 24213.00it/s][A
 72%|███████▏  | 50307/69420 [00:01<00:00, 21661.01it/s][A
 76%|███████▌  | 52607/69420 [00:01<00:00, 19794.89it/s][A
 79%|███████▊  | 54631/69420 [00:01<00:00, 17741.09it/s][A
 81%|████████▏ | 56405/69420 [00:02<00:00, 16466.83it/s][A
 84%|████████▎ | 58021/69420 [00:02<00:00, 15650.78it/s][A
 86%|████████▌ | 59541/69420 [00:02<00:00, 14183.67it/s][A
 88%|████████▊ | 60918/69420 [00:02<00:00, 13476.81it/s][A
 90%|████████▉ | 62223/69420 [00:02<00:00, 12823.41it/s][A
 91%|█████████▏| 63466/69420 [00:02<00:00, 12456.98it/s][A
 93%|█████████▎| 64678/69420 [00:02<00:00, 12029.69i

Saving ACDC++ Graph


100%|██████████| 30/30 [4:02:41<00:00, 485.37s/it]

Saving ACDC Graph





# Save Data

In [9]:
import json

def convert_to_torch_index(index_list):
    """Join the items of ``index_list`` into one string, writing 'None' for every ':'.

    Args:
        index_list: An iterable of strings. The caller in this notebook passes
            ``str(index)``, so iteration is character by character and each
            ':' character becomes the text 'None'.
            NOTE(review): presumably the stringified index looks like
            '[:, :, 3]' -- confirm against the index type's ``__str__``.

    Returns:
        The concatenation of all items, with each ':' item replaced by 'None'.
    """
    pieces = []
    for item in index_list:
        if item == ':':
            # A bare slice marker is stored as the literal text 'None'.
            pieces.append('None')
        else:
            pieces.append(item)
    return ''.join(pieces)

# Replace the first two entries of every threshold's record with plain lists
# (in place on `pruned_heads`) before serialising.
# NOTE(review): presumably these entries are sets, which `json` cannot encode -- confirm.
for thresh, head_groups in pruned_heads.items():
    for slot in (0, 1):
        head_groups[slot] = list(head_groups[slot])

# Flatten each threshold's {((e1, i1), (e2, i2)): attr} mapping into rows of
# [e1, index-string, e2, index-string, attr]; the indices are stringified and
# passed through convert_to_torch_index so that ':' is stored as 'None'.
cleaned_attrs = {}
for thresh, edge_attrs in pruned_attrs.items():
    cleaned_attrs[thresh] = [
        [e1, convert_to_torch_index(str(i1)), e2, convert_to_torch_index(str(i2)), attr]
        for ((e1, i1), (e2, i2)), attr in edge_attrs.items()
    ]

# Write one JSON file per result object; every file name is prefixed with RUN_NAME.
json_outputs = (
    (f'{RUN_NAME}_pruned_heads.json', pruned_heads),
    (f'{RUN_NAME}_num_passes.json', num_passes),
    (f'{RUN_NAME}_pruned_attrs.json', cleaned_attrs),
)
for out_path, payload in json_outputs:
    with open(out_path, 'w') as f:
        json.dump(payload, f)