# Max activating examples for 10.7 (by norm projection to logits)

We want to find the examples where head 10.7 is most useful.

In [None]:
from transformer_lens.cautils.notebook import *

In [None]:
from transformer_lens.rs.callum.max_activating_exploration import print_best_outputs, find_best_improvements

In [None]:
# Load GPT-2 small onto `device` (a name this cell expects to already be in scope).
model = HookedTransformer.from_pretrained("gpt2-small", device=device)
# Have the model expose per-head attention results; the "result" hook point that
# the ablation hook in this notebook attaches to presumably relies on this being on.
model.set_use_attn_result(True)

In [None]:
# Text prompts, indexed as data[i] later in the notebook. The fixed seed presumably
# makes the sample reproducible — TODO confirm against get_webtext.
data = get_webtext(seed=6)

In [None]:
# The attention head under study: layer 10, head 7 (written "10.7" in the notebook text).
LAYER_IDX, HEAD_IDX = (10, 7)
# Independent copy of the model's W_U (presumably the unembedding matrix — TODO confirm),
# so later edits to the copy cannot touch the model's own weights.
W_U = model.W_U.clone()
# Name of the "result" hook point for LAYER_IDX; used to attach the ablation hook.
HEAD_HOOK_NAME = utils.get_act_name("result", LAYER_IDX)

# Number of prompts from `data` that the loss-comparison loop runs through the model.
NUM_PROMPTS = 100
# Not used by any live code visible in this notebook.
BATCH_SIZE = 10

In [None]:
def hook_to_ablate_head(head_output: Float[Tensor, "batch seq_len head_idx d_head"], hook: HookPoint, head = (LAYER_IDX, HEAD_IDX)):
    """Forward hook that zero-ablates one attention head's output.

    Args:
        head_output: Activation handed to the hook, annotated as
            (batch, seq_len, head_idx, d_head). It is modified in place.
        hook: Hook point this function is attached to. It must belong to
            layer ``head[0]`` and have "result" in its name.
        head: ``(layer, head_index)`` pair selecting the head to ablate.

    Returns:
        The same tensor that was passed in, with the selected head's slice
        set to zero.
    """
    expected_layer = head[0]
    # Guard against attaching the hook to the wrong layer or activation type.
    assert expected_layer == hook.layer()
    assert "result" in hook.name

    ablated_head = head[1]
    head_output[:, :, ablated_head, :] = 0
    return head_output

## How does 10.7 affect the logits when it's ablated?

We can see that cross-entropy loss increases by about 0.01 on average when this head is ablated. That might not seem like much, but it's actually not far off the typical effect of ablating other late-stage heads.

In [None]:
# Per-prompt results: string tokens, clean per-token loss, and per-token loss
# with the head under study ablated.
str_token_list = []
loss_list = []
ablated_loss_list = []

# The hook list is the same for every prompt, so build it once.
ablation_hooks = [(HEAD_HOOK_NAME, hook_to_ablate_head)]

# Only loss values are needed here, never gradients. Disabling autograd keeps
# the stored loss tensors from holding a computation graph for every prompt.
with t.no_grad():
    for i in tqdm(range(NUM_PROMPTS)):
        new_str = data[i]
        new_str_tokens = model.to_str_tokens(new_str)
        tokens = model.to_tokens(new_str)
        loss = model(tokens, return_type="loss", loss_per_token=True)
        ablated_loss = model.run_with_hooks(tokens, return_type="loss", loss_per_token=True, fwd_hooks=ablation_hooks)
        loss_list.append(loss)
        ablated_loss_list.append(ablated_loss)
        str_token_list.append(new_str_tokens)


# Join the per-prompt losses along the last dimension and drop singleton
# dimensions, giving one flat collection of per-token losses across all prompts.
all_loss = t.cat(loss_list, dim=-1).squeeze()
all_ablated_loss = t.cat(ablated_loss_list, dim=-1).squeeze()

# Distribution of the per-token loss change caused by the ablation.
hist(
    all_ablated_loss - all_loss,
    title="Difference in loss after ablating (positive ⇒ loss increases)",
    labels={"x": "Difference in cross-entropy loss"},
    template="simple_white",
    add_mean_line=True,
    width=1000,
    nbins=200
)

In [None]:
# Token count summed over every prompt, and the size of a 1% slice of it.
total_num_tokens = sum(map(len, str_token_list))
top_pct = int(total_num_tokens * 0.01)

# Both calls share the same per-prompt inputs; only the `worst` flag differs.
# (Presumably "best" means the tokens where the head helps most — TODO confirm
# against find_best_improvements.)
loss_inputs = (str_token_list, loss_list, ablated_loss_list)
best_k_indices, best_k_loss_decrease = find_best_improvements(*loss_inputs, k=top_pct)
worst_k_indices, worst_k_loss_decrease = find_best_improvements(*loss_inputs, k=top_pct, worst=True)

In [71]:
# Print the first three "best" examples, passing the ablation hook and asking
# for no activation caches.
top_indices = best_k_indices[:3]
top_loss_decreases = best_k_loss_decrease[:3]
ablation_hook = (HEAD_HOOK_NAME, hook_to_ablate_head)

caches_and_tokens = print_best_outputs(
    top_indices,
    top_loss_decreases,
    hook=ablation_hook,
    model=model,
    data=data,
    n=3,
    random=False,
    return_caches=False,
)

In [75]:
def _is_pattern_hook(name):
    """Return True only for the attention-pattern hook of the layer under study."""
    return name == utils.get_act_name("pattern", LAYER_IDX)


# Same three examples as the previous cell, but this time ask for caches,
# filtered down to the attention pattern of LAYER_IDX. The result is iterated
# as (cache, tokens) pairs further down the notebook.
caches_and_tokens = print_best_outputs(
    best_k_indices[:3],
    best_k_loss_decrease[:3],
    hook=(HEAD_HOOK_NAME, hook_to_ablate_head),
    model=model,
    data=data,
    n=3,
    random=False,
    return_caches=True,
    names_filter=_is_pattern_hook,
)
# Only the returned value is needed here (presumably this discards the text
# printed by the call above).
clear_output()

In [79]:
# Directory that receives the rendered attention plots (an absolute path on the
# original author's machine).
p = Path("/home/ubuntu/Transformerlens/transformer_lens/rs/callum/plots")

# Number of trailing positions/tokens shown for each example.
window = 100

for i, (cache, tokens) in enumerate(caches_and_tokens):

    # Attention pattern of the head under study. Assumes the cached pattern is
    # (batch, head, query, key), so indexing the head leaves (batch, query, key)
    # — TODO confirm. Only the last `window` query and key positions are kept.
    pattern = cache["pattern", LAYER_IDX][:, HEAD_IDX]
    pattern_sliced = pattern[:, -window:, -window:]
    html = cv.attention.attention_heads(
        attention = pattern_sliced,
        tokens = tokens[-window:],
        attention_head_names = [f"{LAYER_IDX}.{HEAD_IDX}, example {i}"]
    )

    # Token text can contain arbitrary Unicode, so write UTF-8 explicitly
    # instead of relying on the platform's default encoding.
    (p / f"temp_file_{i}.html").write_text(str(html), encoding="utf-8")

    # Echo the same trailing window of text that the plot covers.
    print("".join(tokens[-window:]))
    print("\n" + "=" * 60 + "\n")

 themselves to the church-- it's been fun."

Before, those of the Mormon faith in Nixa and Ozark had to drive to Springfield.

"Then, as the need grew, we were too crowded in that building," said Bishop Guison. "So the next choice was what would be the most benefit to members and the community? That's why this location was chosen."

"It gives a foundation to our faith, a sense of permanence," said Bishop


okma.com scorpions.

Related

Today, after Esme and I had returned from some shopping at Wal-Mart, I discovered that one scorpion was giving birth: a Diplocentrus species, probably Diplocentrus melici, which we had captured the 23rd of April, 2006. Later that day we captured another female. The latter gave birth some time ago, but shortly after died. My best guess is that it somehow got ill, and aborted the


 mascot by the Reveille name in May 2001.

Reveille VII retired in the summer of 2008, and had been living with local residents Paul and Tina Gardner.

Details on services for t

In [None]:

def clear_plots():
    """Delete every ``temp_file_*.html`` file found directly inside the plot directory ``p``."""
    for html_file in p.glob("temp_file_*.html"):
        html_file.unlink()


clear_plots()