# Playground: Induction Topology Explorer
---
Use this notebook to get comfortable with the pipeline, models, and plots.
Each section is separated by a horizontal rule for clarity.
---

## 1. Environment check
---

In [11]:
from importlib.metadata import version, PackageNotFoundError
import torch, transformer_lens

print("torch:", torch.__version__)
try:
    print("transformer_lens (dist):", version("transformer-lens"))
except PackageNotFoundError:
    print("transformer-lens dist not found")

print("CUDA:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


ModuleNotFoundError: No module named 'transformer_lens'

## 2. Load modules and model
---

In [None]:

import os, json, numpy as np, sys
sys.path.append(os.path.abspath("src"))
from prompts import make_abab_prompts, make_name_prompts, find_copy_ops
from runner import load_model, to_tokens, run_with_cache_all_layers
from scoring import compute_scores, layer_profile, copy_accuracy_from_logits
from plots import heatmap, profile_curve

MODEL_ID = "pythia-410m"   # change to "gpt2" or "pythia-1b" if installed
SEQ_LEN = 256
BATCH = 8


## 3. Create prompts and tokenize
---

In [None]:

# Mix ABAB and NAMES prompts; keep it small for a quick test
prompts_abab = make_abab_prompts(n=BATCH//2, seq_len=SEQ_LEN, lag_values=[1,2,4,8,16], mix="uniform")
prompts_names = make_name_prompts(n=BATCH - len(prompts_abab), seq_len=SEQ_LEN, lag_values=[2,4,8,16], mix="uniform")
prompts = prompts_abab + prompts_names

model = load_model(MODEL_ID, device="cuda", dtype="float16")
toks = to_tokens(model, prompts, seq_len=SEQ_LEN, prepend_bos=True)
toks = toks.to("cuda")

copy_ops = find_copy_ops(toks, allowed_lags=None)
print(f"Total copy opportunities found: {len(copy_ops)} across batch size {toks.shape[0]} and seq_len {toks.shape[1]}")


## 4. Run with cache, inspect attention of one layer/head
---

In [None]:

logits, cache = run_with_cache_all_layers(model, toks, names_filter=["attn"])
L, H = model.cfg.n_layers, model.cfg.n_heads
print("Layers:", L, "Heads per layer:", H)

# Visualize attention from an example (b,t) at a middle layer for head 0
if copy_ops:
    b, t, s = copy_ops[0]
    layer = L // 2  # mid layer
    A = cache["attn", layer]  # [B,H,Q,K]

    import matplotlib.pyplot as plt
    plt.figure(figsize=(7,4))
    plt.plot(A[b, 0, t, :].detach().float().cpu().numpy())
    plt.xlabel("Key position (s)")
    plt.ylabel("Attention weight (head 0)")
    plt.title(f"Layer {layer} attention from t={t} (b={b})")
    plt.tight_layout()
    plt.show()
else:
    print("No copy opportunities found; adjust prompts/seq_len/lag_values and rerun.")


## 5. Compute per-head scores and per-layer profile
---

In [None]:

scores = compute_scores(model, toks, copy_ops, memory_saver=False)  # [L, H]
print("Score matrix shape:", scores.shape)

# Save and plot
os.makedirs("figs", exist_ok=True)
heatmap(scores, "figs/heatmap_playground.png", title=f"Induction scores (model={MODEL_ID})")

prof = layer_profile(scores, mode="mean")
profile_curve(prof['per_layer'], "figs/profile_playground.png", title="Per-layer profile (mean)", annotate_peak=True)

print("Peak layer:", prof["peak_layer"], "Relative:", prof["peak_rel"], "Bandwidth:", prof["bandwidth"])


## 6. Copy accuracy at copy positions (baseline)
---

In [None]:

acc = copy_accuracy_from_logits(logits, toks, copy_ops)
print("Baseline copy accuracy at copy positions:", acc)
