In [1]:
import os
DEV_MODE = True
try:
    import google.colab
    IN_COLAB = True
    print("Running as a Colab notebook")
except:
    IN_COLAB = False
# Install if in Colab
if IN_COLAB:
    %pip install transformer_lens
    %pip install circuitsvis
    # Install a faster Node version
    !curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -; sudo apt-get install -y nodejs  # noqa

# Hot reload in development mode & not running on the CD
if not IN_COLAB:
    from IPython import get_ipython
    ip = get_ipython()
    if not ip.extension_manager.loaded:
        ip.extension_manager.load('autoreload')
        %autoreload 2
        
# True only when the GITHUB_ACTIONS environment variable is exactly "true".
IN_GITHUB = os.environ.get("GITHUB_ACTIONS") == "true"

# Pick the plotly renderer: "colab" when in Colab or when DEV_MODE is off,
# otherwise "notebook_connected".
import plotly.io as pio
use_colab_renderer = IN_COLAB or not DEV_MODE
pio.renderers.default = "colab" if use_colab_renderer else "notebook_connected"

print(f"Using renderer: {pio.renderers.default}")

# CircuitsVis: interactive visualisation widgets
import circuitsvis as cv
# Smoke test that the library imports and can build a widget.
# NOTE(review): this is not the last expression in the cell, so its return value
# is not auto-displayed — wrap it in display(...) if the widget should be shown.
cv.examples.hello("Neel")

import warnings
# Suppress every Python warning from here on (no category or module filter is given).
warnings.filterwarnings("ignore")

# Main imports: tensor library, einsum helpers, progress bars and plotting
import torch
import torch.nn as nn
import einops
from fancy_einsum import einsum
import tqdm.auto as tqdm
import plotly.express as px

from jaxtyping import Float
from functools import partial

# TransformerLens: utilities, hook points and the hooked model classes
import transformer_lens.utils as utils
from transformer_lens.hook_points import (
    HookPoint,
)  # Hooking utilities
from transformer_lens import HookedTransformer, FactoredMatrix

# Disable autograd globally; this notebook only runs inference, never training.
torch.set_grad_enabled(False)

# Star import of the local plotting helpers; presumably where `line`, used in the
# cells below, comes from — TODO confirm and import the needed names explicitly.
from plot_utils import *

# Device string passed as `device=` when models are loaded and evaluated below.
device = "cpu"


Using renderer: notebook_connected


In [2]:
# Different models were checkpointed at different points; plot each model's
# checkpoint labels against the checkpoint index.
from transformer_lens.loading_from_pretrained import get_checkpoint_labels

# Each entry: (scale name shown in the title, log_y flag, models plotted with that scale).
for scale_name, use_log_y, scaled_model_names in (
    ("Log", True, ["attn-only-2l", "solu-12l", "stanford-gpt2-small-a"]),
    ("Linear", False, ["solu-1l-pile", "solu-6l-pile"]),
):
    for model_name in scaled_model_names:
        checkpoint_labels, checkpoint_label_type = get_checkpoint_labels(model_name)
        line(
            checkpoint_labels,
            xaxis="Checkpoint Index",
            yaxis=f"Checkpoint Value ({checkpoint_label_type})",
            title=f"Checkpoint Values for {model_name} ({scale_name} scale)",
            log_y=use_log_y,
            markers=True,
        )


# Phase transition and induction heads

In [3]:
from transformer_lens import evals
# Two-layer SoLU model: a fairly arbitrary pick that is small (quick to download,
# cheap to hold in memory) yet pretty good at the induction task.
model_name = "solu-2l"

# Checkpoints to load, selected by checkpoint_index; -1 means the final checkpoint.
checkpoint_indices = [10, 25, 35, 60, -1]

# Per-checkpoint results, filled in by the loading loop in the next cell.
checkpointed_models, tokens_trained_on, induction_losses = [], [], []



In [None]:
# Start from empty result lists so that re-running this cell does not append a
# second copy of every checkpoint to lists already filled by a previous run.
checkpointed_models = []
tokens_trained_on = []
induction_losses = []

for index in checkpoint_indices:
    # Load the model from the relevant checkpoint by index
    model_for_this_checkpoint = HookedTransformer.from_pretrained(model_name, checkpoint_index=index, device=device)
    checkpointed_models.append(model_for_this_checkpoint)

    # The checkpoint's label value from the model config, recorded as tokens seen
    tokens_seen_for_this_checkpoint = model_for_this_checkpoint.cfg.checkpoint_value
    tokens_trained_on.append(tokens_seen_for_this_checkpoint)

    # Scalar induction loss of this checkpoint; .item() converts it to a Python number
    induction_loss_for_this_checkpoint = evals.induction_loss(model_for_this_checkpoint, device=device).item()
    induction_losses.append(induction_loss_for_this_checkpoint)


In [None]:
# Induction loss of each loaded checkpoint against tokens trained on (log-scaled x axis).
line(
    induction_losses,
    x=tokens_trained_on,
    xaxis="Tokens Trained On",
    yaxis="Induction Loss",
    title="Induction Loss over training: solu-2l",
    markers=True,
    log_x=True,
)
