# SETUP

In [37]:
# Import stuff
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import einops
from fancy_einsum import einsum
import tqdm.auto as tqdm
import random
from pathlib import Path
import plotly.express as px
from torch.utils.data import DataLoader

# from torchtyping import TensorType as TT
from typing import List, Union, Optional
from functools import partial
import copy
import gc

import itertools
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
import dataclasses
import datasets
from IPython.display import HTML
from einops import rearrange

# from sklearn.decomposition import PCA
from scipy.special import softmax
from scipy.interpolate import BSpline, make_interp_spline

import transformer_lens
import transformer_lens.utils as utils
from transformer_lens.hook_points import (
    HookedRootModule,
    HookPoint,
)  # Hooking utilities
from transformer_lens import HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache, loading_from_pretrained
from transformer_lens.loading_from_pretrained import get_checkpoint_labels, get_official_model_name
torch.set_grad_enabled(False)

from transformer_lens import evals
import matplotlib.pyplot as plt

import plotly.io as pio
import plotly.graph_objects as go
pio.renderers.default = "notebook_connected"
torch.set_grad_enabled(False)

# Plotly display configuration handed to `fig.show(config=config)`.
# `toImageButtonOptions` holds the settings for exporting the figure as an image.
config = dict(
    toImageButtonOptions=dict(
        format='png',             # one of: png, svg, jpeg, webp
        filename='custom_image',  # base name of the exported file
        height=500,
        width=900,
        scale=10,                 # multiply title/legend/axis/canvas sizes by this factor
    )
)

In [38]:
# Select the compute device first, then report on the GPU only if one exists.
# Querying `get_device_properties(0)` unconditionally raises on a machine
# without CUDA, which would leave `device` undefined for every later cell.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    print(torch.cuda.get_device_properties(0), torch.cuda.device_count())
else:
    print("CUDA not available; running on CPU")

_CudaDeviceProperties(name='GeForce RTX 2080 Ti', major=7, minor=5, total_memory=11019MB, multi_processor_count=68) 4


In [28]:
def plot_effective_dimensionality(model_to_effective_dimension_qk, model_to_tokens_trained_on, title):
    """Plot normalised effective dimensionality against training progress, one curve per model.

    Parameters:
    - model_to_effective_dimension_qk: mapping model name -> 2-D array-like. The
      first axis is indexed by checkpoint; the second axis is averaged over
      (presumably one value per layer, as produced by `effective_rank` -- TODO confirm).
      Despite the parameter name, any per-model series can be passed (the
      notebook also passes the W_OV values).
    - model_to_tokens_trained_on: mapping model name -> 1-D sequence of training
      progress values, one per checkpoint, in the same order.
    - title: figure title.

    Returns:
    - A plotly `go.Figure` with, per model, a mean line and a shaded band of
      mean +/- 0.5 * std. Axis titles/scales are left to the caller.
    """
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    variance_alpha = 0.1

    fig = go.Figure()
    fig.update_layout(title=f'{title}')

    for i, model_name in enumerate(model_to_effective_dimension_qk):
        # Cycle through the palette so models beyond the 10th are still drawn
        # (zipping with `colors` would silently drop them).
        color = colors[i % len(colors)]

        # Normalise every column by its value at the first checkpoint so that
        # all curves start at 1 and are comparable across models.
        effective_dimension = np.asarray(model_to_effective_dimension_qk[model_name])
        effective_dimension = effective_dimension / effective_dimension[0]

        # Express progress as a percentage of the last checkpoint's value.
        tokens_trained_on = np.asarray(model_to_tokens_trained_on[model_name])
        tokens_trained_on = 100 * (tokens_trained_on / tokens_trained_on[-1])
        # The first x-value is forced to a small positive number; presumably so
        # the initial checkpoint remains visible on a log-scaled x axis.
        tokens_trained_on[0] = 1e-2

        mean_val = np.mean(effective_dimension, axis=1)
        # Half a standard deviation on each side of the mean (the trace is
        # labelled "Variance" but this is the quantity actually drawn).
        std_val = 0.5 * np.std(effective_dimension, axis=1)

        fig.add_trace(go.Scatter(x=tokens_trained_on, y=mean_val, name=f'{model_name}',
                                 mode='lines+markers', line=dict(color=color), marker=dict(color=color)))

        # Closed polygon: lower edge left-to-right, then upper edge right-to-left.
        red, green, blue = (int(color[pos:pos + 2], 16) for pos in (1, 3, 5))
        x_values = tokens_trained_on.tolist()
        fig.add_trace(go.Scatter(x=x_values + x_values[::-1],
                                 y=(mean_val - std_val).tolist() + (mean_val + std_val).tolist()[::-1],
                                 fill='toself', fillcolor=f'rgba({red}, {green}, {blue}, {variance_alpha})',
                                 line=dict(color='rgba(255,255,255,0)'),
                                 name=f'{model_name} - Variance', showlegend=False))
    return fig



In [29]:
def plot_3d_trajectories(data):
    """
    Plot 3D trajectories: raw points as dimmed markers plus a smooth
    B-spline curve through them, one colour per trajectory.

    Parameters:
    - data: A numpy array of shape [NxTx3] (N curves, T points each, xyz).

    Returns:
    - A plotly `go.Figure` with two traces per curve (markers, then line) and
      axis tick labels/titles hidden.
    """
    fig = go.Figure()

    def interpolate_bspline(points, k=3, num=100):
        """Interpolate `points` with a B-spline of degree at most `k`, sampled at `num` positions."""
        n_points = len(points)
        if n_points < 2:
            # Nothing to interpolate between; hand the point(s) back unchanged.
            return np.asarray(points)
        # A degree-k interpolating spline needs at least k + 1 points, so short
        # trajectories fall back to a lower degree instead of raising.
        degree = min(k, n_points - 1)
        t = np.linspace(0, 1, n_points)
        t_new = np.linspace(0, 1, num)
        spline = make_interp_spline(t, points, k=degree)
        return spline(t_new)

    # Colour palette; reused cyclically when there are more curves than colours.
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

    for i in range(data.shape[0]):
        curve = data[i]
        color = colors[i % len(colors)]

        # Actual data points, dimmed and kept out of the legend.
        fig.add_trace(go.Scatter3d(x=curve[:, 0], y=curve[:, 1], z=curve[:, 2],
                                   mode='markers',
                                   marker=dict(size=5, opacity=0.5, color=color),
                                   showlegend=False))

        # Smooth trajectory through the points.
        x_new, y_new, z_new = interpolate_bspline(curve).T
        fig.add_trace(go.Scatter3d(x=x_new, y=y_new, z=z_new,
                                   mode='lines',
                                   line=dict(width=2, color=color),
                                   name=f'Curve {i + 1}'))

    # Hide axis labels and tick values.
    fig.update_layout(scene=dict(xaxis=dict(showticklabels=False, title=""),
                                 yaxis=dict(showticklabels=False, title=""),
                                 zaxis=dict(showticklabels=False, title="")),
                      showlegend=True)

    return fig

In [30]:
def effective_rank(model):
    """Entropy-based effective rank of a model's OV and QK circuits.

    For each circuit the singular values are normalised to sum to 1 along the
    last axis, their Shannon entropy H is taken, and exp(H) is used as the
    effective rank. The result is then averaged over the last remaining axis
    (presumably attention heads, leaving one value per layer -- TODO confirm
    against the shapes of `model.OV` / `model.QK`).

    Parameters:
    - model: object exposing `OV` and `QK` attributes whose `.svd()` returns a
      tuple with the singular values at index 1.

    Returns:
    - `[ov, qk]`: a list of two numpy arrays.
    """

    def _mean_effective_rank(singular_values):
        """exp(entropy) of the normalised singular values, averaged over the last axis."""
        probs = singular_values / singular_values.sum(dim=-1, keepdim=True)
        # entr(p) = -p * log(p) and is defined as 0 at p == 0, so an exactly
        # zero singular value contributes nothing instead of producing NaN.
        entropy = torch.special.entr(probs).sum(dim=-1)
        return torch.exp(entropy).mean(dim=-1).cpu().detach().numpy()

    with torch.no_grad():
        dims = [
            _mean_effective_rank(model.OV.svd()[1]),
            _mean_effective_rank(model.QK.svd()[1]),
        ]

    # Release cached GPU blocks held by the intermediate tensors.
    torch.cuda.empty_cache()
    return dims


def uniformly_spaced_integers(N, k):
    """Return `k` integers evenly spread over [0, N], followed by a trailing -1.

    Parameters:
    - N: upper end of the range (inclusive).
    - k: number of evenly spaced values to generate before the -1 sentinel.

    Returns:
    - Integer numpy array of length `k + 1`: the rounded evenly spaced values,
      then -1.
    """
    # np.linspace gives exactly k points including both endpoints. np.arange
    # with a fractional step and dtype=int truncates the step itself, so the
    # values drift and never reach N; it also needs N / (k - 1), which fails
    # for k == 1.
    numbers = np.rint(np.linspace(0, N, k)).astype(int)

    # Append -1 to the array
    return np.append(numbers, -1)

def log_scaled_integers_v2(N, k, base=np.e):
    """Return logarithmically spaced integer indices in [0, N), framed by 0 and -1.

    `k` points are spaced evenly in log_base space between log_base(2) and
    log_base(N + 1), mapped back, shifted by -1 and rounded to integers. The
    first two and the last of those points are then discarded, a 0 is put in
    front and a -1 at the end, so the result has `k - 1` entries.

    Parameters:
    - N: size of the range being sampled (e.g. number of checkpoints).
    - k: number of log-spaced points generated before trimming.
    - base: logarithm base used for the spacing.

    Returns:
    - Integer numpy array `[0, <k - 3 log-spaced values>, -1]`.
    """
    log_base = np.log(base)
    upper_exponent = np.log(N + 1) / log_base
    exponents = np.linspace(np.log(2) / log_base, upper_exponent, k)

    # Rescale so that the largest point lands on N.
    rescale = N / (base ** upper_exponent - 1)
    rounded = np.round((base ** exponents - 1) * rescale).astype(int)

    # Drop the two smallest points and the largest, then add the 0 / -1 ends.
    interior = rounded[2:-1]
    return np.concatenate(([0], interior, [-1]))



# Weights Analysis

In [31]:

# Sweep over training checkpoints of several pretrained models and record the
# effective rank of their OV / QK circuits at each sampled checkpoint.
# Results are stored in the three `model_to_*` dicts, keyed by model name.
torch.cuda.empty_cache()
# NOTE: despite its name, `step_size` is passed as `k` to
# log_scaled_integers_v2, i.e. it controls how many checkpoints are sampled
# (that function returns k - 1 indices), not a stride.
step_size = 20
model_to_effective_dimension_ov = {}
model_to_effective_dimension_qk = {}
model_to_tokens_trained_on = {}

# all_models = [
#     "attn-only-1l", "attn-only-2l", "attn-only-3l", "attn-only-4l",
#     "stanford-gpt2-small-a", "stanford-gpt2-medium-a"]
all_models = [
    "attn-only-1l", "attn-only-2l", "attn-only-3l", "attn-only-4l",
    "stanford-gpt2-small-a", "stanford-gpt2-small-c", "stanford-gpt2-small-d", 
    # "stanford-gpt2-medium-a", "stanford-gpt2-medium-c", "stanford-gpt2-medium-d"]
    "stanford-gpt2-medium-c", "stanford-gpt2-medium-d"]
for model_name in all_models:
    print(model_name)
    # Only the number of available checkpoints is used; the labels themselves
    # are replaced by log-spaced checkpoint indices (ending with -1).
    checkpoints = get_checkpoint_labels(get_official_model_name(model_name))[0]
    checkpoints = log_scaled_integers_v2(len(checkpoints), step_size, 10)
    # Special case: for this model the first sampled index is 1 instead of 0.
    # NOTE(review): the reason is not visible here -- presumably checkpoint 0
    # cannot be loaded for this model; confirm.
    if model_name == "stanford-gpt2-medium-d":
        checkpoints[0] = 1
    tokens_trained_on = []
    effective_dimension_ov =[]
    effective_dimension_qk =[]
    
    for index in checkpoints:
        print(index)
        # Loaded on CPU, so the SVDs in effective_rank also run on CPU.
        model_for_this_checkpoint = HookedTransformer.from_pretrained(model_name, checkpoint_index=index, device="cpu")

        # cfg.checkpoint_value is recorded as the training progress of this
        # checkpoint (treated as "tokens seen" by the variable names -- TODO confirm unit).
        tokens_seen_for_this_checkpoint = model_for_this_checkpoint.cfg.checkpoint_value
        tokens_trained_on.append(tokens_seen_for_this_checkpoint)

        temp_ov, temp_qk = effective_rank(model_for_this_checkpoint)

        effective_dimension_ov.append(temp_ov)
        effective_dimension_qk.append(temp_qk)

        # Drop the model before loading the next checkpoint to bound memory use.
        del temp_ov, temp_qk, model_for_this_checkpoint
        torch.cuda.empty_cache()
        gc.collect()

    model_to_tokens_trained_on[model_name] = tokens_trained_on
    model_to_effective_dimension_ov[model_name] = effective_dimension_ov
    model_to_effective_dimension_qk[model_name] = effective_dimension_qk

    # Only the local names are deleted; the lists stay alive inside the dicts.
    del tokens_trained_on, effective_dimension_ov, effective_dimension_qk
    torch.cuda.empty_cache()
    gc.collect()


attn-only-1l
0
Loaded pretrained model attn-only-1l into HookedTransformer
2
Loaded pretrained model attn-only-1l into HookedTransformer
3
Loaded pretrained model attn-only-1l into HookedTransformer
4
Loaded pretrained model attn-only-1l into HookedTransformer
5
Loaded pretrained model attn-only-1l into HookedTransformer
7
Loaded pretrained model attn-only-1l into HookedTransformer
9
Loaded pretrained model attn-only-1l into HookedTransformer
12
Loaded pretrained model attn-only-1l into HookedTransformer
15
Loaded pretrained model attn-only-1l into HookedTransformer
19
Loaded pretrained model attn-only-1l into HookedTransformer
25
Loaded pretrained model attn-only-1l into HookedTransformer
31
Loaded pretrained model attn-only-1l into HookedTransformer
40
Loaded pretrained model attn-only-1l into HookedTransformer
50
Loaded pretrained model attn-only-1l into HookedTransformer
64
Loaded pretrained model attn-only-1l into HookedTransformer
81
Loaded pretrained model attn-only-1l into Hook

Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
-1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
stanford-gpt2-small-c
0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
-1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
stanford-gpt2-small-d
0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
-1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
stanford-gpt2-medium-c
0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
-1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
stanford-gpt2-medium-d
1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
-1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer


In [None]:
# torch.save(model_logits, 'attn+stan_model_logits.pth')

In [40]:
# Draw, display and export the effective-dimensionality curves: first for the
# QK circuit, then for the OV circuit. Both figures get the same gold
# highlight band and log-log axes.
for per_model_dims, plot_title, png_path in (
    (model_to_effective_dimension_qk, "W_QK dim", "w_qk.png"),
    (model_to_effective_dimension_ov, "W_OV dim", "w_ov.png"),
):
    fig = plot_effective_dimensionality(per_model_dims, model_to_tokens_trained_on, plot_title)
    # Highlight the region between 0.5% and 1.5% on the x axis.
    fig.add_vrect(x0=0.5, x1=1.5, line_width=1, fillcolor="gold", opacity=0.2)
    fig.update_xaxes(title="% token in training set", type='log')
    fig.update_yaxes(title="Effective dimensionality", type='log')
    fig.show(config=config)
    fig.write_image(png_path, scale=6)

# PCA

In [6]:
# Load two reference models on CPU and build the Pile data loader.
# The loader is tokenised with model_attn's tokenizer; the logits-collection
# cell further down reads cfg.d_vocab from both reference models.
# Small batch size to avoid CUDA memory issues on Colab.
model_attn = HookedTransformer.from_pretrained("attn-only-2l", device='cpu')
model_stan = HookedTransformer.from_pretrained("stanford-gpt2-small-a", device='cpu')
pile_batch_size = 1
pile_dataloader = evals.make_pile_data_loader(tokenizer=model_attn.tokenizer, batch_size=pile_batch_size)

Loaded pretrained model attn-only-2l into HookedTransformer


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
10000


In [None]:
# Just to download the models: every sampled checkpoint is loaded once and
# immediately discarded; nothing is computed or stored.
# NOTE: relies on `step_size` having been defined by an earlier-run cell.
all_models = [
    "attn-only-1l", "attn-only-2l", "attn-only-3l", "attn-only-4l",]
#    "stanford-gpt2-medium-d"]

for model_name in all_models:
    print(model_name)
    checkpoints = get_checkpoint_labels(get_official_model_name(model_name))[0]
    checkpoints = log_scaled_integers_v2(len(checkpoints), step_size, 10)
    # With the list above this branch never fires ("stanford-gpt2-medium-d"
    # is commented out); it mirrors the special case used in the other sweeps.
    if model_name == "stanford-gpt2-medium-d":
        checkpoints[0] = 1

    for index in checkpoints:
        print(index)
        model_for_this_checkpoint = HookedTransformer.from_pretrained(model_name, checkpoint_index=index, device=device)
        # Free the model right away so only one checkpoint is held at a time.
        del model_for_this_checkpoint
        torch.cuda.empty_cache()

In [21]:
# Step 1: get model internal activities.
# For every model and every sampled checkpoint, run one batch from the Pile
# loader through the model and store its logits as one row-block of
# `model_logits` (kept on CPU).
torch.cuda.empty_cache()

# Passed as `k` to log_scaled_integers_v2, which returns k - 1 checkpoint indices.
step_size = 20
# A single batch, reused for every model/checkpoint.
# NOTE(review): the loader was tokenised with model_attn's tokenizer, yet the
# same ids are also fed to the stanford-* models -- presumably those use a
# different vocabulary; confirm the ids are meaningful for them.
token = next(iter(pile_dataloader))['tokens'].to(device)
# NOTE(review): this rebinds `model_to_tokens_trained_on` to a list, whereas
# the Weights Analysis cell defines it as a dict consumed by
# plot_effective_dimensionality -- re-running cells out of order will break.
model_to_tokens_trained_on = []

all_models = [
    "attn-only-1l", "attn-only-2l", "attn-only-3l", "attn-only-4l",
    "stanford-gpt2-small-a", "stanford-gpt2-small-c", "stanford-gpt2-small-d", 
    "stanford-gpt2-medium-a", "stanford-gpt2-medium-c", "stanford-gpt2-medium-d"]

# Random subset of model_stan's vocabulary columns, as many as model_attn has,
# so that stanford-* logits can be stored at the same width as attn-only ones.
# NOTE(review): no seed is set in the visible cells, so this selection (and
# everything derived from it) differs between runs; the sample is also unsorted.
selected_columns = random.sample(range(model_stan.cfg.d_vocab), model_attn.cfg.d_vocab)

# One slot per (model, checkpoint). The first dimension assumes that every
# model yields exactly step_size - 1 checkpoints, which matches the length
# returned by log_scaled_integers_v2(…, step_size, …).
model_logits = torch.zeros(((step_size-1)*len(all_models), token.size()[1], model_attn.cfg.d_vocab), device="cpu")
for mc, model_name in enumerate(all_models):
    print(model_name)
    checkpoints = get_checkpoint_labels(get_official_model_name(model_name))[0]
    checkpoints = log_scaled_integers_v2(len(checkpoints), step_size, 10)
    # Special case: first sampled index is 1 instead of 0 for this model
    # (reason not visible here -- TODO confirm).
    if model_name == "stanford-gpt2-medium-d":
        checkpoints[0] = 1
    tokens_trained_on = []
    
    for idx, index in enumerate(checkpoints):
        print(f"Checkpoint {index}")
        model_for_this_checkpoint = HookedTransformer.from_pretrained(model_name, checkpoint_index=index, device=device)
        tokens_trained_on.append(model_for_this_checkpoint.cfg.checkpoint_value)

        # Logits are averaged over dim 0 and squeezed before being written to
        # the slot of this (model, checkpoint) pair. stanford-* models are
        # additionally restricted to `selected_columns` to match the width.
        if 'stanford' in model_name:
            model_logits[mc*len(checkpoints)+idx, :, :] += model_for_this_checkpoint(token, return_type="logits").mean(dim=0, keepdim=True).squeeze()[:, selected_columns].cpu()
        else:
            model_logits[mc*len(checkpoints)+idx, :, :] += model_for_this_checkpoint(token, return_type="logits").mean(dim=0, keepdim=True).squeeze().cpu()


        # Free the checkpoint before loading the next one.
        del model_for_this_checkpoint
        torch.cuda.empty_cache()
    model_to_tokens_trained_on.append(tokens_trained_on)
    # Only the local name is deleted; the list stays alive in model_to_tokens_trained_on.
    del tokens_trained_on
    torch.cuda.empty_cache()

# torch.save(model_logits, 'attn+stan_model_logits.pth')

attn-only-1l
Checkpoint 0
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 2
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 3
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 4
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 5
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 7
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 9
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 12
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 15
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 19
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 25
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 31
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 40
Loaded pretrained model attn-only-1l into HookedTransformer
Checkpoint 50
Loaded pretrained 

Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint 450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
Checkpoint -1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-a into HookedTransformer
stanford-gpt2-small-c
Checkpoint 0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint 450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
Checkpoint -1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-c into HookedTransformer
stanford-gpt2-small-d
Checkpoint 0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint 450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
Checkpoint -1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-small-d into HookedTransformer
stanford-gpt2-medium-a
Checkpoint 0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint 450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
Checkpoint -1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
stanford-gpt2-medium-c
Checkpoint 0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint 450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
Checkpoint -1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
stanford-gpt2-medium-d
Checkpoint 1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint 450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
Checkpoint -1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer


In [11]:
def bhattacharyya_distance_matrix(mat, batch=500):
    """
    Takes a collection of models and computes the pairwise, sample-averaged
    log Bhattacharyya coefficient between all pairs of models.

    Note on sign: the value returned is ``mean_s log(sum_c sqrt(p_c * q_c))``,
    i.e. the *negative* of the textbook Bhattacharyya distance
    ``-log(BC)``. The sign is left as-is here; it is not negated.

    Args:
        mat: 3-D array of probabilities. Axis 0 indexes the items being
            compared (models), axis 1 is averaged over (samples) and axis 2
            is summed over inside the coefficient (classes).
        batch: number of entries along the sample axis processed per chunk.
            Only affects peak memory, not the result. Defaults to 500.

    Returns:
        Array of shape ``(mat.shape[0], mat.shape[0])``.

    Raises:
        ValueError: if ``batch`` is smaller than 1.
    """
    if batch < 1:
        raise ValueError(f"batch must be a positive integer, got {batch}")

    # The small offset keeps the logarithm finite when probabilities are zero.
    root = np.sqrt(mat) + 1e-9
    # Put the sample axis first so that `@` performs one (model x model)
    # product per sample.
    by_sample = np.transpose(root, axes=[1, 0, 2])
    by_sample_t = np.transpose(root, axes=[1, 2, 0])

    n_samples = len(by_sample)
    Dmat = 0.0

    # Compute in chunks along the sample axis to avoid OOM
    for start in range(0, n_samples, batch):
        stop = start + batch
        Dmat += np.log(by_sample[start:stop] @ by_sample_t[start:stop]).sum(0)
    return Dmat / n_samples

def compute_inpca(Dmat):
    """
    Build an InPCA embedding from a square pairwise matrix.

    The matrix is double-centred and halved, eigendecomposed, and the
    eigenpairs are ordered by decreasing absolute eigenvalue. Each
    eigenvector is then scaled by the square root of the absolute value of
    its eigenvalue.

    Args:
        Dmat: square 2-D array of pairwise values.

    Returns:
        Tuple ``(eigenval, projection)``: the signed eigenvalues in the
        sorted order, and the matrix whose columns are the scaled
        eigenvectors (one row per input point).
    """
    n_points = Dmat.shape[0]

    # Centring projector: identity minus the constant 1/n matrix.
    centering = np.eye(n_points) - 1.0 / n_points
    centered = (centering @ Dmat @ centering) / 2

    raw_vals, raw_vecs = np.linalg.eigh(centered)

    # Largest |eigenvalue| first; the sign of each eigenvalue is preserved.
    order = np.argsort(-np.abs(raw_vals))
    eigenval, eigenvec = raw_vals[order], raw_vecs[:, order]

    # Scale every column by sqrt(|eigenvalue|) to obtain the coordinates.
    scale = np.sqrt(np.abs(eigenval))
    projection = eigenvec * scale[np.newaxis, :]

    return eigenval, projection

def inpca(model_predictions):
    """
    Run the full InPCA pipeline on a stack of logits.

    The input is moved to the CPU and converted to NumPy, turned into
    probabilities with a softmax over axis 2, reduced to a pairwise matrix
    with ``bhattacharyya_distance_matrix`` and embedded with
    ``compute_inpca``.

    Args:
        model_predictions: tensor exposing ``.cpu().numpy()`` with at least
            three axes; axis 2 is the one normalised by the softmax.

    Returns:
        The projection matrix from ``compute_inpca`` (eigenvalues are
        discarded). It is returned as-is, with one row per entry along
        axis 0 of the input; any regrouping is left to the caller.
    """
    logits = model_predictions.cpu().numpy()
    pairwise = bhattacharyya_distance_matrix(softmax(logits, axis=2))
    _, embed = compute_inpca(pairwise)
    return embed

In [22]:
# Release cached CUDA memory before running InPCA, which works on CPU/NumPy copies.
# NOTE(review): empty_cache() does no autograd work, so the no_grad() wrapper looks unnecessary — confirm.
with torch.no_grad():
    torch.cuda.empty_cache()
pca = inpca(model_logits)
# `pca` is still the flat 2-D embedding returned by `inpca` here (the recorded
# output is (190, 190)); it is only regrouped per model by the reshape in the next cell.
print(pca.shape)

(190, 190)


In [None]:

# Regroup the flat InPCA embedding as (model, checkpoint, component).
# Assumes the rows of `model_logits` are ordered model-by-model with the same
# number of checkpoints per model — TODO confirm against the cell that builds it.
pca = pca.reshape([len(all_models), model_logits.shape[0] // len(all_models), -1])

# The other cells in this notebook use `model_to_tokens_trained_on` as a dict
# keyed by model name. Zipping over a dict yields its keys (strings), not the
# token counts, so look the counts up by model name instead. A plain sequence
# aligned with `all_models` is still accepted.
if isinstance(model_to_tokens_trained_on, dict):
    tokens_by_model = model_to_tokens_trained_on
else:
    tokens_by_model = dict(zip(all_models, model_to_tokens_trained_on))

# One figure per principal component (the first 20).
for j in range(20):
    fig = go.Figure(layout={'title': f'PC {j+1}'})
    fig.update_xaxes(title="Elapsed Training Tokens", type='log')
    fig.add_vrect(x0=0.2, x1=1, line_width=1, fillcolor="gold", opacity=0.2)
    for p, m in zip(pca, all_models):
        tokens_trained_on = np.asarray(tokens_by_model[m])
        # Express progress as a percentage of the last checkpoint's token count.
        tokens_trained_on = 100 * (tokens_trained_on / tokens_trained_on[-1])
        # Replace the first x value with a small positive number so it can be
        # drawn on the log axis (presumably the first checkpoint is at 0 tokens).
        tokens_trained_on[0] = 1e-4
        fig.add_trace(go.Scatter(x=tokens_trained_on, y=p[:, j], name=m, mode='lines+markers'))
    fig.show(config=config)
    # fig.write_image(f"stan+attn_ipca_{j+1}_probabilities.png", scale=5)

In [None]:
# Alternative view kept for reference: first four entries along axis 0 only, components 1-3.
# plot_3d_trajectories(pca[0:4, :, 1:4]).show(config=config)
# Show the first three entries along the last axis of `pca` for everything along axis 0.
# NOTE(review): requires `pca` to already be 3-D, i.e. the reshape in the preceding cell must have run.
# `plot_3d_trajectories` is defined elsewhere in the notebook.
plot_3d_trajectories(pca[:, :, :3]).show(config=config)

In [None]:
with torch.no_grad():
    torch.cuda.empty_cache()
# Keep every 50th entry along the first axis of `context_logit`; `inpca`
# compares the entries along that axis pairwise.
# NOTE(review): presumably that axis indexes context positions — confirm.
context_pca = inpca(context_logit[::50, :, :])
# 2-D: one row per retained entry, one column per InPCA component.
print(context_pca.shape)

# The title used to be f'PC {j+1}', which relied on `j` leaking out of the loop
# in an earlier cell and mislabelled a figure that shows every component.
fig = go.Figure(layout={'title': 'InPCA components across context'})
fig.update_xaxes(title="Elapsed Context time")
positions = np.arange(0, context_pca.shape[0])
# Components are the *columns* of the embedding, so iterate over the transpose
# to get one trace per component, drawn across the retained entries.
for i, p in enumerate(context_pca.T):
    fig.add_trace(go.Scatter(x=positions, y=p, name=f'PC {i+1}', mode='lines+markers'))
# Mean over components for each retained entry.
fig.add_trace(go.Scatter(x=positions, y=context_pca.mean(axis=-1), name='mean', mode='lines+markers'))
fig.show(config=config)

# Activity Analysis

In [6]:
# Load the 1-layer attention-only model on the CPU; in this cell it is only
# used for its tokenizer when building the Pile dataloader.
model_attn = HookedTransformer.from_pretrained("attn-only-1l", device='cpu')
# model_stan = HookedTransformer.from_pretrained(
#     "stanford-gpt2-small-a", device='cpu')
# Batch size of the dataloader; the effective-dimension loop further down also
# reads it to derive how many batches to average over.
pile_batch_size = 1
pile_dataloader = evals.make_pile_data_loader(
    tokenizer=model_attn.tokenizer, batch_size=pile_batch_size)

Loaded pretrained model attn-only-1l into HookedTransformer
10000


In [8]:
# Load checkpoint index 0 of stanford-gpt2-medium-a onto `device`.
# NOTE(review): the checkpoint loop further down rebinds and then deletes this
# name, so this cell looks like a one-off smoke test — confirm it is still needed.
model_for_this_checkpoint = HookedTransformer.from_pretrained("stanford-gpt2-medium-a", checkpoint_index=0, device=device)

Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer


In [21]:
def activity_eff_dim(cache, layers):
    """
    Effective dimensionality of the attention output for each layer.

    For every layer index ``i`` in ``range(layers)`` the activation stored
    under ``blocks.{i}.hook_attn_out`` is averaged over its first axis, the
    Gram matrix ``A.T @ A`` of the result is formed, and the participation
    ratio ``(sum s)^2 / sum(s^2)`` of that Gram matrix's singular values is
    recorded.

    Args:
        cache: mapping from hook name to activation tensor.
            Assumes each activation is 3-D so that the mean over axis 0 is a
            matrix — TODO confirm the (batch, position, d_model) layout.
        layers: number of layers to evaluate, starting at block 0.

    Returns:
        1-D NumPy array of length ``layers`` with one value per layer.
    """
    per_layer = []
    for layer in range(layers):
        # Average over the leading axis; the tensor stays on its current device.
        acts = cache[f"blocks.{layer}.hook_attn_out"].mean(dim=0, keepdim=False)

        singular_values = torch.linalg.svdvals(acts.T @ acts)
        participation = singular_values.sum() ** 2 / (singular_values ** 2).sum()
        per_layer.append(participation.item())

        # Drop the temporaries before asking CUDA to release cached blocks.
        del acts, singular_values, participation
        torch.cuda.empty_cache()
    return np.asarray(per_layer)

torch.cuda.empty_cache()
step_size = 20

# The result dicts are deliberately not re-created here, so they must already
# exist in the kernel before this cell runs.
# NOTE(review): presumably this lets several model families be accumulated
# across runs of this cell — on a fresh kernel, un-comment the lines below.
# model_to_in_context_learning_scores = {}
# model_to_effective_dimension = {}
# model_to_tokens_trained_on = {}

all_models = [
    # "attn-only-1l", "attn-only-2l", "attn-only-3l", "attn-only-4l",]
    # "stanford-gpt2-small-a", "stanford-gpt2-small-c", "stanford-gpt2-small-d",]
    "stanford-gpt2-medium-a", "stanford-gpt2-medium-c", "stanford-gpt2-medium-d"]

for model_name in all_models:
    print(model_name)
    checkpoints = get_checkpoint_labels(get_official_model_name(model_name))[0]
    checkpoints = log_scaled_integers_v2(len(checkpoints), step_size, 10)

    # NOTE(review): presumably checkpoint 0 cannot be used for this model — confirm.
    if model_name == "stanford-gpt2-medium-d":
        checkpoints[0] = 1

    tokens_trained_on = []
    effective_dimension = []

    for index in checkpoints:
        print(index)
        model_for_this_checkpoint = HookedTransformer.from_pretrained(model_name, checkpoint_index=index, device=device)
        layers = model_for_this_checkpoint.cfg.n_layers

        tokens_seen_for_this_checkpoint = model_for_this_checkpoint.cfg.checkpoint_value
        tokens_trained_on.append(tokens_seen_for_this_checkpoint)

        effective_dimension_for_this_checkpoint = np.zeros(layers)
        num_batches = 20 // pile_batch_size
        batches_seen = 0

        # Consume exactly `num_batches` batches. The previous `if i == num_batches: break`
        # ran after the batch had been processed, so num_batches + 1 batches were
        # summed while the average divided by num_batches.
        for x in itertools.islice(pile_dataloader, num_batches):
            tokens = x['tokens'].to(device)
            loss, cache = model_for_this_checkpoint.run_with_cache(tokens, return_type='loss')

            effective_dimension_for_this_checkpoint += activity_eff_dim(cache, layers)
            batches_seen += 1

            # Drop the per-batch tensors so their memory can be reclaimed.
            del tokens, cache, loss

        # Delete the model, then collect garbage before emptying the CUDA cache
        # so that blocks held only by unreachable objects can be released too.
        del model_for_this_checkpoint
        gc.collect()
        torch.cuda.empty_cache()

        # Average over the batches actually seen (the loader may yield fewer
        # than requested); max(..., 1) avoids dividing by zero on an empty loader.
        effective_dimension.append(effective_dimension_for_this_checkpoint / max(batches_seen, 1))

    model_to_tokens_trained_on[model_name] = tokens_trained_on
    model_to_effective_dimension[model_name] = effective_dimension

    torch.cuda.empty_cache()


stanford-gpt2-medium-a
0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
-1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-a into HookedTransformer
stanford-gpt2-medium-c
0


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
-1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-c into HookedTransformer
stanford-gpt2-medium-d
1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
3


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
4


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
6


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
8


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
11


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
15


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
21


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
29


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
40


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
54


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
73


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
99


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
134


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
182


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
246


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
333


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
450


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer
-1


Using pad_token, but it is not set yet.


Loaded pretrained model stanford-gpt2-medium-d into HookedTransformer


In [25]:
# Plot the per-model effective dimensionalities gathered by the loop above.
# `plot_effective_dimensionality` is defined earlier in the notebook and is
# given the two result dicts plus the figure title.
fig = plot_effective_dimensionality(model_to_effective_dimension, model_to_tokens_trained_on, "Activity dim")
# Shade the x-range 0.5-1.8 in translucent gold.
# NOTE(review): presumably this marks the training window of interest — confirm the bounds.
fig.add_vrect(x0=0.5, x1=1.8, line_width=1, fillcolor="gold", opacity=0.2)
# Both axes are logarithmic.
fig.update_xaxes(title="% token in training set", type='log')
fig.update_yaxes(title="Effective dimensionality", type='log')
fig.show(config=config)
# Static export to the working directory.
# NOTE(review): plotly's write_image needs an image-export backend (e.g. kaleido) installed.
fig.write_image("activity_dim.png", scale=6)

# Spectral learning

In [None]:
torch.cuda.empty_cache()
step_size = 20

# Which attention circuit to analyse: "QK" or "OV". The reference basis (from
# the final checkpoint) and the per-checkpoint matrix must come from the same
# circuit. Previously the basis came from QK while the checkpoints used OV.
# "QK" matches the `qk_spectral_dynamics_*` file names written by the plotting cell.
circuit_name = "QK"

model_spectrum = {}
model_to_tokens_trained_on = {}

all_models = [
    "attn-only-1l", "attn-only-2l", "attn-only-3l", "attn-only-4l",
    "stanford-gpt2-small-a", "stanford-gpt2-small-c", "stanford-gpt2-small-d",
    "stanford-gpt2-medium-a", "stanford-gpt2-medium-c", "stanford-gpt2-medium-d"]

for model_name in all_models:
    print(model_name)
    checkpoints = get_checkpoint_labels(get_official_model_name(model_name))[0]
    checkpoints = log_scaled_integers_v2(len(checkpoints), step_size, 10)

    # NOTE(review): presumably checkpoint 0 cannot be used for this model — confirm.
    if model_name == "stanford-gpt2-medium-d":
        checkpoints[0] = 1

    tokens_trained_on = []
    spectrum = []

    # Reference basis: singular vectors of the chosen circuit at the final checkpoint.
    # NOTE(review): assumes FactoredMatrix.svd() returns (U, S, V) with
    # U @ diag(S) @ V.T == M — confirm against the installed TransformerLens version.
    final_model = HookedTransformer.from_pretrained(model_name, checkpoint_index=-1, device='cpu')
    U, s, V = getattr(final_model, circuit_name).svd()
    V_inv = torch.linalg.pinv(V)
    U_inv = torch.linalg.pinv(U)

    # Only the factors are needed from here on; drop the final model.
    del final_model
    torch.cuda.empty_cache()

    for index in checkpoints:
        print(index)
        model_for_this_checkpoint = HookedTransformer.from_pretrained(model_name, checkpoint_index=index, device='cpu')

        M = getattr(model_for_this_checkpoint, circuit_name)
        # Express this checkpoint's matrix in the final checkpoint's singular
        # basis: pinv(U) @ M @ pinv(V).T, batched over the two leading axes.
        result_einsum = torch.einsum('ijkl,ijlm,ijmn->ijkn', U_inv, M.AB, V_inv.transpose(-2, -1))
        # Keep the diagonal, i.e. the weight along each reference singular
        # direction. The former einsum 'ijkl->ijk' summed every row instead,
        # mixing off-diagonal terms into the spectrum. clone() detaches the
        # result from the full matrix's storage so that can be freed.
        result_diagonals = torch.diagonal(result_einsum, dim1=-2, dim2=-1).clone().cpu().numpy()

        tokens_seen_for_this_checkpoint = model_for_this_checkpoint.cfg.checkpoint_value
        tokens_trained_on.append(tokens_seen_for_this_checkpoint)
        spectrum.append(result_diagonals)

        # Release this checkpoint's model and intermediates before the next load.
        del model_for_this_checkpoint, result_einsum, result_diagonals, M
        torch.cuda.empty_cache()

    del U, s, V, V_inv, U_inv
    torch.cuda.empty_cache()
    gc.collect()

    model_to_tokens_trained_on[model_name] = tokens_trained_on
    model_spectrum[model_name] = spectrum


In [36]:
# One figure per model: each trace follows one spectral component across checkpoints.
for model_name in all_models:
    # Stack the per-checkpoint spectra; axis 0 is the checkpoint.
    spectrum = np.asarray(model_spectrum[model_name])
    # Average over axes 1 and 2 (the two leading axes of each stored spectrum),
    # then transpose so that each row is one component's trajectory over checkpoints.
    head_mean_spectrum = np.mean(spectrum, axis=(1, 2)).T

    # The x-axis is identical for every trace of a model, so build it once
    # instead of once per trace.
    tokens_trained_on = np.asarray(model_to_tokens_trained_on[model_name])
    tokens_trained_on = 100 * (tokens_trained_on / tokens_trained_on[-1])
    # Replace the first x value with a small positive number so it can be
    # drawn on the log axis (presumably the first checkpoint is at 0 tokens).
    tokens_trained_on[0] = 1e-2

    fig = go.Figure(layout={'title': f'Spectral dynamics: {model_name}'})
    fig.update_xaxes(title="% Total Training Tokens", type='log')
    fig.add_vrect(x0=0.6, x1=1.3, line_width=1, fillcolor="gold", opacity=0.2)
    for sp in head_mean_spectrum:
        fig.add_trace(go.Scatter(x=tokens_trained_on, y=sp, opacity=0.8))
    fig.show()
    fig.write_image(f"qk_spectral_dynamics_{model_name}.png", scale=6)
