# Doing SVD on positional embeddings

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SamAdamDay/mechanistic-interpretability-projects/blob/main/playground/pos-embed-svd.ipynb)

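This notebook computes the singular value decomposition (SVD) of a pretrained model's positional embedding matrix `W_pos`, then looks at its singular values, approximate rank, and leading left singular vectors.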

In [1]:
# Notebook-level settings; the `#@param` annotations are Colab form-field markers.
# DEVELOPMENT_MODE is read when choosing the Plotly renderer, and MODEL_NAME is the
# pretrained model name passed to HookedTransformer.from_pretrained.
DEVELOPMENT_MODE = True                 #@param {type:"boolean"}
MODEL_NAME = "gpt2-small"               #@param {type:"string"}

## Setup

In [2]:
try:
    import google.colab
    IN_COLAB = True
    print("Running as a Colab notebook")
    %pip install git+https://github.com/SamAdamDay/mechanistic-interpretability-projects.git
except:
    IN_COLAB = False
    print("Running as a Jupyter notebook - intended for development only!")
    from IPython import get_ipython

    ipython = get_ipython()
    # Code to automatically update the HookedTransformer code as its edited without restarting the kernel
    ipython.magic("load_ext autoreload")
    ipython.magic("autoreload 2")

Running as a Jupyter notebook - intended for development only!
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  ipython.magic("load_ext autoreload")
  ipython.magic("autoreload 2")


In [3]:
import plotly.io as pio

# Colab sessions and local development sessions both use the "colab" renderer;
# anything else falls back to "notebook_connected".
pio.renderers.default = (
    "colab" if (IN_COLAB or DEVELOPMENT_MODE) else "notebook_connected"
)
print(f"Using renderer: {pio.renderers.default}")


Using renderer: colab


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import numpy as np

from sklearn.linear_model import LinearRegression

from fancy_einsum import einsum

from tqdm import tqdm

import plotly.express as px

import matplotlib.pyplot as plt

import transformer_lens
import transformer_lens.utils as utils
from transformer_lens.hook_points import (
    HookedRootModule,
    HookPoint,
)  # Hooking utilities
from transformer_lens import (
    HookedTransformer,
    HookedTransformerConfig,
    FactoredMatrix,
    ActivationCache,
)


In [5]:
# Turn off autograd globally: this notebook only inspects weights, it never trains.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f06254753d0>

In [6]:
# Device used when loading the model. Previously the CUDA check was immediately
# overwritten by a hard-coded "cpu", which made it dead code; the override is now an
# explicit flag. The default keeps the original behaviour (always "cpu").
# NOTE(review): the reason for forcing CPU is not recorded - set FORCE_CPU = False to
# use a GPU when one is available.
FORCE_CPU = True
device = "cpu" if FORCE_CPU or not torch.cuda.is_available() else "cuda"
print(device)

cpu


## Get model and positional embedding

In [7]:
# Load the pretrained model named by MODEL_NAME onto the selected device.
# center_writing_weights=False is passed explicitly - presumably so the weights are
# analysed without that preprocessing step; confirm against the TransformerLens docs.
model = HookedTransformer.from_pretrained(
    MODEL_NAME,
    device=device,
    center_writing_weights=False,
)

Using pad_token, but it is not set yet.


Loaded pretrained model gpt2-small into HookedTransformer


In [8]:
# Grab the positional embedding matrix from the model and convert it to a NumPy array
# for the np.linalg.svd calls below. The printed shape shows its (rows, columns).
W_pos = model.W_pos
W_pos_numpy = utils.to_numpy(W_pos)
print(W_pos_numpy.shape)

(1024, 768)


In [9]:
# Vocabulary size as reported by the model's tokenizer.
# NOTE(review): not referenced by any of the SVD cells shown here - confirm it is still needed.
d_vocab = model.tokenizer.vocab_size

## Do SVD on the positional embeddings

In [10]:
# Reduced SVD (full_matrices=False) of the positional embedding matrix.
# U holds the left singular vectors as columns and S the singular values in
# descending order. NOTE: np.linalg.svd returns the *transposed* right factor (Vh),
# so the rows of `V` - not its columns - are the right singular vectors, and
# W_pos_numpy ~= U @ np.diag(S) @ V.
U, S, V = np.linalg.svd(W_pos_numpy, full_matrices=False)

Plot the singular values

In [11]:
# Distribution of the singular values of W_pos, shown as a histogram.
px.histogram(
    x=S,
    labels={"x": "Singular values"},
    title="Singular values of W_pos",
)

Approximate rank: the number of singular values larger than 1% of the largest singular value

In [12]:
# Count singular values above 1% of the largest one. np.linalg.svd returns them in
# descending order, so S[0] is the largest.
(S > 0.01 * S[0]).sum()

28

Plot the first 5 left singular vectors (the first 5 columns of `U`)

In [13]:
# Heatmap with one row per singular vector (hence the transpose); the colour scale
# is centred on zero.
px.imshow(
    U[:, :5].T,
    aspect="auto",
    title="First 5 left singular vectors",
    color_continuous_midpoint=0,
)

In [14]:
# Line plot of the first 5 columns of U, one trace per column.
# Note the values are negated (0 - U) before plotting - presumably just to flip the
# orientation for display, since the overall sign of a singular vector is arbitrary.
px.line(0 - U[:, :5], title="First 5 left singular vectors")

In [15]:
# Same heatmap as for the first 5 vectors, widened to the first 50 columns of U.
px.imshow(
    U[:, :50].T,
    aspect="auto",
    title="First 50 left singular vectors",
    color_continuous_midpoint=0,
)

## After removing the first 5 rows

In [16]:
# Drop the first 5 rows of W_pos (axis 0), as the section heading and the plot titles
# describe. The earlier slice `[:, 5:]` removed the first 5 *columns* instead.
# NOTE(review): rows are presumably positions, i.e. W_pos is (n_ctx, d_model) - confirm.
W_pos_numpy_m5 = W_pos_numpy[5:, :]

# Guard against slicing the wrong axis: only the row count should shrink.
assert W_pos_numpy_m5.shape == (W_pos_numpy.shape[0] - 5, W_pos_numpy.shape[1])

# Reduced SVD of the trimmed matrix. As with np.linalg.svd in general, the third
# return value is the transposed right factor (Vh), so rows of V_m5 are the right
# singular vectors.
U_m5, S_m5, V_m5 = np.linalg.svd(W_pos_numpy_m5, full_matrices=False)

In [17]:
# Distribution of the singular values of the trimmed matrix, shown as a histogram.
px.histogram(
    x=S_m5,
    labels={"x": "Singular values"},
    title="Singular values of W_pos after omitting the first 5 rows",
)

Approximate rank: the number of singular values larger than 1% of the largest singular value

In [18]:
# Count singular values above 1% of the largest one. np.linalg.svd returns them in
# descending order, so S_m5[0] is the largest.
(S_m5 > 0.01 * S_m5[0]).sum()

28

Plot the first 5 left singular vectors (the first 5 columns of `U_m5`)

In [19]:
# Heatmap with one row per singular vector (hence the transpose); the colour scale
# is centred on zero.
px.imshow(
    U_m5[:, :5].T,
    aspect="auto",
    title="First 5 left singular vectors after omitting the first 5 rows",
    color_continuous_midpoint=0,
)