|<h2>Course:</h2>|<h1><a href="https://udemy.com/course/dulm_x/?couponCode=202509" target="_blank">A deep understanding of AI language model mechanisms</a></h1>|
|-|:-:|
|<h2>Part 5:</h2>|<h1>Observation (non-causal) mech interp</h1>|
|<h2>Section:</h2>|<h1>Investigating layers</h1>|
|<h2>Lecture:</h2>|<h1><b>"Effective dimensionality" analysis with PCA</b></h1>|

<br>

<h5><b>Teacher:</b> Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h5>
<h5><b>Course URL:</b> <a href="https://udemy.com/course/dulm_x/?couponCode=202509" target="_blank">udemy.com/course/dulm_x/?couponCode=202509</a></h5>
<i>Using the code without the course may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import gridspec

import requests

import torch
from transformers import AutoModelForCausalLM, GPT2Tokenizer

# vector plots: ask the inline matplotlib backend to emit figures as SVG
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

In [None]:
# run on the first CUDA device when torch can see one, otherwise on the CPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# pretrained GPT2-XL tokenizer and causal language model
modelName = 'gpt2-xl'
tokenizer = GPT2Tokenizer.from_pretrained(modelName)
model = AutoModelForCausalLM.from_pretrained(modelName)

# place the model on the chosen device, then switch it to evaluation mode
model.to(device)
model.eval()

In [None]:
# Alice in Wonderland (plain text from Project Gutenberg)
response = requests.get('https://www.gutenberg.org/cache/epub/11/pg11.txt',timeout=30)

# stop here on an HTTP error instead of silently tokenizing an error page
response.raise_for_status()
text = response.text

# tokenize the whole book, then keep the 1000 tokens at positions 10000-10999
allTokens = tokenizer.encode(text,return_tensors='pt')
tokens = allTokens[:,10000:11000]

# show the excerpt that will be analyzed
print(tokenizer.decode(tokens[0]))

In [None]:
# same tokens as the real excerpt, but in a random order (keeps the 1 x N shape)
numTokens = tokens.shape[1]
shuffleOrder = torch.randperm(numTokens)
tokensShuffle = tokens[:,shuffleOrder]

# decode the shuffled sequence to confirm it is scrambled text
print(tokenizer.decode(tokensShuffle[0]))

# Push the data and get the activations

In [None]:
# forward pass for the real and the shuffled sequence, keeping all hidden states
# (~3 mins with gpt2-xl on CPU, or <1s on GPU, lol)
with torch.no_grad():
  outputs_real,outputs_shuf = [
      model(tokenSeq.to(device),output_hidden_states=True)
      for tokenSeq in (tokens,tokensShuffle)
  ]

# shape of the first hidden-state tensor
outputs_real.hidden_states[0].shape

In [None]:
# number of hidden-state tensors returned by the forward pass
# (presumably the embedding output plus one per transformer block -- confirm against the model docs)
numHidden = len(outputs_real.hidden_states)
numHidden

In [None]:
## calculate dimensionality metrics
## (took ~1 min when the full SVD was computed; only singular values are computed now)

# percent of total variance that defines the "effective" dimensionality
varThreshold = 95


def cumulativeVarianceExplained(hidden):
  """Cumulative percent variance explained by the principal components of one layer.

  Parameters
  ----------
  hidden : torch.Tensor
    Hidden-state tensor for a single sequence (assuming no batches!), i.e. it
    squeezes down to a (tokens x embedding dimensions) matrix.

  Returns
  -------
  np.ndarray
    1D array with one entry per singular value; entry k is the percent of
    variance captured by components 0..k.
  """

  # extract all the activations from this layer
  acts = hidden.squeeze().cpu().numpy()

  # mean-center the activations.
  # Done out-of-place on purpose: for a CPU tensor .numpy() shares memory with
  # the tensor, so an in-place `-=` would overwrite the stored model outputs.
  acts = acts - acts.mean(axis=0,keepdims=True)

  # only the singular values are needed, so skip computing U and Vh
  s = np.linalg.svd(acts,compute_uv=False)

  # percent explained (cumulative)
  pctExplained = 100 * s**2 / np.sum(s**2)
  return np.cumsum(pctExplained)


# initialize
# the SVD returns min(tokens, embedding dims) singular values per layer
numComps = min(outputs_real.hidden_states[0].shape[1:])
cumVarExplained = np.zeros((numHidden,numComps,2))
effectiveCompCount = np.zeros((numHidden,2),dtype=int)


# loop over layers; last axis / column is 0 for real tokens and 1 for shuffled tokens
for layeri in range(numHidden):
  for condi,outputs in enumerate((outputs_real,outputs_shuf)):

    cumVar = cumulativeVarianceExplained(outputs.hidden_states[layeri])
    cumVarExplained[layeri,:,condi] = cumVar

    # count the components until the variance threshold is exceeded
    # (+1 converts the 0-based index into a count)
    effectiveCompCount[layeri,condi] = np.where(cumVar>varThreshold)[0][0]+1


In [None]:
# The plot in the slides...

layer2plot = 5        # index into hidden_states of the layer to visualize
slideThreshold = 80   # percent cumulative variance marked by the dashed lines

_,axs = plt.subplots(1,2,figsize=(12,4))

acts = outputs_real.hidden_states[layer2plot].squeeze().cpu().numpy()

# mean-center out-of-place: for a CPU tensor .numpy() shares memory with the
# tensor, so an in-place `-=` would overwrite the stored model outputs
acts = acts - acts.mean(axis=0,keepdims=True)

# only the singular values are needed, so skip computing U and Vh
s = np.linalg.svd(acts,compute_uv=False)
pctExplained = 100 * s**2 / np.sum(s**2) # percent explained
cumExplained = np.cumsum(pctExplained) # cumulative

# first (0-based) component at which the cumulative curve exceeds the threshold;
# computed from the data so the vertical line always meets the horizontal one
crossIdx = np.where(cumExplained>slideThreshold)[0][0]

# left panel: variance explained by each component
axs[0].plot(pctExplained,'ks-',markerfacecolor=[.7,.9,.7])
axs[0].set(xlim=[-1,100],xlabel='Component number',ylabel='Percent variance explained')

# right panel: cumulative variance with the threshold crossing marked
axs[1].plot(cumExplained,'ks-',markerfacecolor=[.7,.7,.9])
axs[1].axhline(slideThreshold,linestyle='--',color='gray')
axs[1].axvline(crossIdx,linestyle='--',color='gray')
axs[1].set(xlim=[-1,100],xlabel='Component number',ylabel='Cumulative % variance explained')

plt.tight_layout()
plt.show()

# Visualization

In [None]:
# setup the figure and axes: two panels on top, one wide panel below
fig = plt.figure(figsize=(10,7))
gs = gridspec.GridSpec(2,2)
axs1 = fig.add_subplot(gs[0,0])
axs2 = fig.add_subplot(gs[0,1])
axs3 = fig.add_subplot(gs[1,:])

# normalization function for mapping layer index onto color
# (layer indices run 0..numHidden-1, so the last layer maps to the end of the colormap)
norm = mpl.colors.Normalize(vmin=0,vmax=numHidden-1)
sm = mpl.cm.ScalarMappable(cmap=mpl.cm.plasma,norm=norm)


# top row: cumulative variance explained (last axis of cumVarExplained: 0=real, 1=shuffled)
for condi,(ax,condLabel) in enumerate(zip((axs1,axs2),('Real','Shuffled'))):

  # one line per layer, colored by layer index
  for layeri in range(numHidden):
    ax.plot(cumVarExplained[layeri,:,condi],color=mpl.cm.plasma(norm(layeri)))

  # add a colorbar
  cbar = plt.colorbar(sm,ax=ax)
  cbar.set_label(r'Hidden layer')

  # make it look nicer
  ax.set(xlabel='Component number',ylabel='% explained (cumulative)',ylim=[50,100.5],xlim=[-2,500],
         title=f'({condLabel}) variance explained')


## plot the "effective subspace dimensionality" of each layer
axs3.plot(effectiveCompCount[:,1],'ks',markerfacecolor=[.9,.7,.7,.5],markersize=10,label='Shuffled tokens')
axs3.plot(effectiveCompCount[:,0],'ko',markerfacecolor=[.7,.9,.7],markersize=10,label='Real tokens')
axs3.legend()
axs3.set(xlabel='Hidden layer',ylabel='Number of dimensions',title='"Effective dimensionality"',
             xlim=[-1,numHidden])

plt.tight_layout()
plt.show()