|<h2>Book:</h2>|<h1><a href="https://open.substack.com/pub/mikexcohen/p/llm-breakdown-16-tokenization-words" target="_blank">50 ML projects to understand LLMs</a></h1>|
|-|:-:|
|<h2>Project:</h2>|<h1><b>[42] MLP weights and activations characteristics</b></h1>|
|<h2>Author:</h2>|<h1>Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h1>|

<br>

<i>Using the code without reading the book may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.manifold import TSNE

import torch
import torch.nn.functional as F

from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
### matplotlib adjustments

# render inline figures as SVG (sharper than the default raster output)
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# global style overrides; uncomment the color entries to switch to dark mode
style_overrides = {
    # 'figure.facecolor': '#282a2c',
    # 'figure.edgecolor': '#282a2c',
    # 'axes.facecolor':   '#282a2c',
    # 'axes.edgecolor':   '#DDE2F4',
    # 'axes.labelcolor':  '#DDE2F4',
    # 'xtick.color':      '#DDE2F4',
    # 'ytick.color':      '#DDE2F4',
    # 'text.color':       '#DDE2F4',
    'axes.spines.right': False,   # no box around the axes:
    'axes.spines.top':   False,   # only the left and bottom spines are drawn
    'axes.titleweight':  'bold',
    'axes.labelweight':  'bold',
    'savefig.dpi':       300,     # resolution of the figures written by plt.savefig
}
plt.rcParams.update(style_overrides)

# **Demo: GELU nonlinear activation function**

In [None]:
# evenly spaced "pre-activation" values to push through the nonlinearity
activations = torch.linspace(-4,4,51)

# the same values after GELU
gelued = F.gelu(activations)

# squared difference between the raw and the transformed values (used as marker color)
diff = (activations-gelued)**2

# one figure, one axis
fig,ax = plt.subplots(figsize=(10,5))

# identity line for reference
ax.plot(activations,activations,'k')

# GELU output, colored by how far it moved from the identity line
h = ax.scatter(activations,gelued,s=60,marker='h',edgecolor='k',alpha=.7,
               c=diff,cmap='magma',vmin=0,vmax=10)
fig.colorbar(h,ax=ax,pad=.01)

ax.grid(linestyle='--',linewidth=.4)
ax.set(xlabel='"Raw" activations',ylabel='Post-GELU activations',
       title='Impact of GELU nonlinear activation')

fig.tight_layout()
fig.savefig('ch7_proj42_part0a.png')
plt.show()


# **Demo: linear separation with dimensionality expansion**

In [None]:
# seeded, local random generator: the saved figure is identical on every run,
# and the global numpy random state used by other cells is left untouched
rng = np.random.default_rng(42)

# angles
n = 100
theta = np.linspace(0,2*np.pi-1/n,n)

# coordinates in 2D: two noisy concentric rings (radii 1 and 2, noise sd = .1)
x_inner = 1*np.cos(theta) + rng.standard_normal(n)/10
y_inner = 1*np.sin(theta) + rng.standard_normal(n)/10
x_outer = 2*np.cos(theta) + rng.standard_normal(n)/10
y_outer = 2*np.sin(theta) + rng.standard_normal(n)/10

# dimensionality-expansion via nonlinear transform:
# the third coordinate is each point's distance from the origin
z_inner = np.sqrt(x_inner**2 + y_inner**2)
z_outer = np.sqrt(x_outer**2 + y_outer**2)



### 2D scatter plot (no straight line separates the two rings)
fig = plt.figure(figsize=(12,5))
ax0 = fig.add_subplot(121)

ax0.plot(x_inner,y_inner,'ko',markerfacecolor=[.7,.9,.7],markersize=9)
ax0.plot(x_outer,y_outer,'ks',markerfacecolor=[.9,.7,.7],markersize=9)
ax0.axis('square')
ax0.set(title='A) Non-linearly separable in 2D',xlabel='x',ylabel='y',
        xticklabels=[],yticklabels=[])

### 3D scatter plot (a horizontal plane between the two radii separates the rings)
ax1 = fig.add_subplot(122, projection='3d')
ax1.plot(x_inner,y_inner,z_inner,'ko',markerfacecolor=[.7,.9,.7],markersize=9)
ax1.plot(x_outer,y_outer,z_outer,'ks',markerfacecolor=[.9,.7,.7],markersize=9)
ax1.set(title='B) Linearly separable in 3D',xlabel='x',ylabel='y',zlabel='Radius',
        xticklabels=[],yticklabels=[])
ax1.view_init(20,20)

plt.savefig('ch7_proj42_part0b.png')
plt.show()

# **Part 1: Distributions of MLP weights**

In [None]:
# name of the pretrained checkpoint (the tokenizer and the model must come from the same one)
model_name = 'gpt2'

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# switch to evaluation mode; as the last expression of the cell,
# this also prints the module tree
model.eval()

In [None]:
# number of transformer blocks, read from the model configuration
n_layers = model.config.n_layer

In [None]:
# the MLP sub-block of one transformer layer
mlp = model.transformer.h[5].mlp

# extract the weights matrices
# (detached so they can be plotted and histogrammed without autograd getting in the way)
# c_fc is the expansion layer and c_proj the projection back to the embedding size;
# the W1@W2 product computed in a later cell relies on W1 being (d_model, d_mlp)
# and W2 being (d_mlp, d_model) -- verify against the printed sizes below
W1 = mlp.c_fc.weight.detach()
W2 = mlp.c_proj.weight.detach()

# extract the bias vectors
b1 = mlp.c_fc.bias.detach()
b2 = mlp.c_proj.bias.detach()

# counts
W1_n = W1.numel()
W2_n = W2.numel()
b1_n = b1.numel()
b2_n = b2.numel()

total_n = W1_n + W2_n + b1_n + b2_n

# one table row per parameter tensor, in the order the MLP applies them
print(' Type |     Size     |   Count   | % total')
print('------+--------------+-----------+---------')
for label,param,count in [('W1',W1,W1_n),('b1',b1,b1_n),('W2',W2,W2_n),('b2',b2,b2_n)]:
  size = str(list(param.shape))
  print(f'  {label}  |  {size:>11} | {count:9,} | {100*count/total_n:6.2f}%')

In [None]:
# FYI, W1 and W2 are not inverses of each other:
# their product is not the identity matrix.
# (This figure is mentioned but not shown in the text.)

# detach before plotting: imshow converts its input to numpy, which raises
# an error for tensors that still track gradients (e.g., raw model parameters)
W1W2 = (W1@W2).detach()

plt.imshow(W1W2**2,vmin=0,vmax=1) # squared to accentuate visualization
plt.colorbar(pad=.02)
plt.gca().set(xlabel='Column',ylabel='Row',title='(W1 @ W2)$^2$')
plt.show()

In [None]:
# histogram bin boundaries, and the bin centers to plot against
binedges = torch.linspace(-.8,.8,201)
bincenters = (binedges[:-1]+binedges[1:])/2

# density histograms of all elements of each weights matrix
# (detach is a no-op if the matrices were already detached)
y1,_ = torch.histogram(W1.detach().flatten(),bins=binedges,density=True)
y2,_ = torch.histogram(W2.detach().flatten(),bins=binedges,density=True)

plt.figure(figsize=(8,3))
plt.plot(bincenters,y1,label=r'$\mathbf{W_1}$')
plt.plot(bincenters,y2,label=r'$\mathbf{W_2}$')
plt.legend()

plt.gca().set(xlabel='Weight value',ylabel='Density',xlim=binedges[[0,-1]],ylim=[0,None])

plt.tight_layout()
plt.savefig('ch7_proj42_part1a.png')
plt.show()

In [None]:
# histograms are stored as (matrix [W1,W2]) x (layer) x (bin)
weights_hists = torch.zeros((2,n_layers,len(binedges)-1))

for layeri in range(n_layers):

  # get the matrices
  # (separate names so that the layer-5 W1/W2 from the earlier cell are not overwritten)
  W1_layer = model.transformer.h[layeri].mlp.c_fc.weight.detach()
  W2_layer = model.transformer.h[layeri].mlp.c_proj.weight.detach()

  # get and store the histograms
  weights_hists[0,layeri,:],_ = torch.histogram(W1_layer.flatten(),bins=binedges,density=True)
  weights_hists[1,layeri,:],_ = torch.histogram(W2_layer.flatten(),bins=binedges,density=True)


# one image per matrix: layers on the y-axis, weight value on the x-axis, density as color
_,axs = plt.subplots(1,2,figsize=(10,3.5))
for i in range(2):
  axs[i].imshow(weights_hists[i],origin='lower',aspect='auto',cmap='magma',
                extent=[binedges[0].item(),binedges[-1].item(),0,n_layers],vmin=0)

axs[0].set(xlabel='Weight value',ylabel='Layer',title='W1 (expansion)')
axs[1].set(xlabel='Weight value',ylabel='Layer',title='W2 (contraction)')

plt.tight_layout()
plt.savefig('ch7_proj42_part1b.png')
plt.show()

# **Part 2: Hooks and single-layer activation distributions**

In [None]:
# initialize the dictionary containing data
mlp_acts = {}

# the hook function
def hook(module, input, output):
  """Forward hook for an MLP block: store the activations of its four stages.

  module: the MLP block the hook is attached to
  input:  tuple of the block's positional inputs (the first one is the activations)
  output: the block's own output (unused; the stages are recomputed step by step)

  Writes detached tensors into the global `mlp_acts` under the keys
  'input', 'expansion', 'gelu', and 'projection'. Returns nothing, so the
  model's forward pass is not modified.
  """

  # calculate the MLP progression
  X1 = input[0]          # input to the MLP
  X2 = module.c_fc(X1)   # linear expansion
  X3 = module.act(X2)    # nonlinearity
  X4 = module.c_proj(X3) # linear projection back down

  # store the results (detached from the computational graph)
  mlp_acts['input'] = X1.detach()
  mlp_acts['expansion'] = X2.detach()
  mlp_acts['gelu'] = X3.detach()
  mlp_acts['projection'] = X4.detach()

# implant the hook
handle = model.transformer.h[5].mlp.register_forward_hook(hook)

In [None]:
text = 'Would you prefer a strawberry-flavored peanut or a peanut-flavored strawberry?'
tokens = tokenizer.encode(text,return_tensors='pt')
ntokens = tokens.shape[1] # tokens is (batch=1) x (sequence length)

# forward pass to trigger the hook
# (no gradients needed; only the hooked activations are of interest)
with torch.no_grad():
  model(tokens)

# remove the hook
# (otherwise it stays attached and overwrites mlp_acts on every later forward pass)
handle.remove()

In [None]:
# report the tensor size stored for each stage of the MLP
for stage,acts in mlp_acts.items():
  shape = list(acts.shape)
  print(f'{shape} in stage "{stage}"')

In [None]:
# extract for convenience (all tokens and dimensions pooled into one vector per stage)
ip = mlp_acts['input'].flatten()
ex = mlp_acts['expansion'].flatten()
ge = mlp_acts['gelu'].flatten()
co = mlp_acts['projection'].flatten()

# redefine bin boundaries
# NOTE(review): the range and resolution are a judgment call -- widen them if the tails look clipped
binedges = torch.linspace(-4,4,101)
bincenters = (binedges[:-1]+binedges[1:])/2

# histograms
yIp,_ = torch.histogram(ip,bins=binedges,density=True)
yEx,_ = torch.histogram(ex,bins=binedges,density=True)
yGe,_ = torch.histogram(ge,bins=binedges,density=True)
yCo,_ = torch.histogram(co,bins=binedges,density=True)

# and plot
plt.figure(figsize=(7,4))
plt.plot(bincenters,yIp,label=f'Input (N = {len(ip):,})')
plt.plot(bincenters,yEx,label=f'Expansion (N = {len(ex):,})')
plt.plot(bincenters,yGe,label=f'GELU (N = {len(ge):,})')
plt.plot(bincenters,yCo,label=f'Projection (N = {len(co):,})')
plt.axvline(0,linestyle='--',color='k',linewidth=.5)

plt.gca().set(xlabel='Activation value',ylabel='Density',xlim=binedges[[0,-1]],ylim=[0,None])
plt.legend()

plt.tight_layout()
plt.savefig('ch7_proj42_part2.png')
plt.show()

# **Part 3: Distribution by token position**

In [None]:
# initialize: (MLP stage) x (token position) x (histogram bin)
token_hists = torch.zeros((4,ntokens,len(binedges)-1))

# the four stages, in the order of the figure panels
stages = ['input','expansion','gelu','projection']

# loop over token positions
for i in range(ntokens):
  for stagei,stage in enumerate(stages):
    # all activations of token i (first and only sequence in the batch)
    token_hists[stagei,i,:],_ = torch.histogram(mlp_acts[stage][0,i,:],bins=binedges,density=True)


# one image per stage: tokens on the y-axis, activation value on the x-axis, density as color
_,axs = plt.subplots(1,4,figsize=(12,3.5))
for i in range(4):
  axs[i].imshow(token_hists[i],origin='lower',aspect='auto',cmap='magma',
              extent=[binedges[0],binedges[-1],0,ntokens-1],vmin=0,vmax=.5)

axs[0].set(xlabel='Activation value',ylabel='Token position',title='A) Input')
axs[1].set(xlabel='Activation value',ylabel='Token position',title='B) Expansion')
axs[2].set(xlabel='Activation value',ylabel='Token position',title='C) GELU')
axs[3].set(xlabel='Activation value',ylabel='Token position',title='D) Projection')

plt.tight_layout()
plt.savefig('ch7_proj42_part3.png')
plt.show()

# **Part 4: Laminar profiles of MLP distributions**

In [None]:
# re-initialize
mlp_acts = {}

def outerHook(layeri):
  """Create a forward hook that labels its stored activations with the layer index.

  layeri: index of the transformer block; used in the dictionary keys.
  Returns the hook function to pass to register_forward_hook.
  """
  def hook(module,input,output):
    """Store the four MLP stages of this layer in the global `mlp_acts`."""

    # calculate the MLP progression
    X1 = input[0]          # input to the MLP
    X2 = module.c_fc(X1)   # linear expansion
    X3 = module.act(X2)    # nonlinearity
    X4 = module.c_proj(X3) # linear projection back down

    # store the results
    mlp_acts[f'L{layeri}_input'] = X1.detach()
    mlp_acts[f'L{layeri}_expansion'] = X2.detach()
    mlp_acts[f'L{layeri}_gelu'] = X3.detach()
    mlp_acts[f'L{layeri}_projection'] = X4.detach()

  return hook

# attach one hook per layer, and keep the handles for later removal
handles = []
for layeri in range(model.config.n_layer):
  modname = model.transformer.h[layeri].mlp
  h = modname.register_forward_hook(outerHook(layeri))
  handles.append(h)

In [None]:
# push the tokens through the model once so that each registered hook fires;
# gradient tracking is switched off for this pass
with torch.no_grad():
  model(tokens)

# detach every hook again
for hook_handle in handles:
  hook_handle.remove()

In [None]:
# list every stored activation tensor with its size (names right-aligned to 15 characters)
for name,acts in mlp_acts.items():
  shape = list(acts.shape)
  print(f'{name:>15} has size {shape}')

In [None]:
# initialize: one (layer) x (histogram bin) matrix per MLP stage
nbins = len(binedges)-1
input_hists = torch.zeros((n_layers,nbins))
expansion_hists = torch.zeros((n_layers,nbins))
gelu_hists = torch.zeros((n_layers,nbins))
projection_hists = torch.zeros((n_layers,nbins))

# loop over layers
# (index [0] selects the first sequence in the batch; all tokens and dimensions are pooled)
for i in range(n_layers):
  input_hists[i,:],_ = torch.histogram(mlp_acts[f'L{i}_input'][0].flatten(),bins=binedges,density=True)
  expansion_hists[i,:],_ = torch.histogram(mlp_acts[f'L{i}_expansion'][0].flatten(),bins=binedges,density=True)
  gelu_hists[i,:],_ = torch.histogram(mlp_acts[f'L{i}_gelu'][0].flatten(),bins=binedges,density=True)
  projection_hists[i,:],_ = torch.histogram(mlp_acts[f'L{i}_projection'][0].flatten(),bins=binedges,density=True)


# shared image settings: layers on the y-axis, activation value on the x-axis, density as color
imshow_opts = dict(origin='lower',aspect='auto',cmap='magma',
                   extent=[binedges[0].item(),binedges[-1].item(),0,n_layers],vmin=0,vmax=.5)

_,axs = plt.subplots(1,4,figsize=(12,3.5))
axs[0].imshow(input_hists,**imshow_opts)
axs[1].imshow(expansion_hists,**imshow_opts)
axs[2].imshow(gelu_hists,**imshow_opts)
axs[3].imshow(projection_hists,**imshow_opts)

axs[0].set(xlabel='Activation value',ylabel='Layer',title='A) Input')
axs[1].set(xlabel='Activation value',ylabel='Layer',title='B) Expansion')
axs[2].set(xlabel='Activation value',ylabel='Layer',title='C) GELU')
axs[3].set(xlabel='Activation value',ylabel='Layer',title='D) Projection')

plt.tight_layout()
plt.savefig('ch7_proj42_part4.png')
plt.show()

# **Part 5: Dimension-reduction with T-SNE**

In [None]:
layeri = 10
parts = ['input','projection']

# setup the figure
fig,axs = plt.subplots(1,3,figsize=(13,3.5))

# loop over the two MLP parts
for parti,name in enumerate(parts):

  # tokens-by-dimensions matrix for this stage (any leading batch dimension is folded away)
  acts = mlp_acts[f'L{layeri}_{name}']
  acts = acts.reshape(-1,acts.shape[-1]).numpy()

  # reduce to 2D with t-SNE
  # (perplexity must be smaller than the number of tokens; fixed seed for a reproducible layout)
  tsne = TSNE(n_components=2,perplexity=min(5,ntokens-1),random_state=0)
  tsne_result = tsne.fit_transform(acts)

  # draw the projections
  h = axs[parti].scatter(tsne_result[:,0],tsne_result[:,1],100,marker='h',edgecolor='k',
                  c=np.linspace(0,1,ntokens),cmap=plt.cm.plasma)

  # label the tokens
  # (offset taken as a scalar so that the text coordinates are plain numbers)
  yoffset = .03 * np.diff(axs[parti].get_ylim())[0] # shift words up by x% of the y-axis
  for i in range(ntokens):
    axs[parti].text(tsne_result[i,0],tsne_result[i,1]+yoffset,
                    tokenizer.decode([tokens[0,i].item()]),ha='center',fontsize=9)

  # finalize
  axs[parti].set(xlabel='TSNE dim 1',ylabel='TSNE dim 2',title=f'{"AB"[parti]}) {name.capitalize()}')
  fig.colorbar(h,ax=axs[parti],pad=.02,label='Token index',ticks=[])


# create the scatter plot
# extract the data for convenience
# (flattened to vectors: plot() does not accept 3D input, and corrcoef needs paired 1D data)
inp = mlp_acts[f'L{layeri}_input'].flatten().numpy()
con = mlp_acts[f'L{layeri}_projection'].flatten().numpy()

# correlation between the MLP's input and its output
r = np.corrcoef(inp,con)[0,1]

axs[2].plot(inp,con,'ko',markerfacecolor=[.7,.9,.9,.5])
axs[2].set(xlabel='Stage 1 (input)',ylabel='Stage 4 (projection)',title=f'C) Scatter plot (r = {r:.2f})')

plt.tight_layout()
plt.savefig('ch7_proj42_part5.png')
plt.show()

# **Part 6: Repeat in GPT-2-large**