|<h2>Book:</h2>|<h1><a href="https://open.substack.com/pub/mikexcohen/p/llm-breakdown-16-tokenization-words" target="_blank">50 ML projects to understand LLMs</a></h1>|
|-|:-:|
|<h2>Project:</h2>|<h1><b>[14] Linear semantic axes</b></h1>|
|<h2>Author:</h2>|<h1>Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h1>|

<br>

<i>Using the code without reading the book may lead to confusion or errors.</i>

In [None]:
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

In [None]:
### figure appearance settings (the disabled lines switch to a dark theme)

# render inline figures as svg (higher-res than the default)
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# dark-mode colors (disabled)
# plt.rcParams['figure.facecolor'] = '#282a2c'
# plt.rcParams['figure.edgecolor'] = '#282a2c'
# plt.rcParams['axes.facecolor']   = '#282a2c'
# plt.rcParams['axes.edgecolor']   = '#DDE2F4'
# plt.rcParams['axes.labelcolor']  = '#DDE2F4'
# plt.rcParams['xtick.color']      = '#DDE2F4'
# plt.rcParams['ytick.color']      = '#DDE2F4'
# plt.rcParams['text.color']       = '#DDE2F4'

# hide the right and top axis borders
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top']   = False

# bold titles and axis labels
plt.rcParams['axes.titleweight'] = 'bold'
plt.rcParams['axes.labelweight'] = 'bold'

# resolution of figures written with savefig
plt.rcParams['savefig.dpi'] = 300

# **Part 1: Extract and normalize an embeddings matrix**

In [None]:
from transformers import RobertaTokenizer, RobertaForMaskedLM

# load the pretrained RoBERTa tokenizer and masked-language model
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForMaskedLM.from_pretrained('roberta-base')

# extract embeddings matrix and convert to numpy
# (get_input_embeddings() returns the token-embedding layer; detach() is
#  needed because the weights are a trainable parameter that tracks gradients)
embeddings = model.get_input_embeddings().weight.detach().numpy()

In [None]:
# optional normalization: scale each token's embedding vector (row) to unit length
vectorNorms = np.linalg.norm(embeddings,axis=1)

# divide each row by its own norm; rows with zero norm are left as zeros
# instead of turning into NaNs (which would corrupt later sorting)
embeddings_norm = embeddings / np.where(vectorNorms>0,vectorNorms,1)[:,None]
embeddings_norm.shape

In [None]:
# plot the (pre-normalization) norm of every token's embedding vector
plt.figure(figsize=(12,3))
plt.plot(vectorNorms,'o',markeredgewidth=.3,markerfacecolor=[.9,.7,.7,.3])
plt.gca().set(xlabel='Token index',ylabel='Embedding norm',
              xlim=[-50,len(vectorNorms)+50])

plt.tight_layout()
plt.savefig('ch3_proj14_part1.png')
plt.show()

In [None]:
# sanity check on one token (index 300): norm of its normalized vector,
# followed by the norm of its original vector
tuple( np.linalg.norm(mat[300]) for mat in (embeddings_norm,embeddings) )

# **Part 2: Create a "semantic axis"**

In [None]:
# pick two words to define the axis
# (example pair -- swap in any two single-token words; the leading space picks
#  the word-initial token, and add_special_tokens=False keeps the start/end
#  markers out of the returned list)
word4pos = tokenizer.encode(' future',add_special_tokens=False)
word4neg = tokenizer.encode(' past',add_special_tokens=False)

# confirm they're single-token words
print(word4pos, word4neg)
assert len(word4pos)==1 and len(word4neg)==1, 'each word must map to exactly one token'

# but it's best to "de-dimensionalize" them for subsequent plotting
word4pos = word4pos[0]
word4neg = word4neg[0]

In [None]:
# get the vectors for those words
v2add = embeddings[word4pos]
v2sub = embeddings[word4neg]

# create the "semantic axis" with "raw" vectors
semantic_axis = v2add - v2sub
semantic_axis /= np.linalg.norm(semantic_axis) # post-subtraction normalization

# now starting from the normed vectors
# (no normalization after the subtraction, so this axis need not be unit length)
v2add = embeddings_norm[word4pos]
v2sub = embeddings_norm[word4neg]
semantic_axis_from_norm = v2add - v2sub

# print the norms
print(f'Norm of non-normed subtraction: {np.linalg.norm(semantic_axis):.3f}')
print(f'Norm of pre-normed subtraction: {np.linalg.norm(semantic_axis_from_norm):.3f}')

In [None]:
# visualize
fig = plt.figure(figsize=(12,4))
gs = gridspec.GridSpec(1,3,figure=fig)
ax1 = fig.add_subplot(gs[:-1]) # wide panel: first two grid columns
ax2 = fig.add_subplot(gs[-1])  # narrow panel: last grid column

# panel A: the two versions of the axis, one value per embedding dimension
ax1.plot(semantic_axis,label='Normed after subtraction')
ax1.plot(semantic_axis_from_norm,label='Subtraction of normed vectors')
ax1.legend()
ax1.set(xlabel='Embedding dimension',ylabel='Value',
        title='A) Semantic axis vectors')

# panel B: element-by-element comparison of the two versions
ax2.plot(semantic_axis,semantic_axis_from_norm,'ko',markerfacecolor=[.9,.7,.7,.5])
ax2.set(xlabel='Difference of "raw" vectors',ylabel='Difference of normed vectors',
        title='B) Comparison of difference vectors')

plt.tight_layout()
plt.savefig('ch3_proj14_part2.png')
plt.show()

In [None]:
# norms of the original (non-normalized) embedding vectors of the two words
# (indexed from `embeddings` directly, because v2add/v2sub may have been
#  reassigned to the normalized versions)
print('Norms of the two vectors:')
print(f' {np.linalg.norm(embeddings[word4pos]):.3f}')
print(f' {np.linalg.norm(embeddings[word4neg]):.3f}')

# **Part 3: Project all tokens onto the axis**

In [None]:
# calculate dot products
# (both the axis and the embedding rows are unit length, so each dot product
#  is the cosine similarity between a token and the semantic axis)
dotprods = semantic_axis @ embeddings_norm.T

fig = plt.figure(figsize=(12,3.5))
gs = gridspec.GridSpec(1,3,figure=fig)
ax1 = fig.add_subplot(gs[:-1]) # wide panel: first two grid columns
ax2 = fig.add_subplot(gs[-1])  # narrow panel: last grid column

# plot all the similarities
ax1.plot(dotprods,'k.',alpha=.3)
ax1.set(xlabel='Token index',ylabel='Cosine similarity',
        title='A) Projections of all tokens onto the axis')

# distribution of the projections over the whole vocabulary
ax2.hist(dotprods,bins=100,color=[.9,.7,.7],edgecolor='k',linewidth=.3)
ax2.set(xlabel='Cosine similarity',ylabel='Count',
        title='B) Distribution of projection values')

plt.tight_layout()
plt.savefig('ch3_proj14_part3.png')
plt.show()

In [None]:
# find the 10 highest and the 10 lowest projection scores
sortidx = dotprods.argsort() # token indices in ascending order of projection
top10 = sortidx[::-1][:10]   # most positive first
bot10 = sortidx[:10]         # most negative first


# print them out
# (indices are cast to plain ints inside a list so that decode() receives a
#  regular list of token ids rather than a numpy scalar)
print('\n10 most positive-projected words:')
print('  Proj.  |   Word')
print('---------+------------')
for widx in top10:
  print(f'  {dotprods[widx]:.3f}  |  "{tokenizer.decode([int(widx)])}"')

print('\n\n10 most negative-projected words:')
print('  Proj.  |   Word')
print('---------+------------')
for widx in bot10:
  # one fewer leading space: the minus sign of a negative score takes its place
  print(f' {dotprods[widx]:.3f}  |  "{tokenizer.decode([int(widx)])}"')

# **Part 4: Try other semantic axes**

In [None]:
# good/evil
# young/old
# big/small

# **Part 5: Try other embeddings**

In [None]:
# # load BERT tokenizer and model
# from transformers import BertTokenizer, BertModel
# tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
# model = BertModel.from_pretrained('bert-large-uncased')
# embeddings = model.embeddings.word_embeddings.weight.detach().numpy()

In [None]:
# # GPT2 tokenizer and model
# from transformers import GPT2Tokenizer,GPT2Model
# tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# model = GPT2Model.from_pretrained('gpt2-large')
# embeddings = model.wte.weight.detach().numpy()