In [1]:
! pip install transformers



In [2]:
import torch
from numpy import dot
from numpy.linalg import norm
from transformers import AutoTokenizer, AutoModel

In [3]:
# DistilBERT (uncased) is used here because it is small and quick to run.
# The model and its tokenizer are both loaded from the same checkpoint name.
model_name = "distilbert-base-uncased"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [4]:
# Word whose contextual embedding will be extracted, and the sentence
# that provides its context.
target_word = "queen"
sentence = "The king and queen ruled wisely."

In [5]:
# Tokenize the sentence into PyTorch tensors ("pt") for the model.
inputs = tokenizer(sentence, return_tensors="pt")
# Only the embeddings are read from the outputs, so run the forward pass
# without building an autograd graph.
with torch.no_grad():
    outputs = model(**inputs)

In [6]:
# Get hidden states (last layer).
# Index 0 selects the first sequence in the batch; only one sentence was
# tokenized, so this is the sentence's per-token representation.
last_hidden = outputs.last_hidden_state[0]  # shape: [seq_len, hidden_size]

In [7]:
# Map the input ids of the first sequence back to their token strings.
# These are tokenizer tokens (not necessarily whole words); tokens[i] is the
# token at the same sequence position as last_hidden[i].
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

In [8]:
# Find the embedding for the target word.
# The token must equal the target exactly: a substring test would also accept
# longer tokens that merely contain it (e.g. a token such as "queens").
# A word that the tokenizer splits into several subword pieces has no single
# matching token and is reported as not found.
target_idx = next((idx for idx, tok in enumerate(tokens) if tok == target_word), None)
if target_idx is None:
    # Fail loudly instead of leaving `word_embedding` undefined, or silently
    # reusing a stale value from an earlier run of this cell.
    raise ValueError(f"'{target_word}' is not a single token in: {tokens}")
word_embedding = last_hidden[target_idx].detach().numpy()

print(f"Word embedding length: {len(word_embedding)}")
print(f"First 10 values: {word_embedding[:10]}")

Word embedding length: 768
First 10 values: [ 0.604848   -0.329022   -0.11489624 -0.08473329 -0.02040417 -0.6469731
 -0.02629811  0.10178638 -0.18362316 -0.16340357]


In [9]:
# Example: compute similarity with another word in the same context.
target_word2 = "king"
# The token must equal the target exactly: a substring test would also accept
# longer tokens that merely contain it (e.g. "kingdom").
target_idx2 = next((idx for idx, tok in enumerate(tokens) if tok == target_word2), None)
if target_idx2 is None:
    # Fail loudly instead of leaving `word_embedding2` undefined or stale.
    raise ValueError(f"'{target_word2}' is not a single token in: {tokens}")
word_embedding2 = last_hidden[target_idx2].detach().numpy()

# Cosine similarity: dot product divided by the product of the L2 norms.
similarity = dot(word_embedding, word_embedding2) / (norm(word_embedding) * norm(word_embedding2))
print(f"Cosine similarity between '{target_word}' and '{target_word2}': {similarity:.3f}")

Cosine similarity between 'queen' and 'king': 0.829
