In [1]:
import torch

# Pin the global RNG so the randomly initialised weights are identical on every run
torch.manual_seed(123)

# Lookup table with one row per token id: 6 rows, each a 3-dimensional vector
embedding_layer = torch.nn.Embedding(num_embeddings=6, embedding_dim=3)

# Show the full (untrained) weight matrix
print("Initial Embeddings:", embedding_layer.weight, sep="\n")

# Passing id 3 through the layer returns row 3 of the weight matrix,
# wrapped in a leading dimension of size 1 because the input holds one id
token_index = torch.tensor([3])
token_embedding = embedding_layer(token_index)

print("\nEmbedding for Token at Index 3:", token_embedding, sep="\n")

Initial Embeddings:
Parameter containing:
tensor([[ 0.3374, -0.1778, -0.1690],
        [ 0.9178,  1.5810,  1.3010],
        [ 1.2753, -0.2010, -0.1606],
        [-0.4015,  0.9666, -1.1481],
        [-1.1589,  0.3255, -0.6315],
        [-2.8400, -0.7849, -1.4096]], requires_grad=True)

Embedding for Token at Index 3:
tensor([[-0.4015,  0.9666, -1.1481]], grad_fn=<EmbeddingBackward0>)


In [3]:
import torch
import torch.nn as nn

# -----------------------------
# 1. Input sentence
# -----------------------------
sentence = "I love LLM very much"

# -----------------------------
# 2. Tokenization
# -----------------------------
tokens = sentence.lower().split()
print("Tokens:", tokens)

# -----------------------------
# 3. Vocabulary & token IDs
# -----------------------------
vocab = {word: idx for idx, word in enumerate(set(tokens))}
token_ids = torch.tensor([vocab[token] for token in tokens])

print("Vocabulary:", vocab)
print("Token IDs:", token_ids.tolist())

# -----------------------------
# 4. Token Embedding
# -----------------------------
vocab_size = len(vocab)
embedding_dim = 8

token_embedding_layer = nn.Embedding(vocab_size, embedding_dim)
token_embeddings = token_embedding_layer(token_ids)

print("\nToken Embeddings:")
print(token_embeddings)

# -----------------------------
# 5. Positional Embedding
# -----------------------------
sequence_length = len(tokens)
position_ids = torch.arange(sequence_length)

position_embedding_layer = nn.Embedding(sequence_length, embedding_dim)
positional_embeddings = position_embedding_layer(position_ids)

print("\nPositional Embeddings:")
print(positional_embeddings)

# -----------------------------
# 6. Final Embedding (Token + Positional)
# -----------------------------
final_embeddings = token_embeddings + positional_embeddings

print("\nFinal Embeddings (Token + Positional):")
print(final_embeddings)

Tokens: ['i', 'love', 'llm', 'very', 'much']
Vocabulary: {'very': 0, 'llm': 1, 'i': 2, 'much': 3, 'love': 4}
Token IDs: [2, 4, 1, 0, 3]

Token Embeddings:
tensor([[-0.4213,  0.8879, -0.3825,  0.0672, -0.6540, -0.4250,  0.1616, -1.1260],
        [-0.1535, -1.1988, -2.1718, -1.7100,  0.2413, -0.7121, -0.9829,  0.7354],
        [-0.7540,  1.0738, -0.6731,  0.2997, -0.8631,  0.7167, -0.2071,  0.3427],
        [-1.4548, -2.1018,  0.3476,  2.4807, -0.5868, -1.0868, -1.1194,  1.1385],
        [ 0.5680, -0.4415, -0.1969,  2.0947,  1.3814,  0.3591,  0.5690,  0.3081]],
       grad_fn=<EmbeddingBackward0>)

Positional Embeddings:
tensor([[ 1.0493,  0.8459, -0.7381,  0.5146, -1.5782,  0.9765,  0.6253, -1.0222],
        [-0.4951, -2.1193, -0.3902, -1.6629, -1.0127, -0.0747, -1.5313, -0.4070],
        [ 0.5939,  1.0732, -0.2144,  0.0911,  0.6795,  0.6325,  1.6750,  0.4287],
        [ 0.1592, -0.3631, -0.2558, -3.0047,  0.7309, -2.6489, -1.1460,  0.9909],
        [ 0.9774,  0.5857, -0.2064, -0.4909, 