In [None]:
import torch
import torch.nn as nn

# Step 1: Embedding layer configuration
vocab_size = 1000     # number of distinct token IDs the lookup table can hold
embedding_dim = 512   # width of the vector produced for each token

# Example sentence expressed as token IDs; this is the embedding layer's input.
# Note that ID 101 occurs twice, so rows 0 and 4 of the result are identical.
token_ids = torch.tensor([101, 204, 333, 410, 101, 897])

# Lookup table that maps every token ID to a learnable vector.
embedding_layer = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)

# Look up one vector per token ID.
word_embeddings = embedding_layer(token_ids)

# Sanity check: expect [sequence length, embedding dimension],
# i.e. torch.Size([6, 512]) for the 6 tokens above.
print(word_embeddings.shape)

# Step 2: Add Positional Encoding (if necessary)
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a sequence of embeddings.

    A table ``pe`` of shape ``(max_len, embedding_dim)`` is precomputed once:
    even columns hold ``sin(position * freq)`` and odd columns hold
    ``cos(position * freq)``, where ``freq = 10000 ** (-2i / embedding_dim)``
    for the i-th sin/cos column pair.

    Args:
        embedding_dim: Size of each embedding vector (even or odd).
        max_len: Number of positions to precompute; inputs passed to
            ``forward`` must not have more than this many rows.
    """

    def __init__(self, embedding_dim, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, embedding_dim)
        # Column vector of positions 0..max_len-1, shape (max_len, 1).
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # One frequency per sin/cos pair: exp(-log(10000) * 2i / embedding_dim).
        div_term = torch.exp(torch.arange(0, embedding_dim, 2).float() * (-torch.log(torch.tensor(10000.0)) / embedding_dim))
        # Broadcast to shape (max_len, ceil(embedding_dim / 2)).
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # For an odd embedding_dim there is one fewer odd column than even
        # column, so drop the surplus frequency to keep the shapes aligned.
        pe[:, 1::2] = torch.cos(angles[:, : embedding_dim // 2])
        # Registered as a buffer rather than a parameter: the table is fixed
        # and is not meant to be updated by an optimizer.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(0)`` positions.

        Args:
            x: Tensor whose first dimension is the sequence position and whose
                last dimension is ``embedding_dim``, e.g. the unbatched
                ``(seq_len, embedding_dim)`` embeddings used in this notebook.
                NOTE(review): batched inputs are not handled specially; the
                slice is always taken along dimension 0 -- confirm before
                reusing with batches.

        Returns:
            A new tensor of the same shape as ``x`` (for the 2-D case) with
            the positional encodings added element-wise.
        """
        x = x + self.pe[:x.size(0), :]
        return x

# Build the encoder once for this embedding width (default max_len applies).
pos_enc = PositionalEncoding(embedding_dim)

# Add position information to every token embedding.
word_embeddings_with_positional_encoding = pos_enc(word_embeddings)

# The addition is element-wise, so the shape is unchanged: torch.Size([6, 512]).
print(word_embeddings_with_positional_encoding.size())


In [None]:
# An nn.Embedding module has no `.shape` attribute (accessing it raises
# AttributeError); the lookup table itself lives in `.weight`, whose shape is
# (vocab_size, embedding_dim).
print(embedding_layer.weight.shape)
# Embedding vectors looked up for the example token IDs.
print(word_embeddings)


In [None]:
# Exploration of the `position` tensor built inside PositionalEncoding.__init__.

# `max_len` is only a constructor parameter of PositionalEncoding, so it does
# not exist at notebook scope; define it here (matching the class default of
# 5000) so this cell runs on a fresh kernel instead of raising NameError.
max_len = 5000

# Flat 1-D tensor of positions: shape (50000,).
position = torch.arange(0,50000, dtype=torch.float)
print(position)

# Same idea as in the class: unsqueeze(1) turns the positions into a column
# vector of shape (max_len, 1) so it can broadcast against the frequencies.
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
print(position)