In [None]:
import math
import torch
def get_embedding(
    num_embeddings: int, embedding_dim: int, padding_idx = None
):
    """Build sinusoidal embeddings.

    This matches the implementation in tensor2tensor, but differs slightly
    from the description in Section 3.5 of "Attention Is All You Need".
    """
    half_dim = embedding_dim // 2
    emb = math.log(10000) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
    emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(
        1
    ) * emb.unsqueeze(0)
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(
        num_embeddings, -1
    )
    if embedding_dim % 2 == 1:
        # zero pad
        emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
    if padding_idx is not None:
        emb[padding_idx, :] = 0
    return emb

get_embedding(10, 4)

# Sinusoidal Positional Embeddings

$$
\begin{align*}
\text{PE}(pos, 2i) &= \sin\left(\frac{pos}{10000^{2i/d_{\text{model}}}}\right) \\
\text{PE}(pos, 2i+1) &= \cos\left(\frac{pos}{10000^{2i/d_{\text{model}}}}\right)
\end{align*}
$$

In [6]:
import torch
import math
def sinusoidal_position_embeddings(num_embeddings, embedding_dim, theta=10000):
    sin_size = embedding_dim // 2 + embedding_dim%2
    cos_size = embedding_dim // 2
    embedding = torch.zeros(num_embeddings, embedding_dim)
    embedding_positions = torch.arange(0, num_embeddings).float()
    div_term = torch.exp(torch.arange(0, sin_size, 2).float() * -(math.log(theta) / sin_size))
    embedding[:, 0::2] = torch.sin(embedding_positions.unsqueeze(1) * div_term)
    embedding[:, 1::2] = torch.cos(embedding_positions.unsqueeze(1) * div_term)[:,:cos_size]
    return embedding
sinusoidal_position_embeddings(10, 4)

tensor([[ 0.0000,  1.0000,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.8415,  0.5403],
        [ 0.9093, -0.4161,  0.9093, -0.4161],
        [ 0.1411, -0.9900,  0.1411, -0.9900],
        [-0.7568, -0.6536, -0.7568, -0.6536],
        [-0.9589,  0.2837, -0.9589,  0.2837],
        [-0.2794,  0.9602, -0.2794,  0.9602],
        [ 0.6570,  0.7539,  0.6570,  0.7539],
        [ 0.9894, -0.1455,  0.9894, -0.1455],
        [ 0.4121, -0.9111,  0.4121, -0.9111]])

In [7]:
24575, 24*1024

(24575, 24576)

In [8]:
f'{(21001-20891)*(1024*2*24*11):,}'

'59,473,920'

In [None]:
11_821_670_400/100_000_000, f'{.216704*100_000_000:,}'

(118.216704, '21,670,400.0')

In [None]:
# PE(pos,2i)= sin(pos/100002i/dmodel )
# PE(pos,2i+1)= cos(pos/100002i/dmodel )

In [None]:
# Sinusoidal Positional Embeddings in markdown
# $$
# \begin{align*}
# \text{PE}(pos, 2i) &= \sin\left(\frac{pos}{10000^{2i/d_{\text{model}}}}\right) \\
# \text{PE}(pos, 2i+1) &= \cos\left(\frac{pos}{10000^{2i/d_{\text{model}}}}\right)
# \end{align*}
# $$