In [1]:
import torch
import torch.nn as nn
from torch import Tensor

In [21]:
# Number of sequence positions: the default `seq_len` of PositionalEncoding
# and the sequence dimension of the demo input tensor.
L = 16
# Embedding width: the default `d_model` of PositionalEncoding and the last
# dimension of the demo input tensor.
D_MODEL = 4

In [45]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch of embeddings.

    A table of shape ``(1, seq_len, d_model)`` is computed once at
    construction and registered as the buffer ``pe`` (so it appears in
    ``state_dict()`` but is not a trainable parameter). For position ``pos``
    and pair index ``i``::

        pe[0, pos, 2i]     = sin(pos / 10000 ** (2i / d_model))
        pe[0, pos, 2i + 1] = cos(pos / 10000 ** (2i / d_model))

    Args:
        d_model: Size of the last (feature) dimension. Odd values are
            supported; the final column then holds a sine term only.
        dropout: Dropout probability applied to the summed output.
        seq_len: Number of positions to precompute, i.e. the longest
            sequence ``forward`` accepts.
    """

    def __init__(
        self,
        d_model: int = D_MODEL,
        dropout: float = 0.0,
        seq_len: int = L
    ):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Column vector of positions 0..seq_len-1, shape (seq_len, 1).
        position = torch.arange(0, seq_len).unsqueeze(1)
        # Inverse frequencies 10000 ** (-2i / d_model), evaluated in log space.
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * -(torch.log(torch.tensor(10_000.0)) / d_model)
        )
        # Broadcast to shape (seq_len, ceil(d_model / 2)).
        angles = position * div_term

        pe = torch.zeros(seq_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so
        # trim the angles to match; for even d_model this slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Leading singleton dimension lets the table broadcast over the batch.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x: Tensor) -> Tensor:
        """Return ``dropout(x + pe[:, :x.size(1)])`` without modifying ``x``.

        Args:
            x: Tensor whose dimension 1 is the sequence axis and whose last
                dimension is ``d_model`` (e.g. ``(batch, seq, d_model)``).

        Returns:
            A new tensor with the same shape as ``x``.

        Raises:
            RuntimeError: If ``x.size(1)`` exceeds the number of precomputed
                positions.
        """
        n_positions = x.size(1)
        if n_positions > self.pe.size(1):
            # Fail loudly: a table with a single position would otherwise
            # broadcast silently across every position of x.
            raise RuntimeError(
                f"sequence length {n_positions} exceeds the "
                f"{self.pe.size(1)} precomputed positions"
            )
        # Out-of-place add: the caller's tensor is left untouched, and inputs
        # that are leaf tensors requiring grad are accepted. Buffers never
        # require grad, so no explicit detach is needed.
        x = x + self.pe[:, :n_positions]
        return self.dropout(x)

In [46]:
# Stand-in for a batch of token embeddings: uniform random values in [0, 1)
# with shape (1, L, D_MODEL).
# NOTE(review): no seed is set, so these values differ on every run; consider
# calling torch.manual_seed(...) beforehand for reproducible output.
x_emb = torch.rand(1, L, D_MODEL)

In [47]:
# Instantiate the module; dropout and seq_len keep their defaults (0.0 and L).
pe = PositionalEncoding(d_model=D_MODEL)

In [48]:
# Inspect the precomputed table stored under the registered buffer name 'pe'.
pe.state_dict()['pe']

tensor([[[ 0.0000,  1.0000,  0.0000,  1.0000],
         [ 0.8415,  0.5403,  0.0100,  0.9999],
         [ 0.9093, -0.4161,  0.0200,  0.9998],
         [ 0.1411, -0.9900,  0.0300,  0.9996],
         [-0.7568, -0.6536,  0.0400,  0.9992],
         [-0.9589,  0.2837,  0.0500,  0.9988],
         [-0.2794,  0.9602,  0.0600,  0.9982],
         [ 0.6570,  0.7539,  0.0699,  0.9976],
         [ 0.9894, -0.1455,  0.0799,  0.9968],
         [ 0.4121, -0.9111,  0.0899,  0.9960],
         [-0.5440, -0.8391,  0.0998,  0.9950],
         [-1.0000,  0.0044,  0.1098,  0.9940],
         [-0.5366,  0.8439,  0.1197,  0.9928],
         [ 0.4202,  0.9074,  0.1296,  0.9916],
         [ 0.9906,  0.1367,  0.1395,  0.9902],
         [ 0.6503, -0.7597,  0.1494,  0.9888]]])

In [49]:
# Display the current contents of the embeddings tensor.
x_emb

tensor([[[0.9803, 0.4167, 0.4230, 0.1088],
         [0.0773, 0.7389, 0.1826, 0.0478],
         [0.8858, 0.5079, 0.8251, 0.7519],
         [0.4062, 0.2063, 0.2592, 0.7307],
         [0.8035, 0.8051, 0.8606, 0.9832],
         [0.9974, 0.6177, 0.2149, 0.9632],
         [0.0533, 0.0712, 0.4286, 0.1503],
         [0.5433, 0.6627, 0.4333, 0.2395],
         [0.9823, 0.5219, 0.5031, 0.5695],
         [0.1357, 0.6164, 0.1595, 0.7419],
         [0.4672, 0.7585, 0.5333, 0.4226],
         [0.2241, 0.6310, 0.9298, 0.2666],
         [0.4494, 0.7842, 0.7811, 0.7269],
         [0.0999, 0.3252, 0.2837, 0.4922],
         [0.1572, 0.4564, 0.9636, 0.5060],
         [0.7520, 0.0984, 0.7017, 0.6566]]])

In [50]:
# Run the forward pass: the result is x_emb plus the positional table
# (dropout with p=0.0 leaves the sum unchanged).
pe(x_emb)

tensor([[[ 0.9803,  1.4167,  0.4230,  1.1088],
         [ 0.9187,  1.2792,  0.1926,  1.0477],
         [ 1.7951,  0.0918,  0.8451,  1.7517],
         [ 0.5473, -0.7837,  0.2891,  1.7302],
         [ 0.0467,  0.1514,  0.9005,  1.9824],
         [ 0.0385,  0.9013,  0.2649,  1.9620],
         [-0.2261,  1.0314,  0.4885,  1.1485],
         [ 1.2002,  1.4166,  0.5033,  1.2370],
         [ 1.9717,  0.3764,  0.5831,  1.5663],
         [ 0.5479, -0.2947,  0.2494,  1.7379],
         [-0.0768, -0.0805,  0.6332,  1.4176],
         [-0.7759,  0.6355,  1.0395,  1.2605],
         [-0.0872,  1.6281,  0.9008,  1.7197],
         [ 0.5200,  1.2327,  0.4134,  1.4838],
         [ 1.1478,  0.5931,  1.1032,  1.4962],
         [ 1.4023, -0.6613,  0.8511,  1.6454]]])