In [1]:
!pip install torch



In [2]:
import torch
import torch.nn as nn
import math

# PyTorch models are built from nn.Module subclasses;
# calling a module instance, e.g. module(x), automatically runs its forward() method.

# Sinusoidal positional encoding, implemented as an nn.Module subclass
class SinusoidalPositionEmbedding(nn.Module):
    """Fixed (non-learned) sinusoidal positional encoding.

    Precomputes a ``[max_length, d_model]`` table ``pe`` where, for position
    ``pos`` and pair index ``k`` (``k = 0, 1, 2, ...``)::

        pe[pos, 2k]     = sin(pos / 10000^(2k / d_model))
        pe[pos, 2k + 1] = cos(pos / 10000^(2k / d_model))

    The table is stored as a non-persistent buffer: it follows the module
    through ``.to(device)`` / ``.double()`` etc., but is not written to
    ``state_dict()`` (it is fully determined by the constructor arguments).

    Args:
        d_model: Embedding dimension (number of columns of the table).
            Both even and odd values are supported.
        max_length: Number of positions (rows) to precompute.
    """

    def __init__(self, d_model, max_length):
        super().__init__()
        # Position indices as a column vector: [max_length, 1]
        pos = torch.arange(0, max_length, dtype=torch.float).unsqueeze(1)

        # One denominator per (sin, cos) column pair: 10000^(2k / d_model),
        # shape [ceil(d_model / 2)]
        div_term = 10000.0 ** (torch.arange(0, d_model, 2).float() / d_model)

        # Broadcasting [max_length, 1] / [ceil(d_model / 2)]
        # gives [max_length, ceil(d_model / 2)]
        angles = pos / div_term

        # Positional encoding table: [max_length, d_model]
        pe = torch.zeros(max_length, d_model)
        # Even columns get the sine of every angle
        pe[:, 0::2] = torch.sin(angles)
        # Odd columns get the cosine; when d_model is odd there is one fewer
        # odd column than even column, so the last angle column is dropped
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Register as a buffer so device/dtype moves of the module apply to it;
        # persistent=False keeps state_dict() free of this derived tensor.
        self.register_buffer("pe", pe, persistent=False)

    def forward(self, x):
        """Add the positional encoding of the first ``x.size(0)`` positions to ``x``.

        Args:
            x: Tensor whose first dimension indexes the position and whose
                shape broadcasts against ``[x.size(0), d_model]``, for example
                ``[seq_len, d_model]``.

        Returns:
            ``x + pe[:x.size(0)]``.
        """
        return x + self.pe[:x.size(0)]

In [3]:
# Example sizes for the step-by-step walkthrough
max_length, d_model = 40, 32
# Positions 0 .. max_length - 1 as a column vector: [max_length, 1]
pos = torch.arange(max_length, dtype=torch.float)[:, None]
pos

tensor([[ 0.],
        [ 1.],
        [ 2.],
        [ 3.],
        [ 4.],
        [ 5.],
        [ 6.],
        [ 7.],
        [ 8.],
        [ 9.],
        [10.],
        [11.],
        [12.],
        [13.],
        [14.],
        [15.],
        [16.],
        [17.],
        [18.],
        [19.],
        [20.],
        [21.],
        [22.],
        [23.],
        [24.],
        [25.],
        [26.],
        [27.],
        [28.],
        [29.],
        [30.],
        [31.],
        [32.],
        [33.],
        [34.],
        [35.],
        [36.],
        [37.],
        [38.],
        [39.]])

In [4]:
# Denominators 10000^(j / d_model) for j = 0, 2, 4, ..., one per column pair
div_term = 10000.0 ** (
    torch.arange(0, d_model, 2, dtype=torch.float) / d_model
)
div_term

tensor([1.0000e+00, 1.7783e+00, 3.1623e+00, 5.6234e+00, 1.0000e+01, 1.7783e+01,
        3.1623e+01, 5.6234e+01, 1.0000e+02, 1.7783e+02, 3.1623e+02, 5.6234e+02,
        1.0000e+03, 1.7783e+03, 3.1623e+03, 5.6234e+03])

In [5]:
# Broadcasting: [40, 1] divided by [16] yields a [40, 16] matrix
r = torch.div(pos, div_term)
r.shape

torch.Size([40, 16])

In [6]:
# Show the pos / div_term matrix computed in the previous cell
r

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [1.0000e+00, 5.6234e-01, 3.1623e-01, 1.7783e-01, 1.0000e-01, 5.6234e-02,
         3.1623e-02, 1.7783e-02, 1.0000e-02, 5.6234e-03, 3.1623e-03, 1.7783e-03,
         1.0000e-03, 5.6234e-04, 3.1623e-04, 1.7783e-04],
        [2.0000e+00, 1.1247e+00, 6.3246e-01, 3.5566e-01, 2.0000e-01, 1.1247e-01,
         6.3246e-02, 3.5566e-02, 2.0000e-02, 1.1247e-02, 6.3246e-03, 3.5566e-03,
         2.0000e-03, 1.1247e-03, 6.3246e-04, 3.5566e-04],
        [3.0000e+00, 1.6870e+00, 9.4868e-01, 5.3348e-01, 3.0000e-01, 1.6870e-01,
         9.4868e-02, 5.3348e-02, 3.0000e-02, 1.6870e-02, 9.4868e-03, 5.3348e-03,
         3.0000e-03, 1.6870e-03, 9.4868e-04, 5.3348e-04],
        [4.0000e+00, 2.2494e+00, 1.2649e+00, 7.1131e-01, 4.0000e-01, 2.2494e-01,
         1.2649e-01, 7.1131e-02, 4.0000