## Import Dependencies


In [5]:
import torch
import torch.nn as nn
# Number of positions (rows) in the encoding table built below.
sequence_length = 10
# Width of each encoding vector (columns of the table).
d_model = 10

$$
PE(\text{position}, 2i) =\sin\bigg(\frac{\text{position} }{10000^\frac{2i}{d_{model}}} \bigg)
$$

$$
PE(\text{position}, 2i+1) = \cos\bigg( \frac{ \text{position} }{10000^\frac{2i}{d_{model}}} \bigg)
$$

Writing $i$ for the column index itself (rather than the index of a sin/cos pair), we can rewrite these as

$$
PE(\text{position}, i) = \sin\bigg( \frac{ \text{position} }{10000^\frac{i}{d_{model}}} \bigg) \text{ when i is even}
$$

$$
PE(\text{position}, i) = \cos\bigg( \frac{ \text{position} }{10000^\frac{i-1}{d_{model}}} \bigg) \text{ when i is odd}
$$


In [2]:
# Column indices of the encoding split by parity: even columns receive the
# sine term, odd columns the cosine term.
even_index = torch.arange(0, d_model, 2, dtype=torch.float)
odd_index = torch.arange(1, d_model, 2, dtype=torch.float)
print(even_index, odd_index, sep="\n")

tensor([0., 2., 4., 6., 8.])
tensor([1., 3., 5., 7., 9.])


In [3]:
# Denominator 10000^(i / d_model) for every even column index i.
even_denominator = 10000 ** (even_index / d_model)
# Denominator 10000^((i - 1) / d_model) for every odd column index i; the
# "- 1" makes each odd column reuse the exponent of the even column before it.
odd_denominator = 10000 ** ((odd_index - 1) / d_model)
print(even_denominator, odd_denominator, sep="\n")

tensor([1.0000e+00, 6.3096e+00, 3.9811e+01, 2.5119e+02, 1.5849e+03])
tensor([1.0000e+00, 6.3096e+00, 3.9811e+01, 2.5119e+02, 1.5849e+03])


In [4]:
# Positions 0 .. sequence_length - 1 as a column vector of shape
# (sequence_length, 1), ready to broadcast against the row of denominators.
position = torch.arange(sequence_length, dtype=torch.float)[:, None]
# The even and odd denominators printed above are identical, so one tensor
# is enough for both the sine and the cosine terms.
denominator = even_denominator
print(position)

tensor([[0.],
        [1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])


In [5]:
# position / denominator broadcasts (sequence_length, 1) against the 1-D
# denominator row, giving one angle per (position, sin/cos pair).
# Sine of the angle fills the even columns, cosine fills the odd columns.
even_position = (position / denominator).sin()
odd_position = (position / denominator).cos()
print(even_position, even_position.shape, sep="\n")
print(odd_position, odd_position.shape, sep="\n")

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 8.4147e-01,  1.5783e-01,  2.5116e-02,  3.9811e-03,  6.3096e-04],
        [ 9.0930e-01,  3.1170e-01,  5.0217e-02,  7.9621e-03,  1.2619e-03],
        [ 1.4112e-01,  4.5775e-01,  7.5285e-02,  1.1943e-02,  1.8929e-03],
        [-7.5680e-01,  5.9234e-01,  1.0031e-01,  1.5924e-02,  2.5238e-03],
        [-9.5892e-01,  7.1207e-01,  1.2526e-01,  1.9904e-02,  3.1548e-03],
        [-2.7942e-01,  8.1396e-01,  1.5014e-01,  2.3884e-02,  3.7857e-03],
        [ 6.5699e-01,  8.9544e-01,  1.7493e-01,  2.7864e-02,  4.4167e-03],
        [ 9.8936e-01,  9.5448e-01,  1.9960e-01,  3.1843e-02,  5.0476e-03],
        [ 4.1212e-01,  9.8959e-01,  2.2415e-01,  3.5822e-02,  5.6786e-03]])
torch.Size([10, 5])


In [10]:
# Pair each sine value with its cosine along a new trailing axis, so that
# flattening the last two axes interleaves them as sin, cos, sin, cos, ...
stacked = torch.stack([even_position, odd_position], dim=-1)
stacked.shape

torch.Size([10, 5, 2])

In [14]:
# Merge the pair axis into the column axis; each row becomes
# [sin_0, cos_0, sin_1, cos_1, ...].
flatten = stacked.flatten(start_dim=1)
print(flatten, flatten.shape, sep="\n")

tensor([[ 0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00],
        [ 8.4147e-01,  5.4030e-01,  1.5783e-01,  9.8747e-01,  2.5116e-02,
          9.9968e-01,  3.9811e-03,  9.9999e-01,  6.3096e-04,  1.0000e+00],
        [ 9.0930e-01, -4.1615e-01,  3.1170e-01,  9.5018e-01,  5.0217e-02,
          9.9874e-01,  7.9621e-03,  9.9997e-01,  1.2619e-03,  1.0000e+00],
        [ 1.4112e-01, -9.8999e-01,  4.5775e-01,  8.8908e-01,  7.5285e-02,
          9.9716e-01,  1.1943e-02,  9.9993e-01,  1.8929e-03,  1.0000e+00],
        [-7.5680e-01, -6.5364e-01,  5.9234e-01,  8.0569e-01,  1.0031e-01,
          9.9496e-01,  1.5924e-02,  9.9987e-01,  2.5238e-03,  1.0000e+00],
        [-9.5892e-01,  2.8366e-01,  7.1207e-01,  7.0211e-01,  1.2526e-01,
          9.9212e-01,  1.9904e-02,  9.9980e-01,  3.1548e-03,  1.0000e+00],
        [-2.7942e-01,  9.6017e-01,  8.1396e-01,  5.8092e-01,  1.5014e-01,
          9.8866e-01,  2.3884e-0

## Creating a class


In [2]:
import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding table.

    For position ``p`` and even column ``i`` the table holds
    ``sin(p / 10000 ** (i / d_model))``; the odd column ``i + 1`` holds the
    cosine of the same angle.

    Args:
        d_model: Number of columns in the returned table.
        sequence_length: Number of rows (positions ``0 .. sequence_length - 1``).
    """

    def __init__(self, d_model, sequence_length) -> None:
        super().__init__()
        self.d_model = d_model
        self.sequence_length = sequence_length

    def forward(self):
        """Build the encoding table.

        Returns:
            A float tensor of shape ``(sequence_length, d_model)`` with sine
            values in the even columns and cosine values in the odd columns.
        """
        # Even column indices 0, 2, 4, ...; each one drives a sin/cos pair.
        index = torch.arange(0, self.d_model, 2).float()
        denominator = torch.pow(10000, index / self.d_model)
        # Column vector of positions so the division broadcasts to
        # (sequence_length, number_of_pairs).
        position = torch.arange(self.sequence_length, dtype=torch.float).unsqueeze(1)
        even_position = torch.sin(position / denominator)
        odd_position = torch.cos(position / denominator)
        # Stack on a trailing axis, then flatten, to interleave sin and cos.
        stacked = torch.stack((even_position, odd_position), dim=2)
        positional_encoding = torch.flatten(stacked, start_dim=1, end_dim=2)
        # An odd d_model yields one sin/cos pair too many columns; trim the
        # surplus cosine column so the width is always exactly d_model.
        return positional_encoding[:, :self.d_model]
    
# Build the 10 x 10 table. The module is invoked by calling the instance
# rather than .forward() directly, so that nn.Module.__call__ runs any
# registered hooks around forward().
positional_encoding = PositionalEncoding(d_model=10, sequence_length=10)
positional_encoding()
    

tensor([[ 0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00],
        [ 8.4147e-01,  5.4030e-01,  1.5783e-01,  9.8747e-01,  2.5116e-02,
          9.9968e-01,  3.9811e-03,  9.9999e-01,  6.3096e-04,  1.0000e+00],
        [ 9.0930e-01, -4.1615e-01,  3.1170e-01,  9.5018e-01,  5.0217e-02,
          9.9874e-01,  7.9621e-03,  9.9997e-01,  1.2619e-03,  1.0000e+00],
        [ 1.4112e-01, -9.8999e-01,  4.5775e-01,  8.8908e-01,  7.5285e-02,
          9.9716e-01,  1.1943e-02,  9.9993e-01,  1.8929e-03,  1.0000e+00],
        [-7.5680e-01, -6.5364e-01,  5.9234e-01,  8.0569e-01,  1.0031e-01,
          9.9496e-01,  1.5924e-02,  9.9987e-01,  2.5238e-03,  1.0000e+00],
        [-9.5892e-01,  2.8366e-01,  7.1207e-01,  7.0211e-01,  1.2526e-01,
          9.9212e-01,  1.9904e-02,  9.9980e-01,  3.1548e-03,  1.0000e+00],
        [-2.7942e-01,  9.6017e-01,  8.1396e-01,  5.8092e-01,  1.5014e-01,
          9.8866e-01,  2.3884e-0