# Positional Encoding

In [1]:
import torch
import torch.nn as nn

# Demo sizes: number of positions to encode and width of each encoding vector.
max_sequence_length = 10
d_model = 6


$$
PE(\text{position},2i)=\sin(\frac{\text{position}}{10000^{\frac{2i}{d_{model}}}})
$$

$$
PE(\text{position},2i+1)=\cos(\frac{\text{position}}{10000^{\frac{2i}{d_{model}}}})
$$

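Equivalently, if $i$ denotes the column index of the encoding itself (rather than the index of a sin/cos pair), we can rewrite it as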

$$
PE(\text{position},i)=\sin(\frac{\text{position}}{10000^{\frac{i}{d_{model}}}}) \quad \text{when } i \text{ is even}
$$

$$
PE(\text{position},i)=\cos(\frac{\text{position}}{10000^{\frac{i-1}{d_{model}}}}) \quad \text{when } i \text{ is odd}
$$

In [2]:
# Even column indices 0, 2, 4, ... below d_model (the sine columns).
even_i = torch.arange(start=0, end=d_model, step=2, dtype=torch.float)
even_i

tensor([0., 2., 4.])

In [3]:
# Odd column indices 1, 3, 5, ... below d_model (the cosine columns).
odd_i = torch.arange(start=1, end=d_model, step=2, dtype=torch.float)
odd_i

tensor([1., 3., 5.])

In [4]:
# Denominator 10000^(i / d_model) for each even column index i.
even_denominator = 10000 ** (even_i / d_model)
even_denominator

tensor([  1.0000,  21.5443, 464.1590])

In [7]:
# Denominator 10000^((i - 1) / d_model) for each odd column index i.
odd_denominator = 10000 ** ((odd_i - 1) / d_model)
odd_denominator

tensor([  1.0000,  21.5443, 464.1590])

In [8]:
# The even and odd denominators printed above are identical, so a single
# tensor serves both the sine and the cosine terms.
denominator = odd_denominator

In [15]:
# Column vector of positions, shape (max_sequence_length, 1), so that dividing
# by the 1-D `denominator` broadcasts to one row per position.
positions = torch.arange(max_sequence_length, dtype=torch.float)[:, None]
positions

tensor([[0.],
        [1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])

In [16]:
# Sine fills the even columns and cosine the odd columns; both use the same
# position / denominator argument.
evenPE = (positions / denominator).sin()
oddPE = (positions / denominator).cos()

In [17]:
# Sine terms: rows follow `positions`, columns follow `denominator`.
evenPE

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.8415,  0.0464,  0.0022],
        [ 0.9093,  0.0927,  0.0043],
        [ 0.1411,  0.1388,  0.0065],
        [-0.7568,  0.1846,  0.0086],
        [-0.9589,  0.2300,  0.0108],
        [-0.2794,  0.2749,  0.0129],
        [ 0.6570,  0.3192,  0.0151],
        [ 0.9894,  0.3629,  0.0172],
        [ 0.4121,  0.4057,  0.0194]])

In [19]:
# Cosine terms: rows follow `positions`, columns follow `denominator`.
oddPE

tensor([[ 1.0000,  1.0000,  1.0000],
        [ 0.5403,  0.9989,  1.0000],
        [-0.4161,  0.9957,  1.0000],
        [-0.9900,  0.9903,  1.0000],
        [-0.6536,  0.9828,  1.0000],
        [ 0.2837,  0.9732,  0.9999],
        [ 0.9602,  0.9615,  0.9999],
        [ 0.7539,  0.9477,  0.9999],
        [-0.1455,  0.9318,  0.9999],
        [-0.9111,  0.9140,  0.9998]])

In [27]:
# Pair each sine value with its cosine partner along a new trailing axis:
# (positions, frequencies) -> (positions, frequencies, 2).
stacked = torch.stack((evenPE, oddPE), dim=-1)
stacked.shape

torch.Size([10, 3, 2])

In [35]:
# Merge the last two axes so each row reads sin, cos, sin, cos, ...
PE = stacked.flatten(start_dim=1, end_dim=2)
PE

tensor([[ 0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.0464,  0.9989,  0.0022,  1.0000],
        [ 0.9093, -0.4161,  0.0927,  0.9957,  0.0043,  1.0000],
        [ 0.1411, -0.9900,  0.1388,  0.9903,  0.0065,  1.0000],
        [-0.7568, -0.6536,  0.1846,  0.9828,  0.0086,  1.0000],
        [-0.9589,  0.2837,  0.2300,  0.9732,  0.0108,  0.9999],
        [-0.2794,  0.9602,  0.2749,  0.9615,  0.0129,  0.9999],
        [ 0.6570,  0.7539,  0.3192,  0.9477,  0.0151,  0.9999],
        [ 0.9894, -0.1455,  0.3629,  0.9318,  0.0172,  0.9999],
        [ 0.4121, -0.9111,  0.4057,  0.9140,  0.0194,  0.9998]])

# Class

In [38]:
import torch
import torch.nn as nn

class PositionalEncoding(nn.Module):
    """Sinusoidal positional-encoding table.

    Even columns hold ``sin(position / 10000^(i / d_model))`` and odd columns
    hold ``cos(position / 10000^((i - 1) / d_model))``, where ``i`` is the
    column index.

    Args:
        d_model: Number of columns in the encoding.
        max_sequence_length: Number of positions (rows), starting at 0.
    """

    def __init__(self, d_model, max_sequence_length):
        super().__init__()
        self.d_model = d_model
        self.max_sequence_length = max_sequence_length

    def forward(self):
        """Build the encoding table.

        Returns:
            A float tensor of shape ``(max_sequence_length, d_model)``.
        """
        even_i = torch.arange(0, self.d_model, 2, dtype=torch.float)
        # The odd-column denominator 10000^((i - 1) / d_model) equals the
        # denominator of the preceding even column, so one tensor serves both.
        denominator = torch.pow(10000, even_i / self.d_model)
        # Column vector so the division broadcasts to (positions, frequencies).
        positions = torch.arange(
            0, self.max_sequence_length, dtype=torch.float
        ).unsqueeze(1)
        evenPE = torch.sin(positions / denominator)
        oddPE = torch.cos(positions / denominator)
        # Interleave as sin, cos, sin, cos, ... along the last dimension.
        stacked = torch.stack([evenPE, oddPE], dim=2)
        PE = torch.flatten(stacked, start_dim=1, end_dim=2)
        # For odd d_model the interleaving yields d_model + 1 columns (a
        # trailing cosine with no matching index); drop it so the width is
        # always exactly d_model. For even d_model this slice is a no-op.
        return PE[:, :self.d_model]

In [39]:
pe = PositionalEncoding(d_model=6, max_sequence_length=10)
# Call the module itself rather than .forward(): nn.Module.__call__ dispatches
# to forward() and also runs any registered hooks, which a direct .forward()
# call skips.
pe()

tensor([[ 0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000],
        [ 0.8415,  0.5403,  0.0464,  0.9989,  0.0022,  1.0000],
        [ 0.9093, -0.4161,  0.0927,  0.9957,  0.0043,  1.0000],
        [ 0.1411, -0.9900,  0.1388,  0.9903,  0.0065,  1.0000],
        [-0.7568, -0.6536,  0.1846,  0.9828,  0.0086,  1.0000],
        [-0.9589,  0.2837,  0.2300,  0.9732,  0.0108,  0.9999],
        [-0.2794,  0.9602,  0.2749,  0.9615,  0.0129,  0.9999],
        [ 0.6570,  0.7539,  0.3192,  0.9477,  0.0151,  0.9999],
        [ 0.9894, -0.1455,  0.3629,  0.9318,  0.0172,  0.9999],
        [ 0.4121, -0.9111,  0.4057,  0.9140,  0.0194,  0.9998]])