<a href="https://colab.research.google.com/github/AkHiLdEvGoD/DeepLearning-Algorithms/blob/main/Positional_Encoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
import torch.nn as nn
import math

In [19]:
batch_size = 2
seq_len = 4
embd_dims = 8

X = torch.randn(batch_size,seq_len,embd_dims)

In [20]:
class PositionalEncoding(nn.Module):
  def __init__(self,embd_dims,max_len=5000):
    super().__init__()
    self.pe = torch.zeros(max_len,embd_dims)

    # for p in range(max_len):                                                  # Brute force approach
    #   for i in range(embd_dims/2):
    #     pos = p.float()
    #     den = torch.exp(2*i * (-math.log(10000)/embd_dims))
    #     pe[p,2*i] = torch.sin(pos/den)
    #     pe[p,2*i+1] = torch.cos(pos/den)

    pos = torch.arange(0,max_len).float().unsqueeze(1)                          # Vectorized and optimized approach
    den = torch.exp(torch.arange(0,embd_dims,2).float() * (-math.log(10000)/embd_dims))
    self.pe[:,0::2] = torch.sin(pos/den)
    self.pe[:,1::2] = torch.cos(pos/den)
    self.pe = self.pe.unsqueeze(0)

  def forward(self,X):
    seq_len = X.shape[1]
    return self.pe,X + self.pe[:,:seq_len]

In [25]:
Pe = PositionalEncoding(embd_dims)
pe,out = Pe(X)
pe

tensor([[[ 0.0000,  1.0000,  0.0000,  ...,  1.0000,  0.0000,  1.0000],
         [ 0.8415,  0.5403, -0.5440,  ...,  0.8623,  0.8269,  0.5623],
         [ 0.9093, -0.4161,  0.9129,  ...,  0.4872,  0.9300, -0.3676],
         ...,
         [ 0.9563, -0.2925, -0.1680,  ..., -0.1560,  0.8757, -0.4829],
         [ 0.2705, -0.9627, -0.3953,  ..., -0.6347,  0.0932, -0.9957],
         [-0.6639, -0.7478,  0.8314,  ..., -0.9386, -0.7709, -0.6370]]])

In [22]:
pe

tensor([[[ 0.0000,  1.0000,  0.0000,  ...,  1.0000,  0.0000,  1.0000],
         [ 0.8415,  0.5403, -0.5440,  ...,  0.8623,  0.8269,  0.5623],
         [ 0.9093, -0.4161,  0.9129,  ...,  0.4872,  0.9300, -0.3676],
         ...,
         [ 0.9563, -0.2925, -0.1680,  ..., -0.1560,  0.8757, -0.4829],
         [ 0.2705, -0.9627, -0.3953,  ..., -0.6347,  0.0932, -0.9957],
         [-0.6639, -0.7478,  0.8314,  ..., -0.9386, -0.7709, -0.6370]]])

In [24]:
out

tensor([[[-0.5386,  2.2538,  0.6005,  1.4478, -1.5772,  0.6203,  0.3477,
           0.9727],
         [ 1.8495, -0.3715, -0.8082, -2.9667,  0.3308,  1.3642,  1.2346,
           0.1607],
         [ 0.0233, -1.4111,  0.7780, -0.1685, -2.1223, -0.5651,  1.4458,
           0.2101],
         [ 0.2470, -0.3335, -0.3039, -0.5433, -1.2888, -0.4704, -1.1209,
          -0.4416]],

        [[ 0.3593, -0.3690,  1.5370,  2.4009, -1.3833,  1.7282, -0.8502,
           0.0694],
         [ 2.7755,  0.3159, -1.5607, -0.6250,  0.1782, -0.1329,  0.9477,
           1.0594],
         [-0.4505, -0.2123,  0.5060,  0.0520, -1.3050,  0.0451, -0.3667,
          -1.0179],
         [ 0.2407,  0.5437, -1.2377, -1.5430, -1.4499,  0.4212, -0.9759,
          -0.3293]]])