In [1]:
import torch
import torch.nn as nn 
import math 



In [4]:
class InputEmbeddings(nn.Module):
    """Token-embedding layer whose output is scaled by sqrt(d_model).

    Args:
        d_model: Size of each embedding vector.
        vocabu_size: Number of rows in the embedding table (vocabulary size).
    """

    def __init__(self, d_model:int, vocabu_size:int):
        super().__init__()
        # Learnable lookup table with one d_model-sized row per vocabulary entry.
        self.embedding = nn.Embedding(num_embeddings=vocabu_size, embedding_dim=d_model)
        self.vocabu_size = vocabu_size
        self.d_model = d_model

    def forward(self, x):
        """Look up the embeddings for the indices in ``x`` and scale them.

        Args:
            x: Integer index tensor accepted by ``nn.Embedding``.

        Returns:
            The looked-up embeddings multiplied by ``sqrt(d_model)``; the
            result has the shape of ``x`` plus a trailing ``d_model`` axis.
        """
        scale = math.sqrt(self.d_model)
        embedded = self.embedding(x)
        return embedded * scale
    

In [5]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a sequence, then applies dropout.

    The encoding table is computed once at construction time:

        pe[pos, 2i]     = sin(pos / 10000^(2i / d_model))
        pe[pos, 2i + 1] = cos(pos / 10000^(2i / d_model))

    and stored as the buffer ``pe`` with shape ``(1, seq_len, d_model)``.

    Args:
        d_model: Size of the feature (last) dimension of the encoding.
        seq_len: Number of positions to precompute encodings for.
        dropout: Dropout probability applied in ``forward``.
    """

    def __init__(self, d_model:int, seq_len:int, dropout:float)->None:
        super().__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = nn.Dropout(dropout)

        # Encoding table of shape (seq_len, d_model), filled in below.
        pe = torch.zeros(seq_len, d_model)
        # Column vector of positions 0..seq_len-1, shape (seq_len, 1).
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)
        # 1 / 10000^(2i / d_model), evaluated in log space for numerical
        # stability: exp(2i * (-ln(10000) / d_model)).
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        # Broadcast (seq_len, 1) * (n_even_cols,) -> (seq_len, n_even_cols).
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # When d_model is odd there is one fewer odd column than even columns,
        # so trim the angles to the number of odd columns.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        # Add a leading batch dimension: (1, seq_len, d_model).
        pe = pe.unsqueeze(0)
        # A buffer is saved with the module and follows .to(device), but it is
        # not a parameter, so the optimizer never updates it.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the position encodings to ``x`` and apply dropout.

        Args:
            x: Tensor whose dimension 1 is the sequence axis and whose last
                dimension is ``d_model`` (assumed ``(batch, seq, d_model)``).
                Its sequence length must not exceed ``seq_len``.

        Returns:
            ``dropout(x + pe[:, :x.shape[1], :])``, same shape as ``x``.
        """
        # Slice the table to the actual sequence length of the input.
        x = x + self.pe[:, : x.shape[1], :]
        return self.dropout(x)





In [1]:
#import torch

#pos = torch.arange(0, 5)          # positions: 0..4
#print("pos:", pos)
#print("pos.shape:", pos.shape)     # (5,)

#pos_u1 = pos.unsqueeze(1)          # add dim at index 1
#print("\npos_u1:\n", pos_u1)
#print("pos_u1.shape:", pos_u1.shape)  # (5, 1)

#pos_u0 = pos.unsqueeze(0)          # add dim at index 0
#print("\npos_u0:\n", pos_u0)
#print("pos_u0.shape:", pos_u0.shape)  # (1, 5)

pos: tensor([0, 1, 2, 3, 4])
pos.shape: torch.Size([5])

pos_u1:
 tensor([[0],
        [1],
        [2],
        [3],
        [4]])
pos_u1.shape: torch.Size([5, 1])

pos_u0:
 tensor([[0, 1, 2, 3, 4]])
pos_u0.shape: torch.Size([1, 5])
