In [14]:
import torch.nn as nn
import torch
import math

In [70]:
class InputEmbedding(nn.Module):
    """Token-embedding lookup whose output is multiplied by sqrt(d_model).

    Args:
        d_model: size of each embedding vector.
        vocab_size: number of rows in the embedding table.
    """

    def __init__(self, d_model, vocab_size):
        super().__init__()

        self.d_model = d_model
        self.vocab_size = vocab_size
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=d_model)

    def forward(self, x):
        """Look up the embeddings for the indices in `x` and scale them.

        Returns the looked-up vectors multiplied by sqrt(d_model).
        """
        scale = math.sqrt(self.d_model)
        token_vectors = self.embedding(x)
        return token_vectors * scale

In [71]:
# Smoke-test the embedding layer on a batch of 2 sequences with 4 token ids each.
embedding_layer = InputEmbedding(d_model=512, vocab_size=10_000)
embedded_output = embedding_layer(
    torch.tensor([
        [1, 2, 3, 4],
        [5, 6, 7, 8],
    ])
)
embedded_output.shape

torch.Size([2, 4, 512])

### Positional Encoding

In [90]:
# Column vector holding the integer positions 0..13.
position = torch.arange(14).reshape(-1, 1)
position.shape

torch.Size([14, 1])

In [89]:
# Row vector of the frequencies 10000^(-k/512) for the even indices k = 0, 2, ..., 510.
div_term = torch.exp(
    -(math.log(10000) / 512) * torch.arange(0, 512, 2, dtype=torch.float)
)[None, :]

div_term.shape

torch.Size([1, 256])

In [91]:
# Broadcast product of the position column and the frequency row, plus a leading axis.
value = torch.unsqueeze(position * div_term, dim=0)
value.shape

torch.Size([1, 14, 256])

In [83]:

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal positional encodings to a batch of embeddings.

    For position ``p`` and even column index ``k`` the precomputed table holds
    ``pe[p, k] = sin(p / N^(k / d_model))`` and
    ``pe[p, k + 1] = cos(p / N^(k / d_model))`` with ``N = 10000``.
    The table is stored as a buffer named ``pe`` of shape
    ``(1, max_len, d_model)``.

    Args:
        d_model: size of the last dimension of the table; may be even or odd.
        max_len: number of positions precomputed, i.e. the longest sequence
            that ``forward`` accepts.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        N = 10000.0
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)

        # Step of 2 because the columns of `pe` are also filled with a step of 2:
        # each frequency is shared by one (sin, cos) column pair.
        div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float) * -(math.log(N) / d_model))
        angles = position * div_term

        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even column,
        # so drop the last frequency to keep the shapes aligned.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Add a leading dimension so that `pe` broadcasts against `x` over the batch.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        Args:
            x: tensor whose dimension 1 is the sequence axis and whose last
                dimension is ``d_model`` (e.g. ``(batch, seq_len, d_model)``).

        Raises:
            RuntimeError: if the sequence is longer than ``max_len``.
        """
        seq_len = x.size(1)
        max_len = self.pe.size(1)
        if seq_len > max_len:
            # Without this check a table with max_len == 1 would silently
            # broadcast the position-0 encoding over every position.
            # RuntimeError matches what the failed broadcast raised before.
            raise RuntimeError(
                f"sequence length {seq_len} exceeds max_len {max_len} of the positional encoding"
            )

        # Slice `pe` so that it has the same sequence length as `x`.
        return x + self.pe[:, :seq_len]
    

# Add positional encodings to `embedded_output`; the table covers 14 positions.
pos_encoding_layer = PositionalEncoding(max_len=14, d_model=512)

pos_encoding = pos_encoding_layer(embedded_output)
pos_encoding.shape
    

torch.Size([2, 4, 512])