# Building a Transformer Model with PyTorch's `nn.Transformer` Module

<img src="https://media.geeksforgeeks.org/wp-content/uploads/20250325174552667398/transformer.png">

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

In [3]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch of embeddings.

    A table of shape ``(1, max_len, d_model)`` is computed once in
    ``__init__`` and registered as the buffer ``pe`` (so it is not a
    trainable parameter). For position ``pos`` and frequency index ``k``::

        pe[0, pos, 2k]     = sin(pos * 10000 ** (-2k / d_model))
        pe[0, pos, 2k + 1] = cos(pos * 10000 ** (-2k / d_model))

    Args:
        d_model: Size of the feature (last) dimension. Odd values are
            supported; the final cosine column is simply absent.
        max_len: Number of positions to precompute. Inputs to ``forward``
            must not have more than ``max_len`` positions along dim 1.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
        )
        # One angle per (position, frequency): (max_len, ceil(d_model / 2)).
        angles = position * div_term

        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even
        # columns, so drop the last frequency to keep the shapes aligned.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Leading singleton dim lets the table broadcast over the batch.
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` with the encodings of its first ``x.size(1)`` positions added.

        Args:
            x: Tensor broadcastable with ``(1, seq_len, d_model)``, i.e.
                batch-first embeddings of shape ``(batch, seq_len, d_model)``
                with ``seq_len <= max_len``.

        Returns:
            Tensor of the same shape as ``x``.
        """
        return x + self.pe[:, :x.size(1)]

In [4]:
class SimpleTransformers(nn.Module):
    """Encoder-decoder sequence model built on ``torch.nn.Transformer``.

    Source and target token ids are embedded, combined with positional
    encodings, passed through a batch-first ``nn.Transformer`` and projected
    to one logit per target-vocabulary entry.

    Args:
        src_vocab_size: Number of rows in the source embedding table.
        tgt_vocab_size: Number of rows in the target embedding table and
            size of the output logit dimension.
        d_model: Embedding / model width.
        nhead: Number of attention heads.
        num_layers: Number of layers used for both the encoder and decoder.
        dim_feedforward: Hidden width of each feed-forward sub-layer.
        dropout: Dropout probability inside the transformer layers.
        max_len: Length of the positional-encoding table handed to
            ``PositionalEncoding``; it bounds the usable sequence length.
    """

    def __init__(
        self,
        src_vocab_size,
        tgt_vocab_size,
        d_model=512,
        nhead=8,
        num_layers=4,
        dim_feedforward=2048,
        dropout=0.1,
        max_len=100
    ):
        super().__init__()

        self.src_embedding = nn.Embedding(num_embeddings=src_vocab_size, embedding_dim=d_model)
        self.tgt_embedding = nn.Embedding(num_embeddings=tgt_vocab_size, embedding_dim=d_model)
        # The same positional encoder is shared by source and target.
        self.pos_encoder = PositionalEncoding(d_model, max_len)

        # Pytorch Transformer
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True,
        )

        self.fc_out = nn.Linear(d_model, tgt_vocab_size)

    def forward(
        self,
        src,
        tgt,
        tgt_mask=None,
        src_key_padding_mask=None,
        tgt_key_padding_mask=None,
    ):
        """Compute target-vocabulary logits for every target position.

        Args:
            src: Integer token ids of shape ``(batch, src_len)``.
            tgt: Integer token ids of shape ``(batch, tgt_len)``.
            tgt_mask: Optional ``(tgt_len, tgt_len)`` attention mask for the
                decoder self-attention. When omitted, a causal mask is used
                so position ``i`` can only attend to positions ``<= i``;
                without it the decoder would see the tokens it is being
                trained to predict. Pass an all-``False`` boolean mask to
                allow full (non-causal) attention.
            src_key_padding_mask: Optional ``(batch, src_len)`` boolean mask,
                ``True`` at padding positions of ``src``. Also applied to the
                encoder memory when the decoder attends to it.
            tgt_key_padding_mask: Optional ``(batch, tgt_len)`` boolean mask,
                ``True`` at padding positions of ``tgt``.

        Returns:
            Tensor of shape ``(batch, tgt_len, tgt_vocab_size)`` with
            unnormalised logits.
        """
        if tgt_mask is None:
            tgt_len = tgt.size(1)
            # Boolean upper-triangular mask: True marks a blocked (future) key.
            tgt_mask = torch.triu(
                torch.ones(tgt_len, tgt_len, dtype=torch.bool, device=tgt.device),
                diagonal=1,
            )

        src_emb = self.pos_encoder(self.src_embedding(src))
        tgt_emb = self.pos_encoder(self.tgt_embedding(tgt))

        output = self.transformer(
            src_emb,
            tgt_emb,
            tgt_mask=tgt_mask,
            src_key_padding_mask=src_key_padding_mask,
            tgt_key_padding_mask=tgt_key_padding_mask,
            # Padded source positions must stay hidden from cross-attention too.
            memory_key_padding_mask=src_key_padding_mask,
        )
        return self.fc_out(output)