In [2]:
import import_ipynb
from Encoder import Encoder, EncoderBlock, InputEmbeddingsLayer, PositionalEncodingLayer, MultiHeadAttentionBlock, FeedForwardBlock
from Decoder import Decoder, DecoderBlock, InputEmbeddingsLayer, PositionalEncodingLayer, MultiHeadAttentionBlock, FeedForwardBlock
import torch 
import torch.nn as nn 

importing Jupyter notebook from Decoder.ipynb


In [3]:
class LinearLayer(nn.Module):
    """Projection layer mapping ``d_model``-sized features to ``vocab_size`` scores."""

    def __init__(self, d_model: int, vocab_size: int) -> None:
        """Create the underlying ``nn.Linear(d_model, vocab_size)`` module."""
        super().__init__()
        # The attribute keeps its capitalised name so the parameter names
        # remain ``Linear.weight`` / ``Linear.bias``.
        self.Linear = nn.Linear(in_features=d_model, out_features=vocab_size)

    def forward(self, x):
        """Return the result of applying the linear projection to ``x``."""
        projected = self.Linear(x)
        return projected

In [4]:
from dataclasses import dataclass
@dataclass
class Model_Arguments:
    """Hyper-parameters consumed when assembling the Transformer model.

    The first four fields are required; the remaining ones carry defaults.
    """

    # --- required: vocabulary sizes and sequence lengths --------------------
    source_vocab_size: int
    target_vocab_size: int
    source_sequence_length: int
    target_sequence_length: int

    # --- optional: architecture settings ------------------------------------
    d_model: int = 512      # model / embedding width
    Layers: int = 6         # number of encoder blocks and of decoder blocks
    heads: int = 8          # attention heads per multi-head attention block
    dropout: float = 0.1    # dropout value handed to every sub-layer
    d_ff: int = 2048        # hidden width of the feed-forward blocks

In [5]:
# Building the transformer block 
class TransformerBlock(nn.Module):
    """Encoder-decoder wrapper tying together embeddings, positional encodings,
    the encoder and decoder stacks, and the final linear projection.

    The three stages can be driven separately through ``encode``, ``decode``
    and ``linear``, or in a single call through ``forward``.
    """

    def __init__(self, encoder: Encoder, decoder: Decoder, source_embedding: InputEmbeddingsLayer, target_embedding: InputEmbeddingsLayer, source_position: PositionalEncodingLayer, target_position: PositionalEncodingLayer, Linear: LinearLayer) -> None:
        """Store the already-constructed sub-modules.

        Args:
            encoder: Encoder stack applied to the embedded source sequence.
            decoder: Decoder stack applied to the embedded target sequence.
            source_embedding: Embedding layer for the source input.
            target_embedding: Embedding layer for the target input.
            source_position: Positional-encoding layer for the source side.
            target_position: Positional-encoding layer for the target side.
            Linear: Output projection applied by ``linear``.
        """
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.source_embedding = source_embedding
        self.target_embedding = target_embedding
        self.source_position = source_position
        # NOTE: the attribute is spelled ``targ_position`` (not ``target_position``).
        # It is kept as-is because renaming a registered sub-module changes the
        # attribute callers use and any state-dict keys stored under it.
        self.targ_position = target_position
        self.Linear = Linear

    def encode(self, source_language, source_mask):
        """Embed the source input, add positional encoding and run the encoder.

        Args:
            source_language: Source input passed to ``source_embedding``.
            source_mask: Mask forwarded unchanged to the encoder.

        Returns:
            Whatever ``self.encoder`` returns for the encoded source.
        """
        source_language = self.source_embedding(source_language)
        source_language = self.source_position(source_language)
        return self.encoder(source_language, source_mask)

    def decode(self, Encoder_output, source_mask, target_language, target_mask):
        """Embed the target input, add positional encoding and run the decoder.

        Args:
            Encoder_output: Result of ``encode`` for the matching source input.
            source_mask: Mask forwarded unchanged to the decoder.
            target_language: Target input passed to ``target_embedding``.
            target_mask: Mask forwarded unchanged to the decoder.

        Returns:
            Whatever ``self.decoder`` returns.
        """
        target_language = self.target_embedding(target_language)
        target_language = self.targ_position(target_language)
        return self.decoder(target_language, Encoder_output, source_mask, target_mask)

    def linear(self, x):
        """Apply the output projection (``self.Linear``) to ``x``."""
        return self.Linear(x)

    def forward(self, source_language, source_mask, target_language, target_mask):
        """Run encode -> decode -> linear in one call.

        Defining ``forward`` makes the module callable (``model(...)``), which
        ``nn.Module`` requires for hooks and other call-based utilities. The
        individual stage methods remain available and unchanged.

        Args:
            source_language: Source input, as accepted by ``encode``.
            source_mask: Mask used by both the encoder and the decoder.
            target_language: Target input, as accepted by ``decode``.
            target_mask: Mask used by the decoder.

        Returns:
            The output of ``linear`` applied to the decoder result.
        """
        encoder_output = self.encode(source_language, source_mask)
        decoder_output = self.decode(encoder_output, source_mask, target_language, target_mask)
        return self.linear(decoder_output)

# Transformer model skeleton
def Transformer_model(Args: Model_Arguments)->TransformerBlock:
    """Assemble a ``TransformerBlock`` from the hyper-parameters in ``Args``.

    Builds the source/target embedding and positional-encoding layers,
    ``Args.Layers`` encoder blocks and ``Args.Layers`` decoder blocks, the
    output projection onto ``Args.target_vocab_size``, and wires them into a
    ``TransformerBlock``. Every parameter with more than one dimension is then
    re-initialised with Xavier-uniform; other parameters keep the
    initialisation given by their own layer.

    Args:
        Args: Model hyper-parameters (vocabulary sizes, sequence lengths,
            ``d_model``, ``Layers``, ``heads``, ``dropout``, ``d_ff``).

    Returns:
        The assembled and initialised ``TransformerBlock``.
    """
    source_embedding = InputEmbeddingsLayer(Args.d_model, Args.source_vocab_size)
    source_position = PositionalEncodingLayer(Args.d_model, Args.source_sequence_length, Args.dropout)

    target_embedding = InputEmbeddingsLayer(Args.d_model, Args.target_vocab_size)
    target_position = PositionalEncodingLayer(Args.d_model, Args.target_sequence_length, Args.dropout)

    # Each encoder block owns a fresh self-attention and feed-forward module.
    Encoder_Blocks = [
        EncoderBlock(
            MultiHeadAttentionBlock(Args.d_model, Args.heads, Args.dropout),
            FeedForwardBlock(Args.d_model, Args.d_ff, Args.dropout),
            Args.dropout,
        )
        for _ in range(Args.Layers)
    ]

    # Each decoder block owns fresh self-attention, cross-attention and
    # feed-forward modules (passed in that order).
    Decoder_Blocks = [
        DecoderBlock(
            MultiHeadAttentionBlock(Args.d_model, Args.heads, Args.dropout),
            MultiHeadAttentionBlock(Args.d_model, Args.heads, Args.dropout),
            FeedForwardBlock(Args.d_model, Args.d_ff, Args.dropout),
            Args.dropout,
        )
        for _ in range(Args.Layers)
    ]

    encoder = Encoder(nn.ModuleList(Encoder_Blocks))
    decoder = Decoder(nn.ModuleList(Decoder_Blocks))

    linear = LinearLayer(Args.d_model, Args.target_vocab_size)

    Transformer = TransformerBlock(encoder, decoder, source_embedding, target_embedding, source_position, target_position, linear)

    # Use the in-place ``xavier_uniform_``; the un-suffixed ``xavier_uniform``
    # is a deprecated alias that emits a warning on every call.
    for parameter in Transformer.parameters():
        if parameter.dim() > 1:
            nn.init.xavier_uniform_(parameter)
    return Transformer