In [1]:
import os

In [2]:
%pwd

'c:\\Users\\lenovo\\Desktop\\Mini_Translator\\research'

In [3]:
# The kernel starts in <project>/research (see the %pwd output above), while the
# `src.` imports and the relative paths used further down are resolved from the
# project root. Only step up while still inside research/, so that re-running
# this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [8]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelConfig:
    """Immutable settings used to build and store the base translation model.

    ``frozen=True`` makes instances read-only after construction. In this
    notebook the values are filled in by
    ``ConfigurationManager.get_model_config`` and consumed by ``Base_Model``.
    """

    # Directory into which the built model file is saved.
    root_dir: Path
    # Embedding size handed to the Encoder.
    encoder_embedding_dim: int
    # Embedding size handed to the Decoder.
    decoder_embedding_dim: int
    # LSTM hidden size, used for both the Encoder and the Decoder.
    hidden_dim: int
    # Number of stacked LSTM layers, used for both the Encoder and the Decoder.
    n_layers: int
    # Dropout value handed to the Encoder.
    encoder_dropout: float
    # Dropout value handed to the Decoder.
    decoder_dropout: float
    


In [9]:
from src.Mini_Translator.constants import *
from src.Mini_Translator.utils.common import read_yaml, create_directories

In [29]:
class ConfigurationManager:
    """Reads the project's config/params YAML files and builds typed config objects.

    The loaded objects are accessed by attribute (``config.artifacts_root``,
    ``params.hidden_dim``), so ``read_yaml`` is expected to return an
    attribute-accessible mapping -- NOTE(review): confirm against
    ``utils.common.read_yaml``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Location of the configuration YAML file.
            params_filepath: Location of the hyper-parameter YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_config(self) -> ModelConfig:
        """Build the ``ModelConfig`` for the base model stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            ModelConfig: ``root_dir`` from the ``base_Model`` section of the
            config file, and the model hyper-parameters from the params file.
        """
        config = self.config.base_Model
        params = self.params

        create_directories([config.root_dir])

        return ModelConfig(
            # ModelConfig declares root_dir as a Path, while the value read from
            # YAML is a plain string; convert so the field matches its annotation.
            root_dir=Path(config.root_dir),
            encoder_embedding_dim=params.encoder_embedding_dim,
            decoder_embedding_dim=params.decoder_embedding_dim,
            hidden_dim=params.hidden_dim,
            n_layers=params.n_layers,
            encoder_dropout=params.encoder_dropout,
            decoder_dropout=params.decoder_dropout,
        )


In [30]:
import torch.nn as nn
import torch
import random

In [31]:
class Encoder(nn.Module):
    """Embeds a source token sequence and summarises it with a stacked LSTM.

    Only the LSTM's final hidden and cell states are returned; the per-step
    outputs are discarded.

    Args:
        input_dim: Number of rows in the embedding table (source vocabulary size).
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, used both on the embeddings and inside
            the LSTM.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, n_layers, dropout):
        super().__init__()
        # Kept as plain attributes so Seq2Seq can compare them with the decoder's.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(
            num_embeddings=input_dim,
            embedding_dim=embedding_dim,
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            dropout=dropout,
        )
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, src):
        """Encode ``src`` and return the final ``(hidden, cell)`` LSTM states.

        Args:
            src: Token ids, shaped [src length, batch size].

        Returns:
            Tuple ``(hidden, cell)``, each shaped
            [n layers, batch size, hidden dim].
        """
        # token_vectors = [src length, batch size, embedding dim]
        token_vectors = self.embedding(src)
        regularised = self.dropout(token_vectors)
        # The per-step outputs are not needed, only the final states.
        _, final_state = self.lstm(regularised)
        hidden, cell = final_state
        return hidden, cell

In [32]:
class Decoder(nn.Module):
    """Single-step LSTM decoder that scores the next target token.

    Args:
        output_dim: Target vocabulary size; also the width of the prediction.
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        n_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, used both on the embeddings and inside
            the LSTM.
    """

    def __init__(self, output_dim, embedding_dim, hidden_dim, n_layers, dropout):
        super().__init__()
        # Plain attributes read by Seq2Seq (output width, compatibility checks).
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(
            num_embeddings=output_dim,
            embedding_dim=embedding_dim,
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            dropout=dropout,
        )
        self.fc_out = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, input, hidden, cell):
        """Run one decoding step.

        Args:
            input: Current token ids, shaped [batch size].
            hidden: Previous hidden state, [n layers, batch size, hidden dim].
            cell: Previous cell state, [n layers, batch size, hidden dim].

        Returns:
            Tuple ``(prediction, hidden, cell)`` where ``prediction`` is
            [batch size, output dim] and the states keep their input shapes.
        """
        # Add a sequence axis of length 1: [batch size] -> [1, batch size].
        step_tokens = input.unsqueeze(0)
        # step_embedded = [1, batch size, embedding dim]
        step_embedded = self.dropout(self.embedding(step_tokens))
        # lstm_out = [1, batch size, hidden dim]
        lstm_out, next_state = self.lstm(step_embedded, (hidden, cell))
        hidden, cell = next_state
        # Drop the length-1 sequence axis before projecting to the vocabulary.
        prediction = self.fc_out(lstm_out.squeeze(0))
        return prediction, hidden, cell


In [33]:
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes a target sequence step by step.

    Args:
        encoder: Module returning ``(hidden, cell)`` for a source batch and
            exposing ``hidden_dim`` and ``n_layers``.
        decoder: Module called as ``decoder(input, hidden, cell)`` and exposing
            ``hidden_dim``, ``n_layers`` and ``output_dim``.
        device: Device on which the output buffer is placed.

    Raises:
        AssertionError: If the encoder and decoder disagree on ``hidden_dim``
            or ``n_layers``.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        # The encoder's final states seed the decoder, so their shapes must match.
        assert encoder.hidden_dim == decoder.hidden_dim, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, src, trg, teacher_forcing_ratio):
        """Produce per-step vocabulary scores for ``trg``.

        Args:
            src: Source token ids, [src length, batch size].
            trg: Target token ids, [trg length, batch size]; row 0 is fed to
                the decoder as the first input.
            teacher_forcing_ratio: Probability, per step, of feeding the
                ground-truth token instead of the model's own prediction.

        Returns:
            Tensor [trg length, batch size, output dim]. Row 0 is left as
            zeros because decoding starts at step 1.
        """
        n_steps = trg.shape[0]
        n_sequences = trg.shape[1]
        vocab_size = self.decoder.output_dim

        # Buffer for the decoder's scores at every step.
        scores = torch.zeros(n_steps, n_sequences, vocab_size)
        outputs = scores.to(self.device)

        # The encoder's final states become the decoder's initial states.
        hidden, cell = self.encoder(src)

        # First decoder input is the first row of the target.
        decoder_input = trg[0, :]

        for t in range(1, n_steps):
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[t] = output

            # One random draw per step decides where the next input comes from.
            if random.random() < teacher_forcing_ratio:
                decoder_input = trg[t]
            else:
                decoder_input = output.argmax(1)

        return outputs

In [38]:
import json
import os
import torch
import torch.nn as nn
from src.Mini_Translator.logging import logger

class Base_Model:
    """Builds the untrained Seq2Seq model, saves it, and records the device used.

    Vocabulary sizes are read from ``metadata.json`` in the current working
    directory; all other hyper-parameters come from the supplied ``ModelConfig``.
    """

    # Values that were previously repeated inline in the method body.
    METADATA_PATH = 'metadata.json'
    MODEL_FILENAME = 'complete_model.pth'
    INIT_RANGE = 0.08

    def __init__(self, config: ModelConfig):
        """Store the configuration used to build and save the model."""
        self.config = config

    def initiate_prepare_base_model(self):
        """Build, initialise and save the model, then update the metadata file.

        Side effects:
            * writes ``complete_model.pth`` into ``config.root_dir``;
            * rewrites ``metadata.json`` with an added ``device`` entry.

        Raises:
            FileNotFoundError: If ``metadata.json`` does not exist.
            KeyError: If ``en_vocab`` or ``de_vocab`` is missing from it.
        """
        # Parsed once and reused for the device update below; nothing in this
        # method changes the file between the read and the final write.
        metadata = self._load_metadata()

        # `de_vocab` sizes the encoder's embedding table and `en_vocab` the
        # decoder's embedding table and output layer.
        input_dim = metadata['de_vocab']
        output_dim = metadata['en_vocab']

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model = self._build_model(input_dim, output_dim, device)
        self._init_weights(model)

        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        logger.info(f"The Model has {trainable:,} trainable parameters")

        # Save the complete model.
        # NOTE: torch.save on a module pickles the whole object, so loading it
        # later requires the Encoder/Decoder/Seq2Seq classes to be importable.
        model_path = os.path.join(self.config.root_dir, self.MODEL_FILENAME)
        torch.save(model, model_path)
        logger.info(f"Complete model saved to {model_path}")

        # Update metadata.json with device information
        metadata_path = self.METADATA_PATH
        metadata['device'] = str(device)
        with open(metadata_path, 'w') as file:
            json.dump(metadata, file, indent=4)
        logger.info(f"Device information saved to {metadata_path}")

    def _load_metadata(self):
        """Return the parsed contents of the metadata JSON file."""
        with open(self.METADATA_PATH, 'r') as file:
            return json.load(file)

    def _build_model(self, input_dim, output_dim, device):
        """Assemble Encoder + Decoder into a Seq2Seq model placed on ``device``."""
        cfg = self.config
        encoder = Encoder(
            input_dim,
            cfg.encoder_embedding_dim,
            cfg.hidden_dim,
            cfg.n_layers,
            cfg.encoder_dropout,
        )
        decoder = Decoder(
            output_dim,
            cfg.decoder_embedding_dim,
            cfg.hidden_dim,
            cfg.n_layers,
            cfg.decoder_dropout,
        )
        return Seq2Seq(encoder, decoder, device).to(device)

    def _init_weights(self, model):
        """Draw every parameter from U(-INIT_RANGE, INIT_RANGE), once each.

        ``model.parameters()`` already walks all sub-modules, so one pass is
        enough. Combining ``model.apply`` with a recursive ``named_parameters``
        loop would re-initialise nested parameters several times.
        """
        for param in model.parameters():
            nn.init.uniform_(param.data, -self.INIT_RANGE, self.INIT_RANGE)



In [39]:
# Run the base-model stage end to end: load configuration, build the typed
# model config, then build and save the model.
try:
    config = ConfigurationManager()
    get_model_config = config.get_model_config()
    base_model = Base_Model(config=get_model_config)
    base_model.initiate_prepare_base_model()
except Exception:
    # Nothing is handled here; a bare `raise` re-raises the active exception
    # as-is, which is the idiomatic form of the previous `raise e`.
    raise

[2024-05-22 22:11:33,849: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-22 22:11:33,858: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-22 22:11:33,858: INFO: common: created directory at: artifacts]
[2024-05-22 22:11:33,858: INFO: common: created directory at: artifacts/base_model]


[2024-05-22 22:11:35,689: INFO: 3222732677: The Model has 13,898,501 trainable parameters]
[2024-05-22 22:11:35,840: INFO: 3222732677: Complete model saved to artifacts/base_model\complete_model.pth]
[2024-05-22 22:11:35,855: INFO: 3222732677: Device information saved to metadata.json]
