In [1]:
import os
%pwd
os.chdir("../")

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class model_trainer_config:
    root_dir: Path
        
    n_epochs: int
    clip: float
    teacher_forcing_ratio:  float



In [3]:
from src.Mini_Translator.constants import *
from src.Mini_Translator.utils.common import read_yaml, create_directories


In [4]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_train_model_config(self) -> model_trainer_config:
        config = self.config.model_trainer
        params=self.params

        create_directories([config.root_dir])

        trainer_config=model_trainer_config(root_dir=config.root_dir,n_epochs=params.n_epochs,clip=params.clip,
                                          teacher_forcing_ratio=params.teacher_forcing_ratio)
        
        return trainer_config


In [5]:
import torch.nn as nn
import torch
import random
import tqdm
import numpy as np
import json
import torch.optim as optim

from src.Mini_Translator.logging import logger


In [12]:
class modelTrainer:
    def __init__(self,config:model_trainer_config,config_filepath = CONFIG_FILE_PATH):
        self.config=config
        self.config2=config_filepath

    def train_fn(self,model,data_loader,optimizer,criterion,clip,teacher_forcing_ratio, device):
        model.train()
        epoch_loss=0
        i=0
        for  batch in data_loader:
            if i<220:
                i=i+1
                continue
            print(i)
            i=i+1
            src=batch["de_ids"].to(device)
            #src=[src length , batch size]
            trg=batch["en_ids"].to(device)
            #trg=[trg length ,batch size]
            optimizer.zero_grad()
            output=model(src,trg,teacher_forcing_ratio)
            #output=[trg length , batch size, trg vocab size]
            output_dim=output.shape[-1]
            output=output[1:].view(-1,output_dim)
            #output=[(trg length -1) * batch size ,trg vocab size]
            trg=trg[1:].view(-1)
            loss=criterion(output,trg)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),clip)
            optimizer.step()
            epoch_loss +=loss.item()
        return epoch_loss / len(data_loader)
    
    def evaluate_fn(self,model,data_loader,criterion,device):
        model.eval()
        epoch_loss=0
        i=4
        with torch.no_grad():
            for batch in data_loader:

                if i<4:
                    i=i+1
                    continue
                print(i)
                src=batch["de_ids"].to(device)
                trg=batch["en_ids"].to(device)
                output=model(src,trg,0)
                output_dim=output.shape[-1]
                output=output[1:].view(-1,output_dim)
                trg=trg[1:].view(-1)
                loss=criterion(output,trg)
                epoch_loss+=loss.item()
        return epoch_loss / len(data_loader)
    

    def initiate_model_trainer(self):
        root_dir = "artifacts/data_transformation"
        train_data_loader_path = os.path.join(root_dir, "train_data_loader.pth")
        valid_data_loader_path = os.path.join(root_dir, "valid_data_loader.pth")

        # Load the DataLoader objects
        train_data_loader = torch.load(train_data_loader_path)
        valid_data_loader = torch.load(valid_data_loader_path)

        model_path = os.path.join("artifacts/base_model", 'complete_model.pth')

        model = torch.load(model_path)

        optimizer=optim.Adam(model.parameters())

        with open("metadata.json",'r') as file:
            f=json.load(file)
            pad_index=f["pad_index"]
            device=f["device"]
        criterion=nn.CrossEntropyLoss(ignore_index=pad_index)

        n_epochs=1
        clip=self.config.clip
        teacher_forcing_ratio= self.config.teacher_forcing_ratio
        best_valid_loss=float("inf")
        for epoch in tqdm.tqdm(range(n_epochs)):
            train_loss=self.train_fn(model,train_data_loader,optimizer,criterion,clip,teacher_forcing_ratio,device)
            valid_loss=self.evaluate_fn(model,valid_data_loader,criterion,device)
            if valid_loss < best_valid_loss:
                best_valid_loss=valid_loss
                torch.save(model.state_dict(),os.path.join(self.config.root_dir,"tut1-model.pt"))
        # Calculate perplexities
        train_ppl = np.exp(train_loss)
        valid_ppl = np.exp(valid_loss)

        # Print the losses and perplexities
        print(f"\tTrain Loss: {train_loss:7.3f} | Train PPL: {train_ppl:7.3f}")
        print(f"\tValid Loss: {valid_loss:7.3f} | Valid PPL: {valid_ppl:7.3f}")

        # Create a dictionary to hold the information
        results = {
            "train_loss": train_loss,
            "train_ppl": train_ppl,
            "valid_loss": valid_loss,
            "valid_ppl": valid_ppl
        }

        # Specify the path to the JSON file
        json_path = os.path.join(self.config.root_dir,"results.json")

        # Save the results to a JSON file
        with open(json_path, 'w') as f:
            json.dump(results, f, indent=4)

        logger.info(f"Results saved to {json_path}")


In [9]:
class Encoder(nn.Module):
  def __init__(self,input_dim,embedding_dim,hidden_dim,n_layers,dropout):
    super().__init__()
    self.hidden_dim=hidden_dim
    self.n_layers=n_layers
    self.embedding=nn.Embedding(input_dim,embedding_dim)
    self.lstm=nn.LSTM(embedding_dim,hidden_dim,n_layers,dropout=dropout)
    self.dropout=nn.Dropout(dropout)

  def forward(self, src):
    #src=[src length,batch_size]
    embedded=self.dropout(self.embedding(src))
    #embedded=[src length ,batch size,embedding_dim]
    outputs,(hidden,cell)=self.lstm(embedded)
    #outputs=[src length ,batch size,hidden dim * n directions]

    #hidden=[n layers * n directions ,batch size, hidden dim ]
    #cell=[n layers * n directions,batch size, hidden dim  ]
    return hidden,cell

In [10]:
class Decoder(nn.Module):
    def __init__(self, output_dim, embedding_dim, hidden_dim, n_layers, dropout):
        super().__init__()
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(output_dim, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=dropout)
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, cell):
        # input = [batch size]
        # hidden = [n layers, batch size, hidden dim]
        # context = [n layers, batch size, hidden dim]
        input = input.unsqueeze(0)
        # input = [1, batch size]
        embedded = self.dropout(self.embedding(input))
        # embedded = [1, batch size, embedding dim]
        output, (hidden, cell) = self.lstm(embedded, (hidden, cell))
        # seq length and n directions will always be 1 in this decoder, therefore:
        # output = [1, batch size, hidden dim]
        # hidden = [n layers, batch size, hidden dim]
        # cell = [n layers, batch size, hidden dim]
        prediction = self.fc_out(output.squeeze(0))
        # prediction = [batch size, output dim]
        return prediction, hidden, cell


In [11]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        assert (
            encoder.hidden_dim == decoder.hidden_dim
        ), "Hidden dimensions of encoder and decoder must be equal!"
        assert (
            encoder.n_layers == decoder.n_layers
        ), "Encoder and decoder must have equal number of layers!"

    def forward(self, src, trg, teacher_forcing_ratio):
        # src = [src length, batch size] #src sentence
        # trg = [trg length, batch size] #target sentence
        # teacher_forcing_ratio is probability to use teacher forcing
        # e.g. if teacher_forcing_ratio is 0.5 we use ground-truth inputs 50% of the time
        batch_size = trg.shape[1]
        trg_length = trg.shape[0]
        trg_vocab_size = self.decoder.output_dim
        # tensor to store decoder outputs
        outputs = torch.zeros(trg_length, batch_size, trg_vocab_size).to(self.device)
        # last hidden state of the encoder is used as the initial hidden state of the decoder
        hidden, cell = self.encoder(src)
        # hidden = [n layers * n directions, batch size, hidden dim]
        # cell = [n layers * n directions, batch size, hidden dim]
        # first input to the decoder is the <sos> tokens
        input = trg[0, :]
        # input = [batch size]
        # first we pass all sos 128
        for t in range(1, trg_length):  # 1 to 34 not include 34
            # insert input token embedding, previous hidden and previous cell states
            # receive output tensor (predictions) and new hidden and cell states
            output, hidden, cell = self.decoder(input, hidden, cell)
            # output = [batch size, output dim]
            # hidden = [n layers, batch size, hidden dim]
            # cell = [n layers, batch size, hidden dim]
            # place predictions in a tensor holding predictions for each token
            outputs[t] = output
            # decide if we are going to use teacher forcing or not
            teacher_force = random.random() < teacher_forcing_ratio
            # get the highest predicted token from our predictions
            top1 = output.argmax(1)
            # if teacher forcing, use actual next token as next input
            # if not, use predicted token
            input = trg[t] if teacher_force else top1
            # input = [batch size]
        return outputs

In [13]:
try:
    config = ConfigurationManager()
    get_model_config = config.get_train_model_config()
    model = modelTrainer(config=get_model_config)
    model.initiate_model_trainer()
except Exception as e:
    raise e

[2024-05-24 08:40:00,252: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-24 08:40:00,262: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-24 08:40:00,266: INFO: common: created directory at: artifacts]
[2024-05-24 08:40:00,269: INFO: common: created directory at: artifacts/trained_model]


  0%|          | 0/1 [00:00<?, ?it/s]

220
221
222
223
224
225
226
4
4
4
4
4
4
4
4


100%|██████████| 1/1 [01:44<00:00, 104.78s/it]

	Train Loss:   0.233 | Train PPL:   1.263
	Valid Loss:   5.811 | Valid PPL: 334.055
[2024-05-24 08:41:46,342: INFO: 2547751249: Results saved to artifacts/trained_model\results.json]



