In [3]:
import os 
from pathlib import Path

In [4]:
%pwd

'/home/priyanshu1303d/Projects/DeepQA_PyTorch/research'

In [5]:
# Move from the notebook folder up to the project root so that the relative
# paths used later in the notebook (e.g. "artifacts/...") resolve from there.
# The guard keeps the cell idempotent: without it, every re-run of this cell
# would climb one more directory level.
if Path.cwd().name == "research":
    os.chdir("../")

In [6]:
%pwd

'/home/priyanshu1303d/Projects/DeepQA_PyTorch'

In [7]:
from dataclasses import dataclass

In [8]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage."""

    # Filesystem locations.
    root_dir: Path
    data_path: Path
    output_path: Path
    vocab_file_path: Path

    # Training hyper-parameters.
    epochs: int
    weight_decay: float
    learning_rate: float
    optimizer: list

In [9]:
from DeepQA.constants import *
from DeepQA.utils.common import read_yaml , create_directories , get_size
from DeepQA.logging import logger

In [10]:
class ConfigurationManager:
    """Reads the project's YAML files and hands out per-stage config objects.

    The two YAML files are loaded once in ``__init__`` and kept on the
    instance as ``self.config`` and ``self.params``; both are accessed with
    attribute syntax below.
    """

    def __init__(self , config_filepath = CONFIG_FILE_PATH , params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the YAML file holding paths / stage layout.
            params_filepath: Path of the YAML file holding hyper-parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` for the model-training stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A ``ModelTrainerConfig`` whose fields are coerced to the types the
            dataclass declares (``Path`` for locations, ``int``/``float`` for
            numeric hyper-parameters).
        """
        stage_cfg = self.config.model_trainer
        train_args = self.params.TrainingArguments

        create_directories([stage_cfg.root_dir])

        # Values are coerced here so that consumers get what the dataclass
        # annotations promise, regardless of how the YAML loader typed them
        # (e.g. a numeric scalar that was loaded as a string).
        return ModelTrainerConfig(
            root_dir=Path(stage_cfg.root_dir),
            data_path=Path(stage_cfg.data_path),
            output_path=Path(stage_cfg.output_path),
            vocab_file_path=Path(stage_cfg.vocab_file_path),
            epochs=int(train_args.epochs),
            weight_decay=float(train_args.weight_decay),
            learning_rate=float(train_args.learning_rate),
            optimizer=train_args.optimizer,
        )

# Stage 3: Dataset

In [11]:
import torch 
from torch.utils.data import Dataset , DataLoader
import ast

In [12]:
class QA_Dataclass(Dataset):
    """Dataset yielding one (question, answer) pair of index tensors per row.

    Each row of ``df`` must provide ``question_indices`` and
    ``answer_indices``. A cell may be either the string form of a list
    (as produced by a CSV round-trip) or an already-materialised sequence.
    """

    def __init__(self , df , vocab):
        """Store the source table and the vocabulary.

        Args:
            df: Table with ``question_indices`` and ``answer_indices`` columns.
            vocab: Vocabulary object; kept on the instance but not read here.
        """
        self.df = df
        self.vocab = vocab

    def __len__(self):
        """Return the number of rows in the table."""
        return self.df.shape[0]

    @staticmethod
    def _parse_indices(cell):
        """Return ``cell`` as a sequence of indices, parsing it only if it is a string."""
        if isinstance(cell, str):
            # literal_eval only evaluates Python literals, so it is safe for
            # text such as "[1, 2, 3]" read back from a CSV.
            return ast.literal_eval(cell)
        return cell

    def __getitem__(self, index):
        """Return ``(question_tensor, answer_tensor)`` for the row at ``index``.

        ``question_tensor`` is a ``torch.long`` tensor with a leading
        dimension of size 1 added by ``unsqueeze(0)``; ``answer_tensor`` is a
        ``torch.long`` tensor built directly from the answer indices.
        """
        # Fetch the row once instead of once per column.
        row = self.df.iloc[index]
        numerical_question = self._parse_indices(row['question_indices'])
        numerical_answer = self._parse_indices(row['answer_indices'])

        question_tensor = torch.tensor(numerical_question, dtype=torch.long).unsqueeze(0)
        answer_tensor = torch.tensor(numerical_answer, dtype=torch.long)

        return question_tensor, answer_tensor

In [13]:
import json

import pandas as pd

# Preprocessed question/answer table; QA_Dataclass reads its
# 'question_indices' and 'answer_indices' columns.
df_final = pd.read_csv('artifacts/data_transformation/Preprocessed_Data/preprocessed_data.csv')

# Vocabulary stored as JSON next to the preprocessed data.
vocab_path = "artifacts/data_transformation/Vocab/vocab.json"
with open(vocab_path, "r") as f:
    vocab = json.load(f)

dataset = QA_Dataclass(df_final , vocab)

In [14]:
# One sample per batch, reshuffled on every pass over the dataset.
dataloader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
)

# Model Training

In [15]:
import torch.nn as nn
import torch.optim as optim
import json

In [16]:
class ModelTrainer:
    """Builds, trains, evaluates and saves the RNN question-answering model.

    The trainer reads the vocabulary file only to learn the vocabulary size,
    builds the model on the available device (CUDA if present, else CPU) and
    optimises it with Adam and cross-entropy loss.

    NOTE(review): ``config.optimizer`` is not consulted; Adam is always used.
    """

    # Architecture sizes handed to RNNModel by _build_model.
    EMBEDDING_DIM = 50
    HIDDEN_SIZE = 64
    # File name of the saved state_dict inside ``config.output_path``.
    MODEL_FILENAME = "qa_rnn.pth"

    def __init__(self , config : "ModelTrainerConfig" ):
        """Create the model, loss and optimizer from ``config``.

        Args:
            config: Provides ``vocab_file_path``, ``output_path``,
                ``learning_rate``, ``weight_decay`` and ``epochs``.
        """
        self.config = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Only the number of vocabulary entries is needed to size the model.
        with open(config.vocab_file_path, 'r') as f:
            vocab = json.load(f)
        self.vocab_size = len(vocab)

        self.model = self._build_model(self.vocab_size).to(self.device)

        # Directory the trained weights are written to by train().
        self.output_path = Path(config.output_path)

        self.criterion = nn.CrossEntropyLoss()
        # weight_decay comes from the config as well; float() guards against a
        # value that reached the config as a numeric string.
        self.optimizer = optim.Adam(
            self.model.parameters(),
            lr=config.learning_rate,
            weight_decay=float(config.weight_decay),
        )

    def _build_model(self , vocab_size):
        """Build and return the model for a vocabulary of ``vocab_size`` entries."""
        return RNNModel(
            vocab_size,
            embedding_dim=self.EMBEDDING_DIM,
            hidden_size=self.HIDDEN_SIZE,
        )

    def train(self, train_loader):
        """Train for ``config.epochs`` epochs, then save the model's state_dict.

        Args:
            train_loader: Sized iterable of ``(question, answer)`` tensor batches.

        Raises:
            ValueError: If ``train_loader`` contains no batches.
        """
        if len(train_loader) == 0:
            raise ValueError("train_loader is empty; there is nothing to train on.")

        logger.info("-------------Started Training----------")
        self.model.train()

        for epoch in range(self.config.epochs):
            running_loss = 0.0
            batches_used = 0

            for question, answer in train_loader:
                question, answer = question.to(self.device), answer.to(self.device)

                self.optimizer.zero_grad()
                output = self.model(question)

                if output is None:
                    print("⚠️ Warning: Model output is None. Skipping this batch.")
                    continue

                # squeeze(1) drops dimension 1 of the target when it has size 1.
                # NOTE(review): assumes each answer is a single index — confirm.
                loss = self.criterion(output, answer.squeeze(1))

                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()
                batches_used += 1

            # Average over the batches that actually contributed a loss, so
            # skipped batches do not drag the reported value down.
            avg_loss = running_loss / max(batches_used, 1)
            logger.info(f"Epoch [{epoch+1}/{self.config.epochs}], Loss: {avg_loss:.4f}")

        # torch.save does not create missing parent directories.
        self.output_path.mkdir(parents=True, exist_ok=True)
        model_path = self.output_path / self.MODEL_FILENAME
        torch.save(self.model.state_dict(), str(model_path))

        logger.info(f"✅ Model saved at {model_path}")

    def evaluate(self, val_loader):
        """Compute, print and return the mean loss over ``val_loader``.

        Args:
            val_loader: Sized iterable of ``(question, answer)`` tensor batches.

        Returns:
            The mean per-batch loss as a float.

        Raises:
            ValueError: If ``val_loader`` contains no batches.
        """
        num_batches = len(val_loader)
        if num_batches == 0:
            raise ValueError("val_loader is empty; there is nothing to evaluate.")

        self.model.eval()
        total_loss = 0.0

        # Gradients are not needed for evaluation.
        with torch.no_grad():
            for question, answer in val_loader:
                question, answer = question.to(self.device), answer.to(self.device)

                output = self.model(question)
                loss = self.criterion(output, answer.squeeze(1))

                total_loss += loss.item()

        avg_loss = total_loss / num_batches
        print(f"🔹 Validation Loss: {avg_loss:.4f}")
        return avg_loss


class RNNModel(nn.Module):
    """Embedding -> single RNN -> linear head that scores every vocabulary entry.

    The submodule attribute names (``embedding``, ``rnn``, ``fc``) are the
    keys of the model's state_dict.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size):
        """Create the three layers.

        Args:
            vocab_size: Number of embedding rows and of output scores.
            embedding_dim: Width of each token embedding.
            hidden_size: Width of the RNN hidden state.
        """
        super().__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, x):
        """Return the vocabulary scores computed from the last time step of the RNN output."""
        embedded = self.embedding(x)

        # A 4-D embedding means the indices carried an extra axis at position 1;
        # squeeze(1) removes it when that axis has size 1.
        if embedded.dim() == 4:
            embedded = embedded.squeeze(1)

        # The final hidden state returned by the RNN is not used.
        sequence_out, _ = self.rnn(embedded)

        # With batch_first=True, index -1 on axis 1 selects the last time step.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)



In [17]:
# Run the model-training stage end to end. There is deliberately no
# try/except here: a handler that only re-raises adds nothing, so errors are
# left to propagate with their original traceback.
config = ConfigurationManager()
get_model_trainig_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(get_model_trainig_config)

# Pinned host memory only speeds up host-to-GPU copies, so ask for it only
# when CUDA is actually available.
train_loader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
    pin_memory=torch.cuda.is_available(),
)
model_trainer.train(train_loader)

# NOTE(review): evaluation reuses the training loader, so the printed
# "Validation Loss" is measured on the training data.
model_trainer.evaluate(train_loader)

[2025-04-03 18:58:56,182 : INFO : common  : yaml file config/config.yaml was read succesfully]
[2025-04-03 18:58:56,185 : INFO : common  : yaml file params.yaml was read succesfully]
[2025-04-03 18:58:56,186 : INFO : common  : Created directory at : artifacts]
[2025-04-03 18:58:56,187 : INFO : common  : Created directory at : artifacts/model_trainer]
[2025-04-03 18:58:57,634 : INFO : 12142567  : -------------Started Training----------]
[2025-04-03 18:58:58,170 : INFO : 12142567  : Epoch [1/30], Loss: 6.0406]
[2025-04-03 18:58:58,356 : INFO : 12142567  : Epoch [2/30], Loss: 3.7010]
[2025-04-03 18:58:58,563 : INFO : 12142567  : Epoch [3/30], Loss: 1.7173]
[2025-04-03 18:58:58,760 : INFO : 12142567  : Epoch [4/30], Loss: 0.8524]
[2025-04-03 18:58:58,952 : INFO : 12142567  : Epoch [5/30], Loss: 0.4105]
[2025-04-03 18:58:59,134 : INFO : 12142567  : Epoch [6/30], Loss: 0.4558]
[2025-04-03 18:58:59,318 : INFO : 12142567  : Epoch [7/30], Loss: 0.3043]
[2025-04-03 18:58:59,500 : INFO : 12142567