In [1]:
import os
from pathlib import Path

from dataclasses import dataclass

In [2]:
%pwd

'/home/priyanshu1303d/Projects/DeepQA_PyTorch/research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'/home/priyanshu1303d/Projects/DeepQA_PyTorch'

In [5]:
@dataclass(frozen=True)
class ModelEvaluationConfig:
    root_dir:Path
    saved_model_path: Path
    model_metrics_json: Path
    vocab_file_path : Path
    data_path : Path

In [6]:
from DeepQA.constants import *
from DeepQA.utils.common import read_yaml , create_directories

In [7]:
class ConfigurationManager:
    def __init__(self , config_filepath = CONFIG_FILE_PATH , params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation

        create_directories([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir= config.root_dir,
            model_metrics_json= config.model_metrics_json,
            saved_model_path= config.saved_model_path,
            vocab_file_path= config.vocab_file_path,
            data_path= config.data_path

        )

        return model_evaluation_config

In [8]:
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score

In [9]:
import torch 
from torch.utils.data import Dataset , DataLoader
import ast

In [17]:
class QA_Dataclass(Dataset):
    def __init__(self , df , vocab):
        self.df = df
        self.vocab = vocab
        print(type(self.df))

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index):
        # Convert string representations of lists back to actual lists
        numerical_question = ast.literal_eval(self.df.iloc[index]['question_indices'])
        numerical_answer = ast.literal_eval(self.df.iloc[index]['answer_indices'])

        # Convert to PyTorch tensors
        question_tensor = torch.tensor(numerical_question, dtype=torch.long).unsqueeze(0)  # Add batch dimension
        answer_tensor = torch.tensor(numerical_answer, dtype=torch.long)

        return question_tensor, answer_tensor

In [20]:
import torch.serialization
import json
import pandas as pd

In [25]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score

class RNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim=50, hidden_size=64):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)
    
    def forward(self, x):
        x = self.embedding(x) 

        # 🚨 Fix: Remove the extra dimension if needed
        if x.dim() == 4:  
            x = x.squeeze(1)  # Remove the unnecessary 1-dim (batch_size, 1, seq_len, embedding_dim) → (batch_size, seq_len, embedding_dim)
        
        output, hidden = self.rnn(x)  # Pass through RNN
        output = self.fc(output[:, -1, :])  # Take the last output for classification

        return output


class ModelEvaluation:
    def __init__(self, config):
        self.config = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        with open(config.vocab_file_path, 'r') as f:
            vocab = json.load(f)
        self.vocab_size = len(vocab)

        # Define the model architecture (same as training)
        self.model = RNNModel(vocab_size=self.vocab_size).to(self.device)

        # Load model weights
        self.model = RNNModel(vocab_size=self.vocab_size).to(self.device)  # Define model
        self.model.load_state_dict(torch.load(self.config.saved_model_path, map_location=self.device))  # ✅ Load weights
        self.model.eval()

        df = pd.read_csv(self.config.data_path)  # ✅ Ensure it's a DataFrame

        self.dataset = QA_Dataclass(df, vocab)
        self.test_loader = DataLoader(self.dataset, batch_size=1, shuffle=True, pin_memory=True)

    def evaluate_model(self):
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for batch in self.test_loader:
                inputs, labels = batch  
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                outputs = self.model(inputs)
                predictions = torch.argmax(outputs, dim=1)

                all_preds.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        accuracy = accuracy_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds, average='weighted')

        print(f"🔹 Accuracy: {accuracy * 100:.2f}%")
        print(f"🔹 F1 Score: {f1:.4f}")

        return accuracy, f1
    
    def save_metrics(self, accuracy, f1):
        """Save accuracy and F1-score to a JSON file."""
        results = {
            "accuracy": accuracy,
            "f1_score": f1
        }

        # Ensure the directory exists
        model_mertics_path = Path(self.config.model_metrics_json)
        os.makedirs(model_mertics_path, exist_ok=True)

        # Save results to JSON
        results_file = os.path.join(model_mertics_path, "results.json")
        with open(results_file, "w") as f:
            json.dump(results, f, indent=4)

        print(f"✅ Evaluation metrics saved at: {results_file}")



In [28]:
try:
    config = ConfigurationManager()
    get_model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(get_model_evaluation_config)
    
    accuracy , f1 = model_evaluation.evaluate_model()
    print(f1 ," " ,accuracy)
    model_evaluation.save_metrics(accuracy, f1)

except Exception as e:
    raise e

[2025-04-03 19:20:06,283 : INFO : common  : yaml file config/config.yaml was read succesfully]
[2025-04-03 19:20:06,286 : INFO : common  : yaml file params.yaml was read succesfully]
[2025-04-03 19:20:06,286 : INFO : common  : Created directory at : artifacts]
[2025-04-03 19:20:06,287 : INFO : common  : Created directory at : artifacts/model_evaluation]
<class 'pandas.core.frame.DataFrame'>
🔹 Accuracy: 100.00%
🔹 F1 Score: 1.0000
1.0   1.0
✅ Evaluation metrics saved at: artifacts/model_evaluation/results.json
