In [1]:
import os

In [2]:
%pwd

'/home/aman/Desktop/TextCraft/research'

In [3]:
# Step one directory up from the notebook's folder so that relative paths used
# later in the notebook are resolved from the parent directory.
# NOTE(review): this is not idempotent - re-running the cell moves up one more
# level each time; run it exactly once per kernel session.
os.chdir("../")

In [4]:
from dataclasses import dataclass
from pathlib import Path

In [5]:
@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable set of filesystem locations consumed by the evaluation stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Working directory of the evaluation stage (created before use).
    root_dir: Path
    # Location of the dataset saved on disk that holds the split to evaluate.
    data_path: Path
    # Location of the trained language model to load.
    model_path: Path
    # Location of the tokenizer that matches the model.
    tokenizer_path: Path
    # Destination file for the computed metric(s).
    metric_file_name: Path

In [6]:
from TextCraft.utils.common import read_yaml, create_directories
from TextCraft.constants import *

In [7]:
class ConfigurationManager:
    """Loads the project configuration and builds per-stage config objects.

    On construction both YAML files are read and the top-level artifacts
    directory named in the main config is created.
    """

    def __init__(self, config_path = CONFIG_FILE_PATH, param_path = PARAMS_FILE_PATH):
        """Read the config and params YAML files and create the artifacts root.

        Args:
            config_path: Path to the main configuration YAML file.
            param_path: Path to the parameters YAML file.
        """
        self.config = read_yaml(config_path)
        self.params = read_yaml(param_path)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_eval_config(self) -> ModelEvaluationConfig:
        """Create the evaluation stage directory and return its settings.

        Returns:
            A ModelEvaluationConfig populated from the ``model_evaluation``
            section of the loaded configuration.
        """
        eval_section = self.config.model_evaluation

        # The stage directory is created before the config object is built.
        create_directories([eval_section.root_dir])

        return ModelEvaluationConfig(
            root_dir=eval_section.root_dir,
            data_path=eval_section.data_path,
            model_path=eval_section.model_path,
            tokenizer_path=eval_section.tokenizer_path,
            metric_file_name=eval_section.metric_file_name,
        )

In [8]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from datasets import load_dataset, load_from_disk, load_metric
import torch
import pandas as pd
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


[2024-09-12 23:27:08,145: INFO: config: PyTorch version 2.4.1 available.]


In [10]:
class ModelEvaluation:
    """Evaluates a saved GPT-2 language model by computing test-set perplexity."""

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the evaluation settings (paths to data, model, tokenizer, metric file)."""
        self.config = config

    def generate_batched_chunks(self, list_of_elements, batch_size):
        """Yield consecutive slices of ``list_of_elements`` of at most ``batch_size`` items.

        Args:
            list_of_elements: A sliceable sequence supporting ``len()``.
            batch_size: Maximum number of items per yielded slice.

        Yields:
            Slices ``list_of_elements[i : i + batch_size]`` in order.
        """
        for i in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[i: i+batch_size]

    def calculate_metric_on_test_ds(self, dataset, model, tokenizer,
                                    batch_size=16, device="cuda" if torch.cuda.is_available() else "cpu",
                                    column_text="text"):
        """Compute the perplexity of ``model`` over the texts in ``dataset``.

        The per-batch mean loss returned by the model is weighted by the number
        of label positions that actually contribute to that loss, so the final
        value is the exponential of the token-level mean loss.

        Args:
            dataset: Mapping-like object; ``dataset[column_text]`` must yield the texts.
            model: Callable language model returning an object with a ``.loss``
                tensor when called with ``input_ids``, ``attention_mask`` and ``labels``.
                It is expected to already live on ``device``.
            tokenizer: Callable tokenizer returning ``input_ids`` and ``attention_mask``
                tensors. If it has no pad token, its EOS token is assigned as pad
                token (this mutates the tokenizer passed in).
            batch_size: Number of texts per forward pass.
            device: Device the input tensors are moved to.
            column_text: Name of the dataset column holding the texts.

        Returns:
            The perplexity as a Python float.

        Raises:
            ValueError: If no token contributes to the loss (e.g. empty dataset).
        """
        texts = list(dataset[column_text])
        text_batches = list(self.generate_batched_chunks(texts, batch_size))

        # GPT-2 tokenizers ship without a pad token; padding a batch fails
        # unless one is set, so fall back to the EOS token.
        if getattr(tokenizer, "pad_token", None) is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Disable dropout etc. so the metric is deterministic.
        model.eval()

        total_loss = 0.0
        total_tokens = 0

        # Gradients are not needed for evaluation; the context must wrap the
        # forward passes themselves (not the method definition) to have any effect.
        with torch.no_grad():
            for text_batch in tqdm(text_batches, total=len(text_batches)):

                # Pad only to the longest text of the batch; padded positions are
                # masked out below, so padding to the full 1024 is wasted compute.
                inputs = tokenizer(text_batch, max_length=1024, truncation=True,
                                   padding=True, return_tensors="pt")

                input_ids = inputs['input_ids'].to(device)
                attention_mask = inputs['attention_mask'].to(device)

                # Label -100 is the ignore index of the LM loss: padded positions
                # must not count. The attention mask is used (not the pad id)
                # because the pad token may be the same id as a genuine EOS token.
                labels = input_ids.clone()
                labels[attention_mask == 0] = -100

                # The causal LM loss predicts token t+1 from token t, so the first
                # position of each sequence is never a prediction target.
                n_predicted = int((labels[:, 1:] != -100).sum().item())
                if n_predicted == 0:
                    # Nothing to score in this batch (would yield a NaN loss).
                    continue

                outputs = model(input_ids, attention_mask=attention_mask, labels=labels)

                total_loss += outputs.loss.item() * n_predicted
                total_tokens += n_predicted

        if total_tokens == 0:
            raise ValueError(
                f"Cannot compute perplexity: column '{column_text}' produced no tokens to score."
            )

        perplexity = torch.exp(torch.tensor(total_loss / total_tokens))
        return perplexity.item()

    def evaluate(self):
        """Load tokenizer, model and dataset, compute test perplexity and save it as CSV.

        The result is written to ``self.config.metric_file_name``.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = GPT2Tokenizer.from_pretrained(self.config.tokenizer_path)
        model = GPT2LMHeadModel.from_pretrained(self.config.model_path).to(device)

        dataset_pt = load_from_disk(self.config.data_path)

        # Calculate perplexity on the test dataset
        perplexity = self.calculate_metric_on_test_ds(
            dataset_pt['test'], model, tokenizer, batch_size=2, device=device, column_text='text'
        )

        # The config class has no ``model_name`` field; use it when a config
        # provides one, otherwise label the row with the model directory name.
        model_label = getattr(self.config, "model_name", None) or Path(str(self.config.model_path)).name

        # Save results to a CSV file
        results = {"perplexity": [perplexity]}
        df = pd.DataFrame(results, index=[f'{model_label}'])
        # NOTE(review): index=False means the model label is not written to the
        # file; kept as-is so the CSV layout stays a single "perplexity" column.
        df.to_csv(self.config.metric_file_name, index=False)

In [11]:
# Build the evaluation stage from the project configuration.
config = ConfigurationManager()
model_eval_config = config.get_eval_config()
# The name is rebound to the evaluator object itself (name kept unchanged so
# any cell that refers to it keeps working).
model_eval_config = ModelEvaluation(config = model_eval_config)

# Check the very file that evaluate() writes (the configured metric file)
# instead of a hard-coded path, so the skip-guard and the writer cannot diverge.
metric_path = str(model_eval_config.config.metric_file_name)

# Evaluation is expensive: only run it when no result file exists yet.
# Errors propagate unchanged; no try/except is needed just to re-raise them.
if os.path.exists(metric_path):
    print(f"Csv already exists at {metric_path}. Skipping ...")
else:
    print("Csv not found. Starting evaluation...")
    model_eval_config.evaluate()

[2024-09-12 23:27:27,018: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-09-12 23:27:27,259: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-12 23:27:27,268: INFO: common: created directory at: artifacts]
[2024-09-12 23:27:27,274: INFO: common: created directory at: artifacts/model_evaluation]
Csv already exists at artifacts/model_evaluation/evaluation_results.csv. Skipping ...
