In [1]:
%pwd

'c:\\Users\\HP\\OneDrive\\Desktop\\resume_ready_projects\\text_summarizer_project\\research'

In [2]:
import os

In [3]:
# Step up to the project root only when we are not already there: the unguarded
# chdir climbed one more directory level every time this cell was re-run, which
# silently broke every relative path used further down.
# NOTE(review): 'config/config.yaml' is used as the root marker because the
# loader's log output in this notebook reports reading it relative to the
# working directory — confirm it is not also present under research/.
if not os.path.isfile(os.path.join('config', 'config.yaml')):
    os.chdir('../')

In [4]:
%pwd

'c:\\Users\\HP\\OneDrive\\Desktop\\resume_ready_projects\\text_summarizer_project'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of the locations used by the model-evaluation stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    The values are supplied by
    ``ConfigurationManager.get_model_evaluation_config`` from the
    ``model_evaluation`` section of the loaded config.
    """
    # Directory of the evaluation stage; the config manager creates it.
    root_dir: Path
    # Location handed to ``load_from_disk`` to read the dataset.
    data_path: Path
    # Location handed to ``AutoModelForSeq2SeqLM.from_pretrained``.
    model_path: Path
    # Location handed to ``AutoTokenizer.from_pretrained``.
    tokenizer_path: Path
    # Destination of the CSV file the metric table is written to.
    metric_file_name: Path

In [6]:
from textSummarizer.constants import *
from textSummarizer.utils.common import read_yaml,create_directories

[2024-09-30 12:03:53,353] [INFO] [__init__] - Logging setup is working correctly.


In [7]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects."""

    def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The top-level artifacts directory is created up front.
        artifacts_dir = self.config.artifacts_roots
        create_directories([artifacts_dir])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Create the stage directory and return the evaluation settings.

        Returns:
            A ``ModelEvaluationConfig`` populated from the
            ``model_evaluation`` section of the loaded config.
        """
        section = self.config.model_evaluation
        stage_root = section.root_dir

        create_directories([stage_root])

        return ModelEvaluationConfig(
            root_dir=stage_root,
            data_path=section.data_path,
            model_path=section.model_path,
            tokenizer_path=section.tokenizer_path,
            metric_file_name=section.metric_file_name,
        )

In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset,load_from_disk
import evaluate
import pandas as pd
from tqdm import tqdm
import torch

  from .autonotebook import tqdm as notebook_tqdm


[2024-09-30 12:03:58,026] [INFO] [config] - PyTorch version 2.4.1 available.


In [19]:
class ModelEvaluation:
    """Scores a seq2seq summarization model with ROUGE and saves the result as CSV."""

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the evaluation settings read later by ``Evaluate``."""
        self.config = config

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        """Yield consecutive slices of ``list_of_elements`` of at most ``batch_size`` items.

        Args:
            list_of_elements: Any sliceable sequence with a length.
            batch_size: Maximum number of items per yielded slice; must be > 0.

        Raises:
            ValueError: If ``batch_size`` is zero or negative. Because this is a
                generator, the error surfaces when iteration starts.
        """
        # A negative step would make ``range`` empty, so the caller would
        # silently evaluate on no data at all; fail loudly instead.
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
        for start in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[start : start + batch_size]

    def calculate_metric_on_test_ds(self,dataset, metric, model, tokenizer,
                               batch_size=16, device="cuda" if torch.cuda.is_available() else "cpu",
                               column_text="article",
                               column_summary="highlights"):
        """Generate summaries batch by batch and accumulate them into ``metric``.

        Args:
            dataset: Object indexable by column name, where each column is a
                sliceable sequence of texts.
            metric: Object exposing ``add_batch(predictions=, references=)``
                and ``compute()``.
            model: Object exposing ``generate(input_ids=, attention_mask=, ...)``.
            tokenizer: Callable tokenizer that also exposes ``decode``.
            batch_size: Number of examples tokenized and generated per step.
            device: Device the tokenized inputs are moved to before generation.
                The default is evaluated once, when the class is defined.
            column_text: Column holding the input texts.
            column_summary: Column holding the reference summaries.

        Returns:
            Whatever ``metric.compute()`` returns after all batches were added.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        # Split inputs and references into aligned batches.
        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(
                zip(article_batches, target_batches), total=len(article_batches)):

            # Pad only up to the longest sequence in this batch. Padding every
            # batch to the 1024-token cap makes the model process padding that
            # the attention mask ignores anyway.
            inputs = tokenizer(article_batch, max_length=1024, truncation=True,
                               padding="longest", return_tensors="pt")

            # Beam-search generation on the requested device.
            summaries = model.generate(input_ids=inputs["input_ids"].to(device),
                                       attention_mask=inputs["attention_mask"].to(device),
                                       length_penalty=0.8, num_beams=8, max_length=128)

            # Turn generated token ids back into text.
            decoded_summaries = [tokenizer.decode(s, skip_special_tokens=True,
                                                  clean_up_tokenization_spaces=True)
                                 for s in summaries]

            # Wrap plain-string references so each prediction gets a list of references.
            if isinstance(target_batch[0], str):
                target_batch = [[ref] for ref in target_batch]

            metric.add_batch(predictions=decoded_summaries, references=target_batch)

        # Aggregate over every batch added above.
        score = metric.compute()
        return score

    def Evaluate(self, num_samples=30, batch_size=2):
        """Load model, tokenizer and data, compute ROUGE, and write the scores to CSV.

        Args:
            num_samples: How many leading examples of the ``'test'`` split to
                score. Defaults to 30; ``None`` slices the whole split.
            batch_size: Examples per generation step. Defaults to 2.

        The one-row score table is written to ``self.config.metric_file_name``.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)

        dataset = load_from_disk(self.config.data_path)

        rouge_names = ['rouge1', 'rouge2', 'rougeL', 'rougeLsum']
        rouge_metric = evaluate.load('rouge')

        # Pass the device explicitly so the inputs always land on the same
        # device the model was just moved to, instead of relying on the
        # callee's definition-time default.
        score = self.calculate_metric_on_test_ds(
            dataset['test'][0:num_samples], rouge_metric, model, tokenizer,
            batch_size=batch_size, device=device,
            column_text='dialogue', column_summary='summary')

        rouge_dict = {rn: score[rn] for rn in rouge_names}
        rouge_df = pd.DataFrame([rouge_dict])
        rouge_df.to_csv(self.config.metric_file_name, index=False)




In [20]:
# Run the model-evaluation stage end to end. Exceptions propagate unchanged so
# the notebook shows the original traceback; the former try/except only
# re-raised the same exception and added nothing.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.Evaluate()

[2024-09-30 12:25:38,238] [INFO] [common] - yaml file: config\config.yaml loaded successfully
[2024-09-30 12:25:38,243] [INFO] [common] - yaml file: params.yaml loaded successfully
[2024-09-30 12:25:38,245] [INFO] [common] - directory created: artifacts
[2024-09-30 12:25:38,246] [INFO] [common] - directory created: artifacts/model_evaluation


100%|██████████| 15/15 [13:56<00:00, 55.78s/it]


[2024-09-30 12:39:41,097] [INFO] [rouge_scorer] - Using default tokenizer.
