In [2]:
import os
print(os.getcwd())

# The notebook is launched from `research/`, while config/ and src/ are resolved
# relative to the project root one level up. Only step up while we are still inside
# `research/`, so that re-running this cell is idempotent instead of climbing one
# directory further on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")
print(os.getcwd())

c:\Users\Mohit\Desktop\Mohit\Projects\TextSummarizer\research
c:\Users\Mohit\Desktop\Mohit\Projects\TextSummarizer


In [9]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelEvaluationConfig:
    """Settings bundle for the model-evaluation stage.

    Every field is a filesystem location; the values are filled in from the
    ``model_evaluation`` section of the project configuration.
    """

    root_dir: Path  # working directory of the evaluation stage
    data_path: Path  # dataset location handed to the dataset loader
    model_path: Path  # location the model weights are loaded from
    tokenizer_path: Path  # location the tokenizer is loaded from
    metric_file_name: Path  # CSV file the computed metrics are written to

In [10]:
from src.textSummarizer.constants import *
from src.textSummarizer.utils.common import read_yaml, create_directories

In [17]:
class ConfigurationManager:
    """Loads the project's YAML settings and hands out per-stage config objects."""

    def __init__(
        self,
        config_path=CONFIG_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH,
    ):
        """Read the config and params YAML files and create the artifacts root.

        Args:
            config_path: Location of the main configuration YAML.
            params_file_path: Location of the parameters YAML.
        """
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_file_path)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the evaluation-stage config from the ``model_evaluation`` section.

        The stage's ``root_dir`` is created as a side effect before the
        config object is returned.
        """
        section = self.config.model_evaluation
        create_directories([section.root_dir])

        # Field names on the dataclass mirror the keys of the YAML section one-to-one.
        field_names = (
            "root_dir",
            "data_path",
            "model_path",
            "tokenizer_path",
            "metric_file_name",
        )
        return ModelEvaluationConfig(
            **{name: getattr(section, name) for name in field_names}
        )

In [18]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_from_disk
import torch
import pandas as pd
from tqdm import tqdm
import evaluate

In [23]:
class ModelEvaluation:
    """Scores a saved seq2seq summarization model with ROUGE and stores the result.

    The instance only holds the stage configuration; the model, tokenizer and
    dataset are loaded on demand inside :meth:`evaluate`.
    """

    # String annotation: the config dataclass is defined in a different notebook cell,
    # so the class definition itself should not depend on that name being bound yet.
    def __init__(self, config: "ModelEvaluationConfig"):
        self.config = config

    def generate_batch_sized_chunks(self, list_of_elements: list, batch_size: int):
        """Yield successive ``batch_size``-sized slices of ``list_of_elements``.

        The last chunk is shorter when the length is not a multiple of
        ``batch_size``; an empty input yields nothing.
        """
        for i in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[i: i + batch_size]

    def calculate_metric_on_test_ds(
        self,
        dataset,
        metric,
        model,
        tokenizer,
        batch_size=16,
        device="cuda" if torch.cuda.is_available() else "cpu",
        column_text="article",
        column_summary="highlights"
    ):
        """Generate summaries batch by batch and accumulate them into ``metric``.

        Args:
            dataset: Mapping-like object where ``dataset[column_text]`` and
                ``dataset[column_summary]`` are equally long sequences of strings.
            metric: Object exposing ``add_batch(predictions=..., references=...)``
                and ``compute()``.
            model: Object exposing ``generate(input_ids=..., attention_mask=..., ...)``.
            tokenizer: Callable tokenizer that also exposes ``decode``.
            batch_size: Number of examples sent through the model per step.
            device: Device the tokenized inputs are moved to; it must match the
                device the model already lives on.
            column_text: Key of the source-text column.
            column_summary: Key of the reference-summary column.

        Returns:
            Whatever ``metric.compute()`` returns after all batches were added.
        """
        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(
            zip(article_batches, target_batches),
            total=len(article_batches)
        ):
            inputs = tokenizer(
                article_batch,
                max_length=1024,
                truncation=True,
                padding="max_length",
                return_tensors="pt"
            )

            summaries = model.generate(
                input_ids=inputs["input_ids"].to(device),
                attention_mask=inputs["attention_mask"].to(device),
                length_penalty=0.8,  # Ensures that the model does not generate sequences that are too long
                num_beams=8,
                max_length=128
            )

            # Decode the generated ids, then turn the "<n>" newline marker into a space.
            # The marker must be spelled out: replacing the empty string would insert a
            # space between every single character and wreck the ROUGE scores.
            decoded_summaries = [
                tokenizer.decode(
                    s,
                    skip_special_tokens=True,
                    clean_up_tokenization_spaces=True
                ).replace("<n>", " ")
                for s in summaries
            ]

            metric.add_batch(predictions=decoded_summaries, references=target_batch)

        # Finally compute and return the ROUGE scores
        score = metric.compute()
        return score

    def evaluate(self):
        """Run the evaluation end to end and write the ROUGE scores to a CSV file.

        Loads the tokenizer, model and dataset from the configured paths, scores the
        first ten examples of the ``test`` split, and saves a single-row table with
        the ``rouge1``, ``rouge2``, ``rougeL`` and ``rougeLsum`` values to
        ``config.metric_file_name``.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)

        # Loading data
        dataset_samsum_pt = load_from_disk(self.config.data_path)

        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]

        rouge_metric = evaluate.load("rouge")

        score = self.calculate_metric_on_test_ds(
            dataset=dataset_samsum_pt["test"][0: 10],
            metric=rouge_metric,
            model=model_pegasus,
            tokenizer=tokenizer,
            batch_size=2,
            device=device,  # keep the inputs on the same device the model was moved to
            column_text="dialogue",
            column_summary="summary"
        )

        # Directly use the scores without accessing fmeasure or mid
        rouge_dict = {rn: score[rn] for rn in rouge_names}

        df = pd.DataFrame(rouge_dict, index=["pegasus"])
        df.to_csv(self.config.metric_file_name, index=False)

In [24]:
# Driver cell: wire the stage together and run it.
# Loads the YAML settings using the default config/params paths.
config = ConfigurationManager()
# Extract the evaluation-stage settings (this also creates the stage's root directory).
model_evaluation_config = config.get_model_evaluation_config()
model_evaluation = ModelEvaluation(config=model_evaluation_config)
# Generates summaries, computes ROUGE and writes the metrics CSV; returns nothing.
model_evaluation.evaluate()

[2025-01-21 20:49:25,913: INFO: common: Yaml file: config\config.yaml loaded successfully]
[2025-01-21 20:49:25,917: INFO: common: Yaml file: params.yaml loaded successfully]
[2025-01-21 20:49:25,920: INFO: common: Created directory at: artifacts]
[2025-01-21 20:49:25,920: INFO: common: Created directory at: artifacts/model_evaluation]


100%|██████████| 5/5 [01:51<00:00, 22.33s/it]

[2025-01-21 20:51:22,662: INFO: rouge_scorer: Using default tokenizer.]



