In [4]:
import os

In [5]:
%pwd

'c:\\Users\\shiva\\OneDrive\\Desktop\\Work\\NLP\\Projects\\Text_Summarizer_Project\\research'

In [6]:
os.chdir("../")

In [7]:
%pwd

'c:\\Users\\shiva\\OneDrive\\Desktop\\Work\\NLP\\Projects\\Text_Summarizer_Project'

Defining an Entity:

In [8]:
from dataclasses import dataclass
from pathlib import Path

class ModelEvaluationConfig:
    def __init__(self, root_dir, data_path, model_path, tokenizer_path, metric_file_name):
        self.root_dir = root_dir
        self.data_path = data_path
        self.model_path = model_path
        self.tokenizer_path = tokenizer_path
        self.metric_file_name = metric_file_name 


Defining the configuration Manager:

In [9]:
from textSummarizer.constants import *
from textSummarizer.utils.common import read_yaml,create_directories

In [10]:
class ConfigurationManager:
    
    #Defining a constructor with following paramteres:
    def __init__(self,config_filepath = CONFIG_FILE_PATH, params_filepath = PARAMS_FILE_PATH):

        #Obtaining the config from the file_path:
        self.config = read_yaml(config_filepath)
        
        #Obtaining the paramters from the file_path:
        self.params = read_yaml(params_filepath)

        #Creating a new artifact: root folder:
        create_directories([self.config.artifacts_root])


    #Defining a function to Obtain model evaluation:
    def get_model_evaluation_config(self) -> ModelEvaluationConfig:

        #Obtaining the configuration of the current model:
        config = self.config.model_evaluation

        #Creating directory:
        create_directories([config.root_dir])

        #Defining model evaluation configuration:
        model_evaluation_config = ModelEvaluationConfig(
            root_dir = config.root_dir,
            data_path = config.data_path,
            model_path = config.model_path,
            tokenizer_path = config.tokenizer_path,
            metric_file_name = config.metric_file_name
        )

        #Returning the Model Evaluation config:
        return model_evaluation_config


Creating components:

In [11]:
pip install evaluate

Note: you may need to restart the kernel to use updated packages.


In [12]:
pip install rouge_metric

Note: you may need to restart the kernel to use updated packages.


In [17]:
from transformers import pipeline, set_seed
from datasets import load_from_disk
import matplotlib.pyplot as plt
import pandas as pd
from evaluate import load
metric = load("rouge")
from evaluate import load as load_metric
from transformers import AutoModelForSeq2SeqLM,AutoTokenizer
import nltk
from nltk.tokenize import sent_tokenize
from tqdm import tqdm
import torch
import json

In [18]:
#Creating components for Model_Evaluation:
class ModelEvaluation:

    #Defining the constructor:
    def __init__(self,config: ModelEvaluationConfig):
        self.config = config


    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        for i in range(0, len(list_of_elements), batch_size):
            batch = list_of_elements[i : i + batch_size]
            if not batch:  # Prevents yielding empty batch
                break
            print(f"Processing batch {i // batch_size + 1}/{(len(list_of_elements) + batch_size - 1) // batch_size}")
            yield batch



    
    # Define the device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    #A function to calculate the metric of the test dataset:
    def calculate_metric_on_test_ds(self,dataset,metric,model,tokenizer,batch_size = 4,device = device,column_text = "article",column_summary = "highlights"):
        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text],batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary],batch_size))

        for article_batch,target_batch in tqdm(
            zip(article_batches,target_batches),total = len(article_batches)):

            inputs = tokenizer(article_batch,max_length = 1024,truncation = True,padding = "max_length",return_tensors = "pt")

            summaries = model.generate(input_ids = inputs["input_ids"].to(device),
                                        attention_mask = inputs["attention_mask"].to(device),
                                        length_penalty = 0.8,num_beams = 8, max_length = 128)

            ''' parameter for length penalty ensures that the model does not generate sequences that are too long.'''

            # Finally, we decode the generated texts,
            # replace the token, and add the decoded texts with the references to the metric:
            decoded_summaries = [tokenizer.decode(s,skip_special_tokens = True,clean_up_tokenization_spaces = True) for s in summaries]

            decoded_summaries = [d.replace("", " ") for d in decoded_summaries]

            metric.add_batch(predictions = decoded_summaries,references = target_batch)

        score = metric.compute()
        return score


    #A function to evaluate metric:
    def evaluate(self):

        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)

        #Loading data:
        dataset_samsum_pt = load_from_disk(self.config.data_path)

        rouge_names = ["rouge1","rouge2","rougeL","rougeLsum"]
        rouge_metric = load_metric('rouge') 

        # Defining the score:
        score = self.calculate_metric_on_test_ds(
            dataset_samsum_pt['test'], rouge_metric,model_pegasus,tokenizer,
            column_text = "dialogue",column_summary = "summary"
        )

        with open("artifacts/model_evaluation/rouge_raw_scores.json", "w") as f:
            json.dump({k: float(v) for k, v in score.items()}, f, indent=4)

        #Defining the rouge dictionary for metrices:
        rouge_dict = dict((rn,score[rn]) for rn in rouge_names)

        #Converting the metric dictionary into dataframe:
        df = pd.DataFrame(rouge_dict,index = ['pegasus'])

        #Saving the dataframe in csv format:
        df.to_csv(self.config.metric_file_name,index = False)

Defining the PipeLine:

In [19]:
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation_config = ModelEvaluation(config = model_evaluation_config)
    model_evaluation_config.evaluate()

except Exception as e:
    raise e

[2025-04-05 15:26:40,024: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-04-05 15:26:40,032: INFO: common: yaml file: params.yaml loaded successfully]
[2025-04-05 15:26:40,032: INFO: common: created directory at: artifacts]
[2025-04-05 15:26:40,041: INFO: common: created directory at: artifacts/model_evaluation]


Processing batch 1/205
Processing batch 2/205
Processing batch 3/205
Processing batch 4/205
Processing batch 5/205
Processing batch 6/205
Processing batch 7/205
Processing batch 8/205
Processing batch 9/205
Processing batch 10/205
Processing batch 11/205
Processing batch 12/205
Processing batch 13/205
Processing batch 14/205
Processing batch 15/205
Processing batch 16/205
Processing batch 17/205
Processing batch 18/205
Processing batch 19/205
Processing batch 20/205
Processing batch 21/205
Processing batch 22/205
Processing batch 23/205
Processing batch 24/205
Processing batch 25/205
Processing batch 26/205
Processing batch 27/205
Processing batch 28/205
Processing batch 29/205
Processing batch 30/205
Processing batch 31/205
Processing batch 32/205
Processing batch 33/205
Processing batch 34/205
Processing batch 35/205
Processing batch 36/205
Processing batch 37/205
Processing batch 38/205
Processing batch 39/205
Processing batch 40/205
Processing batch 41/205
Processing batch 42/205
P

100%|██████████| 205/205 [2:39:27<00:00, 46.67s/it]  

[2025-04-05 18:06:17,405: INFO: rouge_scorer: Using default tokenizer.]



