In [1]:
%pwd

'/home/jovyan/workspace/Text-Summarization/research'

In [2]:
import os
os.chdir('../')

In [3]:
%pwd

'/home/jovyan/workspace/Text-Summarization'

## Entities 

In [4]:
from dataclasses import dataclass
from pathlib import Path

In [5]:

@dataclass(frozen=True)
class EvaluationConfig:
    root_dir: Path
    data_path: Path
    model_path: Path
    tokenizer_path: Path
    metric_file_name: Path
    
    

## configurtions manager in src/config

In [6]:
from textSummarization.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from textSummarization.utils.common import read_yaml, create_directories

In [14]:
class ConfigurationManager():
    def __init__(
            self,
            config_filepath=CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH
    ):
        print("Configuration Manager Initiated")
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.model_evaluation.root_dir])

    def get_evaluation_config(self) -> EvaluationConfig:
        eval_config = EvaluationConfig(
            root_dir=Path(self.config.model_evaluation.root_dir),
            data_path=Path(self.config.model_evaluation.data_path),
            model_path=Path(self.config.model_evaluation.model_path),
            tokenizer_path=Path(self.config.model_evaluation.tokenizer_path),
            metric_file_name=Path(self.config.model_evaluation.metric_file_name)
        )
        return eval_config


## Components

In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_from_disk, load_dataset
import torch
import pandas as pd
from tqdm import tqdm

In [9]:
import evaluate

In [12]:
class Evaluation:
    def __init__(self, config: EvaluationConfig):
        self.config = config
        
        
    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        
        
        for i in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[i:i+batch_size]
    
    
    def calculate_metric_on_test_ds(self, dataset, metric, model, tokenizer, device,
                                batch_size = 16,
                                column_text = 'article',
                                column_summary = 'highlights'):
        
        article_batches = list(self.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(self.generate_batch_sized_chunks(dataset[column_summary], batch_size))
        
        for article_batch, target in tqdm(zip(article_batches, target_batches),
                                          total = len(article_batches)):
            inputs = tokenizer(article_batch, truncation = True, padding = "max_length",
                         max_length = 1024, return_tensors="pt")
            summaries = model.generate(
                input_ids = inputs['input_ids'].to(device),
                attention_mask = inputs['attention_mask'].to(device),
                length_penalty = 0.8, num_beams = 4, max_length = 128
            )
            
            # paramter for penalty ensures that ............
            # finally, we decode the generated texts,
            # replace the token and add the decoded texts with the references to the metric
            
            decoded_summaries = [tokenizer.decode(s, skip_special_tokens = True,
                                                  clean_up_tokenization_spaces = True) for s in summaries]
            
            decoded_summaries = [d.replace("", " ") for d in decoded_summaries]
            metric.add_batch(predictions = decoded_summaries, references = target)
            
        score = metric.compute()
        return score
    
    
    def evaluate(self):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)
        
        # loading the dataset
        
        dataset_samsum_pt =  load_from_disk(self.config.data_path)
        
        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
        rouge_metric = evaluate.load('rouge')
        
        score =  self.calculate_metric_on_test_ds(
            dataset_samsum_pt['test'][0:10], rouge_metric, model_pegasus, tokenizer,
            batch_size = 2, column_text = 'dialogue', column_summary = 'summary', device = device
        )
        print(score)
        rouge_dict  = dict((rn, score[rn]) for rn in rouge_names)
        df = pd.DataFrame(rouge_dict, index = [f"pegasus"])
        df.to_csv(self.config.metric_file_name)
    

## Pipeline

In [15]:
try:
    config = ConfigurationManager()
    evaluation_config = config.get_evaluation_config()
    evaluation = Evaluation(config=evaluation_config)
    evaluation.evaluate()
except Exception as e:
    raise e


Configuration Manager Initiated
[2023-10-24 21:00:51,766: INFO: common] yaml file: config/config.yaml loaded successfully
[2023-10-24 21:00:51,769: INFO: common] yaml file: params.yaml loaded successfully
[2023-10-24 21:00:51,771: INFO: common] directory: artifacts/model_evaluation created successfully


100%|██████████| 5/5 [00:09<00:00,  1.82s/it]

[2023-10-24 21:01:12,556: INFO: rouge_scorer] Using default tokenizer.





{'rouge1': 0.02088039683055932, 'rouge2': 0.0, 'rougeL': 0.020484658677419974, 'rougeLsum': 0.020294797623617775}
