In [1]:
%pwd

'f:\\Project\\Text_Summarization\\research'

In [2]:
import os
os.chdir('../')
%pwd

'f:\\Project\\Text_Summarization'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen= True)
class ModelEvaluationConfig:
    root_dir: Path
    dataset_path: Path
    model_ckpt: Path
    tokenizer_path: Path
    metric_file_name: Path

In [4]:
from textSummarizer.constants import *
from textSummarizer.utils.common import read_yaml, create_directories

class ConfigManager:
    def __init__(self, config_path = CONFIG_FILE_PATH, params_path = PARAMS_FILE_PATH):
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_path)
        create_directories([self.config.artifacts_root])
    
    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        config = self.config.model_evaluation
        model_eval_config = ModelEvaluationConfig(
            root_dir= config.root_dir,
            dataset_path= os.path.join(
                self.config.data_transformation.root_dir,
                self.config.dataset_name + '_dataset'),
            model_ckpt= os.path.join(self.config.model_trainer.root_dir, 'model'),
            tokenizer_path= os.path.join(self.config.model_trainer.root_dir, 'tokenizer'),
            metric_file_name  = config.metric_file_name
        )
        return model_eval_config
        

In [5]:
from textSummarizer.logging import logger
from datasets import load_dataset, load_from_disk, load_metric
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from tqdm import tqdm 
import pandas as pd

[2024-01-19 20:38:13,047 : INFO : utils : NumExpr defaulting to 4 threads.]


In [13]:
class ModelEvaluation:
    def __init__(self,config: ModelEvaluationConfig):
        self.config = config

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        for i in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[i: i+batch_size]
            
    def calculate_metric_on_test_dataset(self, dataset, tokenizer: AutoTokenizer, model: AutoModelForSeq2SeqLM, 
                                         metric, batch_size: int = 16,
                                         device = 'cuda' if torch.cuda.is_available() else 'cpu',
                                         text_column= 'dialogue', summary_column = 'summary'):
        x_batches = list(self.generate_batch_sized_chunks(dataset[text_column], batch_size))
        y_batches = list(self.generate_batch_sized_chunks(dataset[summary_column], batch_size))

        for x_batch, y_batch in tqdm(zip(x_batches, y_batches), total = len(x_batches)):
            inputs = tokenizer(x_batch, max_length = 1024, padding = 'max_length', truncation = True, return_tensors = 'pt')
            summaries = model.generate(input_ids = inputs['input_ids'].to(device), attention_mask = inputs['attention_mask'].to(device),
                                     length_penalty = 0.8, num_beams = 8, max_length = 128)
            decoded_summaries = [tokenizer.decode(s, skip_special_tokens = True, clean_up_tokenization_spaces = True) for s in summaries]
            decoded_summaries = [d.replace("", " ") for d in decoded_summaries]
            metric.add_batch(predictions=decoded_summaries, references=y_batch)
            
        score = metric.compute()
        return score
    
    def evaluate(self):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        model = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt).to(device)
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)

        datset = load_from_disk(os.path.join(self.config.dataset_path, 'test'))

        rouge_name = ['rouge1', 'rouge2', 'rougeL', 'rougeLsum']
        rouge_metric = load_metric('rouge')
        score  = self.calculate_metric_on_test_dataset(datset[0:2], tokenizer, model, rouge_metric, 2, device, 'dialogue', 'summary')

        rouge_dict = dict((rn, score[rn].mid.fmeasure) for rn in rouge_name)
        print(rouge_dict)
        df = pd.DataFrame([rouge_dict])
        print(df)
        df.to_csv(self.config.metric_file_name, index= 0)



In [14]:
try:
    config_manager = ConfigManager()
    model_evaluation_config = config_manager.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(model_evaluation_config)
    model_evaluation.evaluate()
except Exception as e:
    raise e

[2024-01-19 21:22:54,375 : INFO : common : yaml file : config\config.yml loaded succefully]
[2024-01-19 21:22:55,122 : INFO : common : yaml file : params.yml loaded succefully]
[2024-01-19 21:22:55,139 : INFO : common : Created Director at: artifacts]


100%|██████████| 1/1 [04:13<00:00, 253.25s/it]


[2024-01-19 21:33:51,128 : INFO : rouge_scorer : Using default tokenizer.]
{'rouge1': 0.023421325051759836, 'rouge2': 0.0, 'rougeL': 0.023421325051759836, 'rougeLsum': 0.023421325051759836}
     rouge1  rouge2    rougeL  rougeLsum
0  0.023421     0.0  0.023421   0.023421


OSError: Cannot save file into a non-existent directory: 'artifacts\model_evaluation'