In [1]:
import os
%pwd

'c:\\Users\\latif\\OneDrive\\Belgeler\\GitHub\\Text_Summarization\\research'

In [2]:
# This notebook sits in the repository's research/ folder, while the config and
# artifact paths used below are relative to the repository root. Step up one
# level only while still inside research/, so re-running this cell does not
# keep climbing the directory tree and break those relative paths.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable bundle of filesystem locations used by the model-evaluation stage.

    ``frozen=True`` makes instances read-only: assigning to a field after
    construction raises ``dataclasses.FrozenInstanceError``.
    """
    # Working directory of the stage; created by
    # ConfigurationManager.get_model_evaluation_config before this object is built.
    root_dir: Path
    # Dataset location handed to ``load_from_disk`` in ModelEvaluation.evaluate.
    data_path: Path
    # Model location handed to ``AutoModelForSeq2SeqLM.from_pretrained``.
    model_path: Path
    # Tokenizer location handed to ``AutoTokenizer.from_pretrained``.
    tokenizer_path: Path
    # Presumably the file the evaluation scores are meant to be written to — TODO confirm.
    metric_file_name: Path

    

In [4]:
from text_summarization.constants import *
from text_summarization.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Reads the project's YAML files and builds typed per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """Build the configuration for the model-evaluation stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A ``ModelEvaluationConfig`` whose fields are ``pathlib.Path`` objects,
            matching the types the dataclass declares.
        """
        config = self.config.model_evaluation

        create_directories([config.root_dir])

        # The YAML values are wrapped in Path so the frozen dataclass really
        # holds the types it advertises instead of raw configuration values.
        return ModelEvaluationConfig(
            root_dir=Path(config.root_dir),
            data_path=Path(config.data_path),
            model_path=Path(config.model_path),
            tokenizer_path=Path(config.tokenizer_path),
            metric_file_name=Path(config.metric_file_name),
        )

In [6]:
from datasets import load_from_disk, load_dataset, load_metric
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import pandas as pd
from tqdm import tqdm

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]


In [8]:
class ModelEvaluation:
    """Scores a fine-tuned seq2seq summarization model with ROUGE.

    ``evaluate`` loads the tokenizer, model and dataset from the locations held
    in the configuration. The two helpers are static methods because they use
    no instance state.
    """

    def __init__(self, config: "ModelEvaluationConfig"):
        """Store the evaluation configuration; nothing is loaded until ``evaluate``."""
        self.config = config

    @staticmethod
    def generate_batch_sized_chunks(list_of_elements, batch_size):
        """Yield consecutive slices of ``list_of_elements`` holding at most ``batch_size`` items.

        The last slice is shorter when the length is not a multiple of
        ``batch_size``; an empty input yields nothing.
        """
        for i in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[i : i + batch_size]

    @staticmethod
    def calculate_metric_on_test_ds(dataset, metric, model, tokenizer,
                                   batch_size=16, device=None,
                                   column_text='article',
                                   column_summary='highlights'):
        """Generate a summary for every input text and score it against its reference.

        Args:
            dataset: Mapping from column name to a list of strings.
            metric: Object exposing ``add_batch(predictions=, references=)`` and ``compute()``.
            model: Object exposing ``generate(input_ids=, attention_mask=, ...)``.
            tokenizer: Callable tokenizer that also exposes ``decode``.
            batch_size: Number of texts summarized per ``generate`` call.
            device: Device the input tensors are moved to; CUDA when available,
                otherwise CPU, if left as ``None``.
            column_text: Key of the input texts in ``dataset``.
            column_summary: Key of the reference summaries in ``dataset``.

        Returns:
            Whatever ``metric.compute()`` returns after all batches were added.
        """
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        article_batches = list(ModelEvaluation.generate_batch_sized_chunks(dataset[column_text], batch_size))
        target_batches = list(ModelEvaluation.generate_batch_sized_chunks(dataset[column_summary], batch_size))

        for article_batch, target_batch in tqdm(
            zip(article_batches, target_batches), total=len(article_batches)):

            inputs = tokenizer(article_batch, max_length=1024, truncation=True,
                              padding="max_length", return_tensors="pt")

            summaries = model.generate(input_ids=inputs["input_ids"].to(device),
                                      attention_mask=inputs["attention_mask"].to(device),
                                      length_penalty=0.8, num_beams=8, max_length=128)

            decoded_summaries = [tokenizer.decode(s, skip_special_tokens=True,
                                                  clean_up_tokenization_spaces=True)
                                 for s in summaries]

            # Pegasus-style tokenizers emit "<n>" as a newline placeholder; turn it
            # into a space. Replacing the empty string here would insert a space
            # between every character and ruin the scores. For tokenizers that
            # never emit "<n>" this is a no-op.
            decoded_summaries = [d.replace("<n>", " ") for d in decoded_summaries]

            metric.add_batch(predictions=decoded_summaries, references=target_batch)

        score = metric.compute()
        return score

    def evaluate(self):
        """Run ROUGE evaluation on the first ten test examples and persist the scores.

        Loads the tokenizer, model and dataset from the configured paths, scores
        the model, writes the resulting one-row table to
        ``self.config.metric_file_name`` as CSV and returns it.

        Returns:
            A ``pandas.DataFrame`` with one ``'pegasus'`` row and one column per
            ROUGE variant.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path).to(device)

        dataset_samsum_pt = load_from_disk(self.config.data_path)

        rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
        rouge_metric = load_metric('rouge', trust_remote_code=True)

        # Call the helper through the instance and score the model loaded above,
        # on the same device the model was moved to.
        score = self.calculate_metric_on_test_ds(
            dataset_samsum_pt['test'][0:10], rouge_metric, model_pegasus, tokenizer,
            batch_size=2, device=device,
            column_text='dialogue', column_summary='summary'
        )

        rouge_dict = {rn: score[rn].mid.fmeasure for rn in rouge_names}
        metrics_df = pd.DataFrame(rouge_dict, index=['pegasus'])

        # Persist the table so the result outlives the call instead of being
        # discarded; the row label is kept as the first CSV column.
        metrics_df.to_csv(self.config.metric_file_name)
        return metrics_df










In [9]:
# Run the evaluation stage end to end. Exceptions are left to propagate
# unchanged so the traceback points straight at the failing call.
config = ConfigurationManager()
model_evaluation_config = config.get_model_evaluation_config()

# The evaluator gets its own name so the config variable keeps meaning "config".
model_evaluation = ModelEvaluation(config=model_evaluation_config)
model_evaluation.evaluate()

[2025-10-03 04:11:19,925: INFO: yaml file: config\config.yaml loaded successfully]
[2025-10-03 04:11:19,927: INFO: yaml file: params.yaml loaded successfully]
[2025-10-03 04:11:19,929: INFO: created directory at: artifacts]
[2025-10-03 04:11:19,931: INFO: created directory at: artifacts/model_evaluation]


OSError: Incorrect path_or_model_id: 'artifacts/model_trainer/tokenizer'. Please provide either the path to a local folder or the repo_id of a model on the Hub.