In [1]:
# Show the kernel's current working directory before any directory change.
%pwd

'/home/jovyan/workspace/Text-Summarization/research'

In [2]:
import os
# Step up from the notebook folder to the project root so that relative paths
# resolve from there. The guard keeps the cell idempotent: an unconditional
# relative chdir would climb one more level on every re-run of this cell.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [5]:
# Show the working directory again to confirm the directory change took effect.
%pwd

'/home/jovyan/workspace/Text-Summarization'

## Entities 

In [6]:
from dataclasses import dataclass
from pathlib import Path

In [7]:

@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable bundle of filesystem paths for the model-evaluation stage.

    ``frozen=True`` makes instances read-only: assigning to a field after
    construction raises ``dataclasses.FrozenInstanceError``.

    ``ConfigurationManager.get_validation_config`` builds an instance from the
    ``model_evaluation`` section of the loaded config, wrapping every value
    in ``Path``.
    """
    # NOTE(review): the field meanings below are inferred from their names
    # only; confirm against the `model_evaluation` section of the config file.
    root_dir: Path          # presumably the stage's own working/output directory
    data_path: Path         # presumably the dataset to evaluate on
    model_path: Path        # presumably the saved model location
    tokenizer_path: Path    # presumably the saved tokenizer location
    metric_file_name: Path  # presumably where the computed metrics are written
    
    

## Configuration manager in src/config

In [9]:
from textSummarization.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from textSummarization.utils.common import read_yaml, create_directories

In [10]:
class ConfigurationManager:
    """Load the project's YAML settings and build typed per-stage configs.

    Constructing an instance prints a short banner, reads the config and
    params YAML files, and asks ``create_directories`` to create the
    ``artifacts_root`` directory named in the config.
    """

    def __init__(
            self,
            config_filepath=CONFIG_FILE_PATH,
            params_filepath=PARAMS_FILE_PATH
    ):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main config YAML file.
            params_filepath: Path of the params YAML file.
        """
        print("Configuration Manager Initiated")
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_validation_config(self) -> EvaluationConfig:
        """Return the ``model_evaluation`` config section as an ``EvaluationConfig``.

        Every value is wrapped in ``Path`` before being handed to the dataclass.
        """
        section = self.config.model_evaluation
        field_names = (
            "root_dir",
            "data_path",
            "model_path",
            "tokenizer_path",
            "metric_file_name",
        )
        # Read each setting by attribute, in the same order as the dataclass fields.
        as_paths = {name: Path(getattr(section, name)) for name in field_names}
        return EvaluationConfig(**as_paths)


## Components

In [11]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_metric, load_from_disk, load_dataset
import torch
import pandas as pd
from tqdm import tqdm

In [15]:
class Evaluation:
    """Holds the evaluation config and provides a batching helper.

    NOTE(review): the pipeline cell in this notebook calls ``evaluation()``
    and ``save_score()`` on this class, but neither method is defined here.
    Confirm where they are supposed to come from before running the pipeline.
    """

    def __init__(self, config: EvaluationConfig):
        """Store the evaluation configuration on the instance.

        Args:
            config: Paths for the evaluation stage.
        """
        self.config = config

    def generate_batch_sized_chunks(self, list_of_elements, batch_size):
        """Yield successive batch-sized chunks from ``list_of_elements``.

        Splits the input into consecutive slices so each batch can be
        processed in one go. The final chunk may be shorter than
        ``batch_size``; an empty input yields nothing.

        Args:
            list_of_elements: Any sequence supporting ``len()`` and slicing.
            batch_size: Maximum number of elements per chunk; must be >= 1.

        Yields:
            Slices of ``list_of_elements`` of length at most ``batch_size``.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1. As this is a
                generator, the error surfaces on the first iteration.
        """
        # A zero step makes range() raise anyway, and a negative step would
        # silently produce no batches at all, so reject both explicitly.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        for start in range(0, len(list_of_elements), batch_size):
            yield list_of_elements[start:start + batch_size]
                
    
    

## Pipeline

In [16]:
# Run the evaluation stage: load the configuration, build the stage config,
# then evaluate and persist the score. Exceptions propagate unchanged, so no
# try/except wrapper is needed just to re-raise them.
# NOTE(review): `evaluation()` and `save_score()` are not defined on the
# `Evaluation` class shown in this notebook; confirm they exist before running.
config = ConfigurationManager()
val_config = config.get_validation_config()
evaluation = Evaluation(config=val_config)
evaluation.evaluation()
evaluation.save_score()


Configuration Manager Initiated
[ 2023-09-28 07:06:40,889: INFO: common:  yaml file: config/config.yaml loaded successfully]
[ 2023-09-28 07:06:40,891: INFO: common:  yaml file: params.yaml loaded successfully]
[ 2023-09-28 07:06:40,892: INFO: common:  directory: artifacts created successfully]
[ 2023-09-28 07:06:40,893: INFO: common:  directory: artifacts created successfully]
Found 240 images belonging to 2 classes.
[ 2023-09-28 07:07:23,566: INFO: common:  json file: scores.json saved successfully]
