In [2]:
%pwd

'/workspaces/Text-Summarization/research'

In [3]:
import os

# Step up one directory so the relative paths and imports used by the cells
# below resolve from the project root (the notebook starts in research/).
# The sentinel makes the cell idempotent: a bare os.chdir('../') would climb
# one level higher every time the cell is re-run in the same kernel.
if "_PROJECT_ROOT" not in globals():
    os.chdir('../')
    _PROJECT_ROOT = os.getcwd()

In [4]:
%pwd

'/workspaces/Text-Summarization'

## Entities 

In [5]:
from dataclasses import dataclass
from pathlib import Path

In [12]:
@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable bundle of settings used by the data-validation stage."""

    # Directory belonging to the data-validation stage.
    root_dir: Path
    # Path of the text file the validation outcome is written to.
    status_file: str
    # Names of the files that must all be present for validation to pass.
    all_required_files: list

## Configuration manager in src/config

In [7]:
from src.textSummarization.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from src.textSummarization.utils.common import read_yaml, create_directories

In [14]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        print("Configuration Manager Initiated")
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Create the stage directory and return the data-validation settings.

        Returns:
            A DataValidationConfig populated from the ``data_validation``
            section of the loaded configuration.
        """
        section = self.config.data_validation
        create_directories([section.root_dir])

        return DataValidationConfig(
            root_dir=Path(section.root_dir),
            status_file=section.STATUS_FILE,
            all_required_files=section.ALL_REQUIRED_FILES,
        )
            

## Components

In [9]:
import os
from textSummarization.logging import logger

In [10]:
class DataValidation:
    """Checks that every required dataset file is present and records the outcome."""

    # Dataset directory inspected when the caller does not supply one.
    DEFAULT_DATA_DIR = Path("artifacts/data_ingestion/samsum_dataset")

    # The annotation is a string so that defining this class does not depend on
    # the entity cell having been executed first.
    def __init__(self, config: "DataValidationConfig"):
        """Store the validation settings.

        Args:
            config: Object exposing ``status_file`` and ``all_required_files``.
        """
        self.config = config

    def validate_all_files_exist(self, data_dir=None) -> bool:
        """Check that all required files exist and write the result to the status file.

        Args:
            data_dir: Directory to inspect. Defaults to ``DEFAULT_DATA_DIR``.

        Returns:
            True when every name in ``config.all_required_files`` is an entry of
            the inspected directory, False otherwise. A missing directory counts
            as "no files present" rather than raising, so the status file is
            always refreshed instead of keeping a stale result.

        Raises:
            OSError: If the status file cannot be written.
        """
        dataset_dir = Path(self.DEFAULT_DATA_DIR if data_dir is None else data_dir)

        # List the directory once instead of once per required file.
        present = set(os.listdir(dataset_dir)) if dataset_dir.is_dir() else set()

        validation_status = all(
            required in present for required in self.config.all_required_files
        )

        with open(Path(self.config.status_file), 'w') as stat_file:
            stat_file.write(f"Validation Status: {validation_status}")

        return validation_status



## Pipeline

In [15]:
# Run the data-validation stage: load the configuration, build the stage
# settings, then check that the required dataset files are present.
# No try/except wrapper: catching an exception only to re-raise it unchanged
# adds nothing, and any failure still surfaces with its full traceback.
config = ConfigurationManager()
data_validation_config = config.get_data_validation_config()
data_validation = DataValidation(config=data_validation_config)

# Left as the last expression so the boolean result is shown as the cell output.
data_validation.validate_all_files_exist()


Configuration Manager Initiated
[2023-10-08 10:51:11,741: INFO: common] yaml file: config/config.yaml loaded successfully
[2023-10-08 10:51:11,742: INFO: common] yaml file: params.yaml loaded successfully
[2023-10-08 10:51:11,743: INFO: common] directory: artifacts created successfully
[2023-10-08 10:51:11,744: INFO: common] directory: artifacts/data_validation created successfully
[2023-10-08 10:51:11,742: INFO: common] yaml file: params.yaml loaded successfully
[2023-10-08 10:51:11,743: INFO: common] directory: artifacts created successfully
[2023-10-08 10:51:11,744: INFO: common] directory: artifacts/data_validation created successfully
