In [1]:
import os

In [2]:
# IPython line magic: display the kernel's current working directory.
%pwd

'f:\\Files\\DSML\\Condition2Cure\\notebook'

In [3]:
os.chdir('../')
%pwd

'f:\\Files\\DSML\\Condition2Cure'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable bundle of settings consumed by the data-validation step.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory created for this stage's outputs.
    root_dir: Path
    # Location the validation status JSON is written to.
    status_file: str
    # Path handed to ``pd.read_csv`` when validating (a CSV file, despite the name).
    unzip_data_dir: Path
    # Expected columns; its keys are compared against the CSV header.
    all_schema: dict

In [5]:
from Condition2Cure.constants import *
from Condition2Cure.utils.helpers import *
from Condition2Cure.utils.execptions import CustomException
from Condition2Cure import logger
import pandas as pd

In [6]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects.

    Args:
        config_filepath: YAML file holding ``artifacts_root`` and the
            ``data_validation`` section.
        params_filepath: YAML file with parameters (loaded and kept on
            ``self.params``; not used by the methods in this class).
        schema_filepath: YAML file whose ``columns`` entry lists the expected
            dataset columns.

    Side effects:
        Construction passes ``artifacts_root`` to ``create_directories``.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)
        create_directories([self.config.artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Assemble the settings for the data-validation stage.

        Passes the stage's ``root_dir`` to ``create_directories`` before
        returning.

        Returns:
            DataValidationConfig: populated from the ``data_validation`` section
            of the main config and the ``columns`` entry of the schema file.
        """
        stage_cfg = self.config.data_validation
        expected_columns = self.schema.columns

        create_directories([stage_cfg.root_dir])

        # The dataclass annotates these two fields as Path, so convert the raw
        # YAML values instead of passing them through untouched.
        return DataValidationConfig(
            root_dir=Path(stage_cfg.root_dir),
            status_file=stage_cfg.status_file,
            unzip_data_dir=Path(stage_cfg.unzip_data_dir),
            all_schema=expected_columns,
        )

In [7]:
class DataValidation:
    """Checks that a CSV dataset contains every column named in the schema.

    Args:
        config: Settings object exposing ``unzip_data_dir`` (CSV to inspect),
            ``all_schema`` (expected columns) and ``status_file`` (where the
            outcome is written).
    """

    def __init__(self, config: "DataValidationConfig"):
        self.config = config

    def validate_all_columns(self) -> bool:
        """Compare the CSV header against the schema and persist the outcome.

        Only column *names* are checked; dtypes are not compared. Extra columns
        in the CSV that the schema does not mention do not fail validation.

        Returns:
            bool: True when every schema column is present in the CSV header.

        Raises:
            Any exception from reading the CSV or saving the status file
            propagates unchanged.
        """
        # Only the header matters here; nrows=0 parses the column names without
        # loading any data rows.
        header = pd.read_csv(self.config.unzip_data_dir, nrows=0)
        actual_columns = list(header.columns)

        # Iterating a mapping yields its keys, so this covers both a dict-like
        # schema and a plain sequence of column names.
        expected_columns = list(self.config.all_schema)

        validation_status = all(col in actual_columns for col in expected_columns)

        save_json(Path(self.config.status_file), {"Validation status": validation_status})

        return validation_status

In [8]:
# sys is handed to CustomException in the handler below. No cell imports it
# explicitly, so import it here; otherwise a failure could surface as a NameError
# and hide the real error.
import sys

# Run the data-validation stage: load settings, build the validator, check columns.
try:
    config = ConfigurationManager()
    data_validation_config = config.get_data_validation_config()
    data_validation = DataValidation(config=data_validation_config)
    data_validation.validate_all_columns()

except Exception as e:
    # Chain explicitly so the original exception stays attached as the cause.
    raise CustomException(str(e), sys) from e

[2025-06-21 16:02:12,713: INFO: helpers: yaml file: config\config.yaml loaded successfully]
[2025-06-21 16:02:12,722: INFO: helpers: yaml file: config\params.yaml loaded successfully]
[2025-06-21 16:02:12,726: INFO: helpers: yaml file: config\schema.yaml loaded successfully]
[2025-06-21 16:02:12,729: INFO: helpers: created directory at: artifacts]
[2025-06-21 16:02:12,731: INFO: helpers: created directory at: artifacts/data_validation]
[2025-06-21 16:02:14,007: INFO: helpers: json file saved at: artifacts\data_validation\status.json]
