In [1]:
import os

In [2]:
%pwd

'f:\\Files\\DS&ML\\E2E-Credit-Fraud-Detection\\Exp'

In [3]:
# Move the kernel's working directory up one level. In the recorded run this
# changes it from the `Exp` folder to its parent, `E2E-Credit-Fraud-Detection`.
# Presumably needed so the relative config/artifact paths used below resolve
# from that folder -- TODO confirm.
# NOTE: not idempotent -- re-running this cell climbs another directory level.
os.chdir('../')

In [4]:
%pwd

'f:\\Files\\DS&ML\\E2E-Credit-Fraud-Detection'

In [5]:
from dataclasses import dataclass
from pathlib import Path

In [6]:
@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable bundle of settings consumed by the data-validation step.

    The annotations document intent only; dataclasses do not enforce them
    at runtime.
    """

    # Directory for this step; created by ConfigurationManager before the
    # config object is built.
    root_dir: Path
    # CSV file whose column names are validated (read via pd.read_csv).
    unzip_file: Path
    # File that receives the "Validation_status: ..." result line.
    status_file: Path
    # Mapping whose keys are the expected column names.
    all_schema: dict

In [7]:
from project.constants import *
from project.utils.common import *

In [8]:
class ConfigurationManager:
    """Load the project's YAML files and build per-step configuration objects.

    Construction has side effects: the three YAML files are read through
    `read_yaml` and the directory named by `artifacts_root` in the main
    config is created through `create_directories`.
    """

    def __init__(
        self,
        config_filepath=CONFIG_PATH,
        params_filepath=PARAMS_PATH,
        schema_filepath=SCHEMA_PATH
    ):
        """Read the config, params and schema files and create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        # The loaded objects are accessed by attribute below, so `read_yaml`
        # presumably returns an attribute-style mapping -- verify in
        # project.utils.common.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)
        create_directories([self.config.artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Build the settings object for the data-validation step.

        Creates the step's `root_dir` as a side effect.

        Returns:
            DataValidationConfig: paths taken from the `data_validation`
            section of the main config (converted to `Path`) together with
            the `COLUMNS` section of the schema file.
        """
        config = self.config.data_validation
        schema = self.schema.COLUMNS

        create_directories([config.root_dir])

        # DataValidationConfig declares these three fields as Path, but the
        # values loaded from YAML are not; convert so the runtime types match
        # the declared ones. Path objects are accepted by open(), os.path and
        # pandas readers alike.
        return DataValidationConfig(
            root_dir=Path(config.root_dir),
            unzip_file=Path(config.unzip_file),
            status_file=Path(config.status_file),
            all_schema=schema,
        )

In [9]:
import pandas as pd

In [10]:
class DataValidation:
    """Check that every column of a CSV file is declared in an expected schema."""

    def __init__(self, config):
        """Keep the settings object used by `validation`.

        Args:
            config: Object exposing `unzip_file` (CSV to inspect),
                `status_file` (where the result is written) and
                `all_schema` (mapping keyed by expected column name).
        """
        self.config = config

    def validation(self) -> bool:
        """Validate the CSV's column names and record the outcome.

        The result is True when every column found in the CSV is a key of
        `all_schema`. The check is one-directional: schema columns that are
        absent from the CSV do NOT cause a failure.

        The outcome is written to `status_file` as
        ``Validation_status: <True|False>``, overwriting any previous content.

        Returns:
            bool: True if all CSV columns are declared in the schema.

        Raises:
            Exception: Any error from reading the CSV or writing the status
                file propagates unchanged to the caller.
        """
        # Only the column names are needed, so parse just the header row
        # (nrows=0) instead of loading the entire dataset into memory.
        header = pd.read_csv(self.config.unzip_file, nrows=0)
        expected_cols = set(self.config.all_schema.keys())

        validation_status = set(header.columns).issubset(expected_cols)

        with open(self.config.status_file, 'w') as f:
            f.write(f"Validation_status: {validation_status}")

        return validation_status

In [11]:
# Run the data-validation step end to end: load configuration, build the
# step's settings, then validate the dataset's columns. Failures are logged
# rather than raised, so this cell completes even when validation cannot run.
try:
    config = ConfigurationManager()
    data_validation_config = config.get_data_validation_config()
    data_validation = DataValidation(data_validation_config)
    data_validation.validation()
except FileNotFoundError as e:
    # Must come before the generic handler: FileNotFoundError is a subclass
    # of Exception, so placed after `except Exception` it can never run.
    logger.error(f"Validation failed: {str(e)}")
except Exception as e:
    logger.error(f"Validation failed: {str(e)}")
    

[2025-04-21 17:44:03,936: INFO: common: yaml file: yaml file\config.yaml loaded successfully]
[2025-04-21 17:44:03,944: INFO: common: yaml file: yaml file\params.yaml loaded successfully]
[2025-04-21 17:44:03,953: INFO: common: yaml file: yaml file\schema.yaml loaded successfully]
[2025-04-21 17:44:03,955: INFO: common: created directory at: artifacts]
[2025-04-21 17:44:03,958: INFO: common: created directory at: artifacts/data_validation]
