In [None]:
import os
import pandas as pd
import gdown
from pathlib import Path
from dataclasses import dataclass
from fraud_detection_project.constants import *
from fraud_detection_project.utils.common import read_yaml, create_directories

In [None]:
@dataclass(frozen=True)
class DataValidationConfig:
    root_dir: Path
    STATUS_FILE: Path
    data_file: Path
    all_schema: dict

In [None]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        config = self.config.data_validation
        schema = self.schema.COLUMNS

        # Ensure the root folder exists
        create_directories([config.root_dir])

        status_file_path = Path(config.STATUS_FILE)
        data_file_path = Path(config.data_dir)

        return DataValidationConfig(
            root_dir=config.root_dir,
            STATUS_FILE=status_file_path,
            data_file=data_file_path,
            all_schema=schema
        )

In [1]:
class DataValidation:
    def __init__(self, config: DataValidationConfig):
        self.config = config

    def validate_all_columns(self) -> bool:
        try:
            data = pd.read_csv(self.config.data_file)
            all_cols = list(data.columns)
            all_schema = self.config.all_schema.keys()

            validation_status = all(col in all_schema for col in all_cols)

            # Write validation status to file
            with open(self.config.STATUS_FILE, 'w') as f:
                f.write(f"Validation status: {validation_status}")

            return validation_status

        except Exception as e:
            raise e

NameError: name 'DataValidationConfig' is not defined

In [None]:
try:
    config = ConfigurationManager()
    data_validation_config = config.get_data_validation_config()

    gdrive_file_id = '1JRNRK-Iv5jD3aqf9F6Pn-eirYZreGbsi'
    local_csv_path = data_validation_config.data_file

    # Download CSV from Google Drive if it doesn't exist,
    if not local_csv_path.exists():
        local_csv_path.parent.mkdir(parents=True, exist_ok=True)
        url = f'https://drive.google.com/uc?id={gdrive_file_id}'
        gdown.download(url, str(local_csv_path), quiet=False)
    else:
        print(f"{local_csv_path} already exists. Skipping download.")

    # Run validation
    data_validation = DataValidation(config=data_validation_config)
    status = data_validation.validate_all_columns()
    print(f"Data validation completed. Status: {status}")

except Exception as e:
    raise e