In [64]:
import pandas as pd
import os

In [65]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable bundle of settings used by the data-validation stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """
    # Directory belonging to the validation stage; ConfigurationManager
    # creates it before building this object.
    root_dir: str
    # Handed directly to pd.read_csv by DataValidation, so despite the "_dir"
    # suffix this is presumably the path of a CSV file -- confirm in config.yaml.
    unzip_file_dir: str
    # Path of the text file that receives the "validation status ..." line.
    STATUS_FILE: str
    # Mapping whose keys are the column names the dataset is checked against
    # (populated from the COLUMNS section of the schema file).
    all_schema: dict

In [66]:
from mlProject.constants import *


In [72]:
from mlProject.utils.common import (
    # create_directories,
      read_yaml)
from mlProject import logger
def create_directories(paths: list, verbose=True) -> None:
    """
    Ensure that every directory listed in `paths` exists.

    Each entry is passed to `os.makedirs()` with `exist_ok=True`, so missing
    parent directories are created as well and an already-existing directory
    is left untouched without raising.

    Parameters:
    paths (list): Directory paths to create.
    verbose (bool, optional): When True, one `logger.info()` message is
        emitted per entry (whether or not the directory already existed).
        Defaults to True.

    Returns:
    None
    """
    for directory in paths:
        os.makedirs(directory, exist_ok=True)
        if not verbose:
            # Quiet mode: nothing to report for this entry.
            continue
        logger.info(f"Created directory at: {directory}")


class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects.

    On construction the config, params and schema files are read with
    `read_yaml` and the directory named by `config.artifacts_root` is created.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ) -> None:
        """Read the three YAML files and create the artifacts root directory.

        Parameters:
        config_filepath: Location of the main configuration YAML.
        params_filepath: Location of the parameters YAML.
        schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_validation_configuration(self) -> DataValidationConfig:
        """Build the DataValidationConfig for the data-validation stage.

        Reads the `data_validation` section of the config and the `COLUMNS`
        section of the schema, creates the stage's root directory, and
        returns the populated (frozen) config object.
        """
        stage_settings = self.config.data_validation
        expected_columns = self.schema.COLUMNS

        # The stage directory must exist before the config object is built.
        create_directories([stage_settings.root_dir])

        return DataValidationConfig(
            all_schema=expected_columns,
            root_dir=stage_settings.root_dir,
            STATUS_FILE=stage_settings.STATUS_FILE,
            unzip_file_dir=stage_settings.unzip_file_dir,
        )

In [73]:
import os
from mlProject import logger

In [74]:
class DataValidation:
    """Checks that a dataset's columns are all declared in the schema."""

    def __init__(self, config: "DataValidationConfig") -> None:
        """Store the validation settings.

        Parameters:
        config: Object exposing `unzip_file_dir` (path given to
            `pd.read_csv`), `all_schema` (mapping keyed by allowed column
            names) and `STATUS_FILE` (path the result is written to).
        """
        self.config = config

    def validate_all_columns(self) -> bool:
        """Validate the dataset's column names against the schema.

        The CSV at `config.unzip_file_dir` is loaded and every column name is
        looked up in the keys of `config.all_schema`. The overall result is
        written once to `config.STATUS_FILE` as
        ``validation status <True|False>``.

        Returns:
        bool: True only if every column of the dataset appears in the
            schema; False as soon as at least one column is unknown.

        Raises:
        Any exception from reading the CSV or writing the status file
        propagates unchanged.
        """
        data = pd.read_csv(self.config.unzip_file_dir)
        allowed_columns = set(self.config.all_schema.keys())

        # Evaluate ALL columns before deciding. Previously the status was
        # overwritten per column, so only the last column determined the
        # outcome and an early unknown column could be masked.
        unexpected_columns = [
            col for col in data.columns if col not in allowed_columns
        ]
        validation_status = not unexpected_columns

        # Persist the final verdict once rather than rewriting per column.
        with open(self.config.STATUS_FILE, 'w') as f:
            f.write(f"validation status {validation_status}")

        return validation_status



In [75]:
# Run the data-validation stage end to end: load the YAML configuration,
# build the stage config, then check the dataset's columns against the schema.
# Any exception propagates unchanged; a try/except that only re-raises adds
# nothing, so none is used here.
config = ConfigurationManager()
data_validation_config = config.get_data_validation_configuration()
data_validation = DataValidation(config=data_validation_config)
data_validation.validate_all_columns()

[2024-08-15 18:04:11,127: INFO: common: Yaml file : config/config.yaml loaded successfully]
[2024-08-15 18:04:11,135: INFO: common: Yaml file : params.yaml loaded successfully]
[2024-08-15 18:04:11,166: INFO: common: Yaml file : schema.yaml loaded successfully]
[2024-08-15 18:04:11,183: INFO: 1077476401: Created directory at: artifacts]
[2024-08-15 18:04:11,204: INFO: 1077476401: Created directory at: artifacts/data_validation]
