In [None]:
import os

In [None]:
%pwd

In [None]:
# Move the working directory one level up so later relative paths resolve from
# the parent folder (presumably the project root — confirm against repo layout).
os.chdir("../")

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable record of the locations and file names used for data validation.

    Instances are frozen: attributes cannot be reassigned after construction.
    Field order is part of the constructor signature and must not change.
    """

    # Directory holding the raw input data (presumably; verify against config).
    raw_data_dir: Path
    # Directory associated with the validation step's data.
    validation_data_dir: Path
    # Name of the data file to validate.
    validation_file_name: str
    # Full path of the data file to validate.
    validation_file_path: Path
    # Directory (or path) where the schema lives.
    validation_schema_path: Path
    # Name of the schema file.
    validation_schema_name: str
    # Full path of the schema file.
    validation_schema_file_path: Path
    

In [None]:
from mlproject.constants import *
from mlproject.utils.common import read_yaml,create_directory

In [None]:
class ConfigurationManager:
    """Wrap a set of data-validation settings and expose them as a
    ``DataValidationConfig`` with path fields normalised to ``Path``.

    Args:
        config: Object exposing the seven ``DataValidationConfig`` attributes
            directly (``raw_data_dir``, ``validation_data_dir``, ...).
    """

    def __init__(self,config:DataValidationConfig):
        self.config = config
        # Mirror every setting on the manager itself for direct attribute access.
        self.raw_data_dir = config.raw_data_dir
        self.validation_data_dir = config.validation_data_dir
        self.validation_file_name = config.validation_file_name
        self.validation_file_path = config.validation_file_path
        self.validation_schema_path = config.validation_schema_path
        self.validation_schema_name = config.validation_schema_name
        self.validation_schema_file_path = config.validation_schema_file_path

    def get_validation_config(self) -> DataValidationConfig:
        """Build a ``DataValidationConfig`` from the stored settings.

        The settings are read straight from ``self.config``, matching how
        ``__init__`` reads them. Path-like fields are wrapped in ``Path``;
        the two name fields are passed through unchanged.

        Returns:
            A new frozen ``DataValidationConfig``.

        Raises:
            OSError: If the validation data directory cannot be created.
        """
        config = self.config

        # NOTE(review): the earlier version tried to create `config.root_dir`
        # through a helper that is not imported in this notebook, and the
        # config type has no `root_dir` field. Creating the validation data
        # directory is assumed to be the intent — confirm.
        Path(config.validation_data_dir).mkdir(parents=True, exist_ok=True)

        return DataValidationConfig(
            raw_data_dir=Path(config.raw_data_dir),
            validation_data_dir=Path(config.validation_data_dir),
            validation_file_name=config.validation_file_name,
            validation_file_path=Path(config.validation_file_path),
            validation_schema_path=Path(config.validation_schema_path),
            validation_schema_name=config.validation_schema_name,
            validation_schema_file_path=Path(config.validation_schema_file_path),
        )

In [None]:
import os
from mlProject import logger
import pandas as pd

In [None]:
class DataValidation:
    """Check that every column of a CSV dataset is declared in a YAML schema.

    Args:
        config: Settings object; this class reads ``validation_file_path``
            (the CSV to check) and ``validation_schema_file_path`` (the YAML
            schema) from it.
    """

    def __init__(self,config:DataValidationConfig):
        self.config = config
        # NOTE(review): assumes the imported `logger` object exposes
        # `get_logger()` — confirm against the mlProject package.
        self.logger = logger.get_logger(__name__)

    def validate_all_columns(self) -> bool:
        """Validate the dataset's column names against the schema file.

        Reads the CSV at ``config.validation_file_path``, loads the YAML at
        ``config.validation_schema_file_path`` and checks each CSV column for
        membership in the schema's ``'columns'`` entry. Every column that is
        not declared is logged as an error.

        Returns:
            ``True`` when all CSV columns are declared in the schema,
            ``False`` when at least one is not.

        Raises:
            KeyError: If the schema has no ``'columns'`` entry.
            Exception: Anything raised while reading the CSV or the schema
                file is propagated unchanged to the caller.
        """
        # Local import: no cell in this notebook imports yaml at module level.
        import yaml

        data = pd.read_csv(self.config.validation_file_path)

        # Load the schema once, outside of any per-column loop.
        with open(self.config.validation_schema_file_path) as file:
            schema = yaml.safe_load(file)
        schema_columns = schema['columns']

        # Collect every undeclared column so that one bad column cannot be
        # masked by a later good one.
        missing = [col for col in data.columns if col not in schema_columns]
        for col in missing:
            self.logger.error(f"Column {col} not in schema")

        return not missing

In [None]:
# Run the data-validation step once. Exceptions propagate unchanged, which is
# what a catch-and-re-raise wrapper would do as well.
# NOTE(review): ConfigurationManager.__init__ declares a required `config`
# parameter, but none is passed here — confirm where it should come from.
config = ConfigurationManager()
data_validation = DataValidation(config.get_validation_config())
data_validation.validate_all_columns()
    