In [1]:
import os

In [2]:
%pwd

'd:\\End-to-End-Wine-Quality-predidection\\research'

In [3]:
# Step up from research/ to the project root so the notebook's relative paths
# (e.g. artifacts/...) resolve from there.
# Guarded so that re-running this cell does not climb one more directory each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'd:\\End-to-End-Wine-Quality-predidection'

In [5]:
#Prepare Entity
from dataclasses import dataclass

from pathlib import Path


@dataclass
class DataValidationConfig:
    """Settings consumed by the data-validation stage."""

    # Folder created for the validation stage's outputs.
    root_dir: Path
    # Path of the ingested CSV that the validation component reads.
    unzip_data_dir: Path
    # File that is overwritten with the validation verdict.
    STATUS_FILE: Path
    # Parsed schema file contents; the validator reads its "COLUMNS" mapping.
    all_schema: dict


In [6]:
#Configuration Manager

from ML_Project.constants import *
from ML_Project.entity.config_entity import DataIngestionConfig
from ML_Project.utils.common import create_directories


class ConfigurationManager:
    """Reads the project's YAML files and builds stage-specific config objects.

    Constructing the manager parses the config, params and schema files and
    asks ``create_directories`` to create the ``artifact_root`` folder named
    in the main config.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifact_root = Path(self.config.artifact_root)
        create_directories([artifact_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ingestion settings from the ``data_ingestion`` config section."""
        section = self.config.data_ingestion
        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_url=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
            artifact_dir=self.config.artifact_root,
        )

    def get_data_validation_config(self) -> DataValidationConfig:
        """Build the validation settings from the ``data_validation`` config section.

        The stage's root directory is created before the config object is
        returned; every path in the section is wrapped in ``Path`` and the
        parsed schema is passed along unchanged.
        """
        section = self.config.data_validation

        validation_root = Path(section.root_dir)
        create_directories([validation_root])

        return DataValidationConfig(
            root_dir=validation_root,
            unzip_data_dir=Path(section.unzip_data_dir),
            STATUS_FILE=Path(section.STATUS_FILE),
            all_schema=self.schema,
        )


In [7]:
# Smoke test: build the manager and display the validation config it produces.
obj = ConfigurationManager()
obj.get_data_validation_config()

[2025-10-04 21:11:52,229] :INFO:common: Created directory at: artifacts :
[2025-10-04 21:11:52,232] :INFO:common: Created directory at: artifacts\data_validation :


DataValidationConfig(root_dir=WindowsPath('artifacts/data_validation'), unzip_data_dir=WindowsPath('artifacts/data_ingestion/WineQT.csv'), STATUS_FILE=WindowsPath('artifacts/data_validation/status.txt'), all_schema=ConfigBox({'COLUMNS': {'fixed acidity': 'float64', 'volatile acidity': 'float64', 'citric acid': 'float64', 'residual sugar': 'float64', 'chlorides': 'float64', 'free sulfur dioxide': 'float64', 'total sulfur dioxide': 'float64', 'density': 'float64', 'pH': 'float64', 'sulphates': 'float64', 'alcohol': 'float64', 'quality': 'int64'}, 'TARGET_COLUMN': {'name': 'quality'}}))

In [8]:
#Create a Validation Component
from ML_Project.logging import logger
import pandas as pd


class DataValidation:
    """Checks the ingested CSV against the schema and records the verdict.

    Each check reads the CSV at ``config.unzip_data_dir``, compares it with
    the ``"COLUMNS"`` mapping of ``config.all_schema`` and overwrites
    ``config.STATUS_FILE`` with a one-line status.
    """

    def __init__(self, config: "DataValidationConfig"):
        # The annotation is quoted so this class statement does not need the
        # entity class to exist at definition time.
        self.config = config

    def _write_status(self, message):
        """Overwrite the status file with ``message``."""
        with open(self.config.STATUS_FILE, 'w') as f:
            f.write(message)

    def validate_data_columns(self):
        """Verify that every schema column is present in the CSV.

        Writes ``Validation Status: <column>: FAILED`` for the first missing
        column, or ``Validation Status: PASSED`` when none is missing.

        Raises:
            Exception: any error raised while reading the CSV or writing the
                status file is logged and re-raised unchanged.
        """
        try:
            df = pd.read_csv(self.config.unzip_data_dir)
            expected_columns = self.config.all_schema["COLUMNS"]
            for column in expected_columns:
                if column not in df.columns:
                    self._write_status(f"Validation Status: {column}: FAILED")
                    logger.info(f"Column {column} is missing in the data.")
                    return
            self._write_status("Validation Status: PASSED")
            logger.info("All columns are present in the data.")
        except Exception as e:
            logger.error(f"Error during column validation: {e}")
            raise

    def validate_data_types(self, ignored_columns=("Id",)):
        """Verify that the CSV's column dtypes match the schema.

        Writes ``Validation Status: FAILED`` on the first problem found and
        ``Validation Status: PASSED`` otherwise. A problem is a schema column
        that is missing from the data, a column whose pandas dtype name
        differs from the schema entry, or a column absent from the schema.

        Args:
            ignored_columns: names of data columns exempt from the dtype
                comparison. Defaults to ``("Id",)``.

        Raises:
            Exception: any error raised while reading the CSV or writing the
                status file is logged and re-raised unchanged.
        """
        try:
            df = pd.read_csv(self.config.unzip_data_dir)
            expected_dtypes = self.config.all_schema["COLUMNS"]

            # The dtype loop below only sees columns that exist in the data.
            # Without this check a missing schema column would go unnoticed and
            # a FAILED status from the column check would be replaced by PASSED.
            for col in expected_dtypes:
                if col not in df.columns:
                    self._write_status("Validation Status: FAILED")
                    logger.info(f"Column {col} is missing in the data.")
                    return

            for col, dtype in df.dtypes.items():
                if col in ignored_columns:
                    continue
                # .get() yields None for columns the schema does not know,
                # so unexpected columns count as a mismatch.
                if str(dtype) != expected_dtypes.get(col):
                    self._write_status("Validation Status: FAILED")
                    logger.info(f"Data type mismatch for column {col}: expected {expected_dtypes.get(col)}, got {dtype}")
                    return

            self._write_status("Validation Status: PASSED")
            logger.info("All column data types match the schema.")
        except Exception as e:
            logger.error(f"Error during data type validation: {e}")
            raise


In [9]:
# Run both validation checks end to end; log any failure, then re-raise it.
try:
    config_manager = ConfigurationManager()
    data_validation_config = config_manager.get_data_validation_config()
    data_validation = DataValidation(config=data_validation_config)

    data_validation.validate_data_columns()
    data_validation.validate_data_types()
except Exception as err:
    logger.error(f"Error in Data Validation: {err}")
    raise err

[2025-10-04 21:11:52,700] :INFO:common: Created directory at: artifacts :
[2025-10-04 21:11:52,701] :INFO:common: Created directory at: artifacts\data_validation :
[2025-10-04 21:11:52,707] :INFO:2391972070: All columns are present in the data. :
