#### **First, we will prototype the content of the different production files inside this research notebook.**

**Workflow**
1. update config.yaml
2. update schema.yaml
3. update params.yaml
4. update the entity
5. update the configuration manager in src config
6. update the components
7. update the pipeline
8. update the main.py
9. update the app.py

In [1]:
%pwd

'c:\\Users\\plvit\\Desktop\\projects-portfolio\\churn-project\\research'

In [2]:
import os
# Step up from the notebook's folder to its parent so relative paths resolve from there.
# NOTE: the move is relative, so re-running this cell climbs one more level each time.
os.chdir("../")

In [3]:
%pwd # Now we are in the project root directory

'c:\\Users\\plvit\\Desktop\\projects-portfolio\\churn-project'

Creating the **entity** for the data validation configuration. It is the return type of the configuration manager's `get_data_validation_config` method and the input to our data validation component.

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable bundle of settings for the data validation step.

    Instances are built by ``ConfigurationManager.get_data_validation_config``
    and handed to the data validation component.
    """
    # Directory that the configuration manager creates for this step.
    root_dir: Path
    # Path of the text file the validation status is written to.
    STATUS_FILE: str
    # Path handed to ``pd.read_csv`` to load the dataset being validated.
    unzip_data_dir: Path
    # Mapping whose keys are the column names the dataset may contain.
    all_schema: dict

The **configuration manager** is responsible for reading the configuration files and providing configuration objects. It will give us the configuration needed by the different components of the project.

In [14]:
from churn_project.constants import CONFIG_FILE_PATH, SCHEMA_FILE_PATH, PARAMS_FILE_PATH
from churn_project.utils import read_yaml, create_directories

class ConfigurationManager:
    """Load the project's YAML files and build per-step configuration objects.

    The values returned by ``read_yaml`` are accessed by attribute
    (``self.config.artifacts_root``, ``self.schema.COLUMNS``), so they must
    support attribute-style lookup.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        # Files are read in the order config -> params -> schema.
        yaml_paths = (config_filepath, params_filepath, schema_filepath)
        self.config, self.params, self.schema = map(read_yaml, yaml_paths)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Create the validation directory and return the validation settings.

        Returns:
            A ``DataValidationConfig`` filled from the ``data_validation``
            section of the config file and the ``COLUMNS`` section of the
            schema file.
        """
        section = self.config.data_validation
        expected_columns = self.schema.COLUMNS

        # The step's output directory must exist before the component runs.
        create_directories([section.root_dir])

        return DataValidationConfig(
            root_dir=section.root_dir,
            STATUS_FILE=section.STATUS_FILE,
            unzip_data_dir=section.unzip_data_dir,
            all_schema=expected_columns,
        )


Now we will create the data validation **component**

In [15]:
import os
import pandas as pd

In [16]:
class DataValiadtion:
    """Check that a CSV dataset only contains columns declared in the schema.

    NOTE: the class name is misspelled ("Valiadtion"); it is kept unchanged
    because existing callers instantiate it under this name.
    """

    # The annotation is quoted (a forward reference) so this class can be
    # defined even when the config dataclass has not been defined yet.
    def __init__(self, config: "DataValidationConfig"):
        """Store the validation settings.

        Args:
            config: Object exposing ``unzip_data_dir`` (path of the CSV to
                read), ``all_schema`` (mapping keyed by allowed column name)
                and ``STATUS_FILE`` (path of the status report to write).
        """
        self.config = config

    def validate_all_columns(self) -> bool:
        """Validate the dataset's columns and record the outcome.

        The dataset is valid only when *every* one of its columns is a key of
        ``config.all_schema``. The outcome is written once to
        ``config.STATUS_FILE`` as ``"Validation status: <bool>"``.

        Returns:
            ``True`` if no column is missing from the schema, else ``False``.

        Raises:
            Any exception raised by ``pd.read_csv`` or by opening/writing the
            status file propagates unchanged.
        """
        data = pd.read_csv(self.config.unzip_data_dir)
        expected_cols = self.config.all_schema.keys()

        # Collect every offending column up front: a single unexpected column
        # must fail the whole check, regardless of its position in the file.
        unexpected_cols = [col for col in data.columns if col not in expected_cols]
        validation_status = not unexpected_cols

        # Write the final verdict once instead of once per column.
        with open(self.config.STATUS_FILE, "w", encoding="utf-8") as f:
            f.write(f"Validation status: {validation_status}")

        return validation_status

**pipeline**

In [17]:
# Run the data validation stage end to end: load the configuration, build the
# validation settings, then check the dataset's columns against the schema.
# No try/except wrapper is used: any failure propagates with its original
# traceback, which is what a catch-and-re-raise would do anyway.
config = ConfigurationManager()
data_validation_config = config.get_data_validation_config()
data_validation = DataValiadtion(config=data_validation_config)
# Keep the verdict so later cells can inspect it.
validation_status = data_validation.validate_all_columns()


[2025-10-29 12:57:34,333: INFO: utils: yaml file: config\config.yaml loaded successfully]
[2025-10-29 12:57:34,339: INFO: utils: yaml file: config\params.yaml loaded successfully]
[2025-10-29 12:57:34,349: INFO: utils: yaml file: config\schema.yaml loaded successfully]
[2025-10-29 12:57:34,351: INFO: utils: created directory at: artifacts]
[2025-10-29 12:57:34,354: INFO: utils: created directory at: artifacts/data_validation]
