In [20]:
import os
from pathlib import Path

In [6]:
# Show the kernel's current working directory (IPython automagic for %pwd).
pwd

'/Users/gojuruakshith/YouTube-Fake-Thumbnail-Detector/research'

In [7]:
# Step up to the project root only while the kernel is still inside research/,
# so running this cell a second time does not climb another directory level.
if Path.cwd().name == "research":
    os.chdir("..")

/Users/gojuruakshith/YouTube-Fake-Thumbnail-Detector


In [8]:
from dataclasses import dataclass

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable settings consumed by the data-validation stage.

    ``all_schema`` is annotated with the built-in ``dict``: ``typing.Dict`` is
    not imported by the notebook's import cells, so the former ``Dict``
    annotation raised ``NameError`` when this cell ran on a fresh kernel.
    """

    # Directory that holds this stage's artifacts.
    root_dir: Path
    # Path of the CSV file that is validated.
    unzip_data: Path
    # Text file that receives the validation verdict.
    validation_status_file: Path
    # Column schema (the COLUMNS section of the schema YAML).
    all_schema: dict


In [9]:
from src.mlProject.constants import CONFIG_FILE_PATH, PARAM_FILE_PATH, SCHEMA_FILE_PATH
from src.mlProject.utils.common import read_yaml, create_directories


class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects."""

    def __init__(self, config_filepath=None, params_filepath=None, schema_filepath=None):
        """Read the config, params and schema YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main config YAML. ``None`` (default)
                selects ``CONFIG_FILE_PATH``.
            params_filepath: Path of the params YAML. ``None`` (default)
                selects ``PARAM_FILE_PATH``.
            schema_filepath: Path of the schema YAML. ``None`` (default)
                selects ``SCHEMA_FILE_PATH``.
        """
        # Defaults are resolved at call time, so calling with no arguments
        # behaves exactly as before while tests/experiments can inject paths.
        self.config = read_yaml(CONFIG_FILE_PATH if config_filepath is None else config_filepath)
        self.params = read_yaml(PARAM_FILE_PATH if params_filepath is None else params_filepath)
        self.schema = read_yaml(SCHEMA_FILE_PATH if schema_filepath is None else schema_filepath)

        create_directories([Path(self.config.artifacts_root)])

    def get_data_validation_config(self) -> DataValidationConfig:
        """Build the data-validation settings from the loaded YAML content.

        Reads the ``data_validation`` section of the config and the ``COLUMNS``
        section of the schema, and creates the stage's root directory as a
        side effect.

        Returns:
            A ``DataValidationConfig`` whose path fields are ``Path`` objects.
        """
        config = self.config.data_validation
        schema = self.schema.COLUMNS

        create_directories([Path(config.root_dir)])

        return DataValidationConfig(
            root_dir=Path(config.root_dir),
            unzip_data=Path(config.unzip_data),
            validation_status_file=Path(config.Data_Validation_Status),
            all_schema=schema,
        )


In [18]:
import pandas as pd

class DataValidation:
    """Checks that the dataset CSV exposes the columns the project relies on."""

    def __init__(self, config: "DataValidationConfig"):
        """Store the stage configuration.

        Args:
            config: Object providing ``unzip_data`` (CSV path) and
                ``validation_status_file`` (a ``Path`` for the verdict file).
        """
        # The annotation is quoted so defining this class does not require the
        # config class to exist yet (cells may be run in a different order).
        self.config = config

    def validate_all_columns(self, required_columns=("headline",)) -> bool:
        """Verify the CSV header contains every required column and record the verdict.

        Args:
            required_columns: Column name, or iterable of column names, that
                must be present. Defaults to ``("headline",)``.

        Returns:
            ``True`` when all required columns are present, else ``False``.
            The same verdict is written to ``config.validation_status_file``
            as ``DATA_VALIDATION_STATUS: <verdict>``.

        Raises:
            Whatever ``pandas.read_csv`` or the file system raises (missing
            file, empty file, permission problems); nothing is swallowed.
        """
        # A lone string would otherwise be iterated character by character.
        if isinstance(required_columns, str):
            required_columns = (required_columns,)

        status_file = self.config.validation_status_file
        status_file.parent.mkdir(parents=True, exist_ok=True)

        # nrows=0 parses only the header row; the column names are all that is
        # needed, so the data rows are not loaded into memory.
        header = pd.read_csv(self.config.unzip_data, nrows=0)
        present = set(header.columns)

        validation_status = all(column in present for column in required_columns)

        with open(status_file, "w") as f:
            f.write(f"DATA_VALIDATION_STATUS: {validation_status}\n")

        return validation_status


In [19]:
# Run the data-validation stage end to end. Exceptions propagate unchanged;
# the former `except Exception as e: raise e` wrapper only re-raised them.
cm = ConfigurationManager()
dv_config = cm.get_data_validation_config()
dv = DataValidation(config=dv_config)
dv.validate_all_columns()  # last expression, so the notebook displays the verdict


[2026-01-06 19:15:35,359: INFO: common: YAML file loaded successfully: config/config.yaml]
[2026-01-06 19:15:35,361: INFO: common: YAML file loaded successfully: params.yaml]
[2026-01-06 19:15:35,362: INFO: common: YAML file loaded successfully: schema.yaml]
[2026-01-06 19:15:35,362: INFO: common: Created directory at: artifacts]
[2026-01-06 19:15:35,363: INFO: common: Created directory at: artifacts/data_validation]
