In [1]:
import os
os.chdir("x:\DL\Projects\MLOPs")
%pwd

'x:\\DL\\Projects\\MLOPs'

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable (frozen) bundle of settings for the data-ingestion stage.

    In this notebook ``ConfigurationManager.get_dataingestion_config`` fills
    every field from the ``data_ingestion`` section of the loaded config.
    """
    # Stage directory; the configuration manager creates it before building this object.
    root_dir: Path
    # NOTE(review): presumably the remote location the dataset is fetched from -- confirm
    # against the ingestion component, which is not visible here.
    source_URL: str
    # NOTE(review): presumably where the fetched data is stored locally -- confirm.
    local_data: Path
    # Directory whose entries DataValidation lists, and where it expects "data.yaml".
    unzip_dir: Path

@dataclass(frozen=True)
class DataValidationConfig:
    """Immutable (frozen) bundle of settings for the data-validation stage."""
    # Directory for validation artifacts; DataValidation creates it if it does not exist.
    root_dir: Path
    # Despite the "_dir" suffix this is opened as a file: the validation status
    # line is written to it.
    status_file_dir: Path
    # Names that are compared against the entries of the unzipped data directory.
    # NOTE(review): presumably a list of str file/folder names -- confirm against config.yaml.
    req_files: list

In [3]:
from scripts.MLOPs.constants import *
from scripts.MLOPs.utils.common import read_yaml, create_directories
from scripts.MLOPs.entity.config_entity import *

In [4]:


class ConfigurationManager:
    """Loads the config and params YAML files and hands out per-stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_dataingestion_config(self) -> DataIngestionConfig:
        """Create the ingestion stage directory and return its settings.

        Returns:
            DataIngestionConfig populated from the ``data_ingestion`` section.
        """
        ingestion = self.config.data_ingestion
        create_directories([ingestion.root_dir])

        return DataIngestionConfig(
            root_dir=ingestion.root_dir,
            source_URL=ingestion.source_URL,
            local_data=ingestion.local_data,
            unzip_dir=ingestion.unzip_dir,
        )

    def get_datavalidation_config(self) -> DataValidationConfig:
        """Create the validation stage directory and return its settings.

        Returns:
            DataValidationConfig populated from the ``data_validation`` section.
        """
        validation = self.config.data_validation
        create_directories([validation.data_val_dir])

        return DataValidationConfig(
            root_dir=validation.data_val_dir,
            status_file_dir=validation.data_val_status,
            req_files=validation.data_val_req,
        )

In [5]:
import os,sys
from scripts.MLOPs import logger
from scripts.MLOPs.exception import AppException
from scripts.MLOPs.utils.common import update_train_yaml


class DataValidation:
    """Validates the unzipped dataset and patches its ``data.yaml``.

    Attributes:
        config: Validation settings (``root_dir``, ``status_file_dir``, ``req_files``).
        param: Ingestion settings; only ``unzip_dir`` is read here.
    """

    # The annotations are quoted (forward references) so that defining this
    # class does not require the config classes to be bound at that moment.
    def __init__(self, config: "DataValidationConfig", param: "DataIngestionConfig"):
        self.config = config
        self.param = param

    def validate_files(self) -> bool:
        """Check that every required name exists in the unzipped data directory.

        The status is computed once from the full set of required names and is
        written to the status file a single time. Previously the status was
        overwritten for every directory entry, so the result depended only on
        the last entry listed, a missing required file was never detected, and
        an empty directory produced ``None`` with no status file at all.

        Entries that are present but not required (e.g. README files) do not
        fail the validation.

        Returns:
            True when all names in ``config.req_files`` are present in
            ``param.unzip_dir`` (also when nothing is required), else False.

        Raises:
            AppException: Wraps any error raised while listing the directory
                or writing the status file.
        """
        try:
            all_files = os.listdir(self.param.unzip_dir)
            os.makedirs(self.config.root_dir, exist_ok=True)

            present = set(all_files)
            # Treat an unset requirement list the same as an empty one.
            required = list(self.config.req_files or [])
            missing_files = [name for name in required if name not in present]
            validation_status = not missing_files

            with open(self.config.status_file_dir, 'w') as f:
                f.write(f"validation status: {validation_status}")

            print(all_files)
            return validation_status
        except Exception as e:
            raise AppException(e, sys) from e

    def update_yaml(self):
        """Rewrite the dataset's ``data.yaml`` via ``update_train_yaml`` and log it.

        The YAML file is expected directly inside ``param.unzip_dir``; that same
        directory is passed to the helper as the path to record.
        """
        yamlpath = os.path.join(self.param.unzip_dir, "data.yaml")
        pathdir = self.param.unzip_dir
        update_train_yaml(yamlpath, pathdir)
        logger.info(f"following changes has been made \n train and valid path inside data.yaml has been modified \n path : {pathdir} has been added to data.yaml file ")
       
        


In [6]:
import sys
from scripts.MLOPs import logger
from scripts.MLOPs.config.configuration import ConfigurationManager
from scripts.MLOPs.exception import AppException
from scripts.MLOPs.components.data_ingestion import DataIngestion


In [7]:
# Label for this pipeline stage; it is not referenced by the code in this cell.
STAGE2 = "Data Validation Stage"

try:
    # NOTE(review): when the cells run top to bottom, the import in the previous
    # cell rebinds ConfigurationManager to the packaged class, so the class
    # defined earlier in this notebook is not the one instantiated here.
    config = ConfigurationManager()
    # The ingestion config is needed because validation reads its unzip_dir.
    dataval_param = config.get_dataingestion_config()
    dataval_config = config.get_datavalidation_config()
    data_validation = DataValidation(config=dataval_config, param=dataval_param)
    # The returned status is not inspected, so update_yaml() runs regardless of
    # whether validation reported True or False.
    data_validation.validate_files()
    data_validation.update_yaml()
except Exception as e:
    # Re-raise any failure as the project's AppException.
    raise AppException(e, sys)

[2024-07-29 00:59:40,326: INFO: common: yaml file: config\config.yaml loaded sucessfully]
[2024-07-29 00:59:40,326: INFO: common: yaml file: params.yaml loaded sucessfully]
[2024-07-29 00:59:40,329: INFO: common: created directory at artifacts]
[2024-07-29 00:59:40,329: INFO: common: created directory at artifacts/data_ingestion]
[2024-07-29 00:59:40,329: INFO: common: created directory at artifacts/data_validation]
['data.yaml', 'README.dataset.txt', 'README.roboflow.txt', 'test', 'train', 'valid']
[2024-07-29 00:59:40,333: INFO: 462659045: following changes has been made 
 train and valid path inside data.yaml has been modified 
 path : artifacts/data_ingestion/data has been added to data.yaml file ]


NameError: name 'self' is not defined