In [1]:
import os
# Switch the working directory to the parent of the current one; presumably
# so that later cells resolve paths from the project root — confirm.
# NOTE(review): relative chdir is not idempotent — re-running this cell moves
# one more level up each time, so run it exactly once per kernel session.
os.chdir('../')
# Show the resulting working directory as the cell output.
%pwd

'/home/paladin/Downloads/Consumer-Finance-Complaint-Analysis'

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings for one data-ingestion run.

    The dataclass is frozen: assigning to any field after construction raises
    ``dataclasses.FrozenInstanceError``. Annotations are informational only;
    dataclasses do not validate or convert the values passed in.
    """

    # Base directory configured for the data-ingestion stage.
    root_dir: Path
    # Lower bound of the date range to ingest.
    from_date: str
    # Upper bound of the date range to ingest.
    to_date: str
    # Directory configured as the feature store.
    feature_store_dir: Path
    # Directory for downloaded files.
    downloaded_dir: Path
    # Directory for failed downloads.
    failed_downloaded_dir: Path
    # Location of the ingestion metadata file.
    metadata_file_path: Path
    # Earliest permitted from_date, as written in the configuration.
    min_start_date: str
    # URL of the data source. Annotated as str rather than Path because
    # pathlib collapses the "//" after the scheme and would corrupt a URL.
    datasource_url: str

In [3]:
from datetime import datetime
from financeComplaint.constants import *
from financeComplaint.utils import read_yaml_file, write_yaml_file, create_directories
from financeComplaint.entity.metadata_entity import DataIngestionMetadata

In [None]:
class ConfigurationManager:
    """Loads the project's YAML configuration and builds stage config objects.

    On construction it reads the config and params files, remembers the saved
    model path, passes ``config.artifacts_root`` to ``create_directories`` and
    records a timestamp that is later used to name the per-run sub-directory.
    """

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 saved_modelpath=SAVED_MODEL_PATH,
                 ):
        """Read the configuration files and prepare the artifacts root.

        Args:
            config_filepath: Path handed to ``read_yaml_file`` for the main config.
            params_filepath: Path handed to ``read_yaml_file`` for the params.
            saved_modelpath: Stored unchanged on ``self.saved_modelpath``.
        """
        self.config = read_yaml_file(config_filepath)
        self.params = read_yaml_file(params_filepath)
        self.saved_modelpath = saved_modelpath

        create_directories([self.config.artifacts_root])
        # Captured once per instance so every config built by this manager
        # shares the same run sub-directory name.
        self.timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')

    def get_data_ingestion_config(self, from_date=None, to_date=None) -> DataIngestionConfig:
        """Build the configuration for the data-ingestion stage.

        Both dates are stored on the returned config as ``"%Y-%m-%d"`` strings
        (previously ``from_date`` leaked out as a ``datetime`` while ``to_date``
        was a string).

        Args:
            from_date: Optional start date as a ``"%Y-%m-%d"`` string. ``None``
                means the configured ``MIN_START_DATE``; a date earlier than
                ``MIN_START_DATE`` is raised to it. If the ingestion metadata
                file exists, this value is replaced by the ``to_date``
                recorded in the metadata.
            to_date: Optional end date. ``None`` means today's date formatted
                as ``"%Y-%m-%d"``; any other value is passed through unchanged.

        Returns:
            DataIngestionConfig populated from the ``data_ingestion`` section
            of the loaded config plus the resolved dates and run directories.

        Raises:
            ValueError: If ``from_date`` or the configured ``MIN_START_DATE``
                does not match ``"%Y-%m-%d"``.
        """
        date_format = "%Y-%m-%d"

        config = self.config.data_ingestion
        # Per-run directory: <ROOT_DIR>/<timestamp>/...
        sub_root_dir = os.path.join(config.ROOT_DIR, self.timestamp)
        downloaded_dir = os.path.join(sub_root_dir, 'downloaded_files')
        failed_downloaded_dir = os.path.join(sub_root_dir, 'failed_downloaded_files')

        create_directories([config.ROOT_DIR,
                            config.FEATURE_STORE_DIR,
                            downloaded_dir,
                            failed_downloaded_dir,
                            ])

        # Compare as datetimes, then convert back to a string so from_date has
        # the same type and format as to_date.
        min_start_date = datetime.strptime(config.MIN_START_DATE, date_format)
        if from_date is None:
            requested_start = min_start_date
        else:
            requested_start = datetime.strptime(from_date, date_format)
        from_date = max(requested_start, min_start_date).strftime(date_format)

        if to_date is None:
            to_date = datetime.now().strftime(date_format)

        data_ingestion_metadata = DataIngestionMetadata(config.METADATA_FILE_PATH)

        if data_ingestion_metadata.is_metadata_file_exist:
            # Metadata wins over the requested start date; presumably this
            # resumes from where the previous run stopped — confirm.
            # NOTE(review): assumes the stored to_date is a "%Y-%m-%d" string.
            metadata_info = data_ingestion_metadata.get_metadata_info()
            from_date = metadata_info.to_date

        return DataIngestionConfig(
            root_dir=config.ROOT_DIR,
            from_date=from_date,
            to_date=to_date,
            feature_store_dir=config.FEATURE_STORE_DIR,
            downloaded_dir=downloaded_dir,
            failed_downloaded_dir=failed_downloaded_dir,
            metadata_file_path=config.METADATA_FILE_PATH,
            min_start_date=config.MIN_START_DATE,
            datasource_url=config.DATASOURCE_URL,
        )

In [None]:
class DataIngestion:
    def __init__(self, config: DataIngestionConfig) -> None:
        """Store the ingestion settings for later use.

        Args:
            config: Data-ingestion settings; kept as-is on ``self.config``.
        """
        self.config = config

    