In [1]:
import os

In [2]:
%pwd

'c:\\Users\\ambig\\jupiter_notebook\\Projects\\Credit-Risk-Model\\research'

In [3]:
# Step up from research/ to the project root; the later cells import from
# `src.` and read config files through relative paths.
# Guarded on the directory name so that re-running this cell (without a kernel
# restart) does not climb one more level each time.
if os.path.basename(os.getcwd()).lower() == "research":
    os.chdir("../")


In [4]:
%pwd

'c:\\Users\\ambig\\jupiter_notebook\\Projects\\Credit-Risk-Model'

In [5]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class DataIngestionConfig:
    """Filesystem locations used by the data-ingestion step.

    Attributes:
        root_dir: Directory belonging to the ingestion step.
        customers_data: Location of the customers input file.
        loans_data: Location of the loans input file.
        bureau_data: Location of the bureau input file.
        output_path: Where the merged dataset is written.
    """

    root_dir: Path
    customers_data: Path
    loans_data: Path
    bureau_data: Path
    output_path: Path

In [6]:
from src.Credit_Risk_Model.constants import *
from src.Credit_Risk_Model.utils.common import read_yaml,create_directories,load_df,save_df
from  src.Credit_Risk_Model.logger import logger


[2024-11-06 10:05:47,676] INFO: __init__ : 26] Credit Risk Model Building Started


In [7]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-step configuration objects."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH):
        """Read the config, params and schema YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        # Files are read in the same order as the attributes are listed.
        self.config, self.params, self.schema = [
            read_yaml(yaml_path)
            for yaml_path in (config_filepath, params_filepath, schema_filepath)
        ]

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ingestion settings from the `data_ingestion` config section.

        The section's `root_dir` is created as a side effect before the
        settings object is assembled.

        Returns:
            A DataIngestionConfig whose fields are the section's values wrapped in Path.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            customers_data=Path(section.customers_data),
            loans_data=Path(section.loans_data),
            bureau_data=Path(section.bureau_data),
            output_path=Path(section.output_path),
        )
                           

In [8]:
import os
import pandas as pd
from  src.Credit_Risk_Model.logger import logger

class DataIngestion:
    """Loads the customers, loans and bureau datasets, joins them and saves the result."""

    def __init__(self, config : DataIngestionConfig):
        """Copy the input and output locations out of the ingestion config.

        Args:
            config: Supplies `customers_data`, `loans_data`, `bureau_data`
                and `output_path`.
        """
        self.customers_data, self.loans_data, self.bureau_data = (
            config.customers_data,
            config.loans_data,
            config.bureau_data,
        )
        self.output_path = config.output_path

    def load_data(self):
        """Load the three inputs, merge them on `cust_id` and save the merged frame.

        Each input is read with `load_df`, its shape is logged, and the frames
        are combined with `pd.merge` using its default join type. The result is
        handed to `save_df` with `self.output_path`; nothing is returned.
        """
        logger.info("Loading customers data...")
        customers = load_df(self.customers_data)

        logger.info("Loading loans data...")
        loans = load_df(self.loans_data)

        logger.info("Loading bureau data...")
        bureau = load_df(self.bureau_data)

        logger.info(f"The Shape of customers data: {customers.shape}")
        logger.info(f"The Shape of loans data: {loans.shape}")
        logger.info(f"The Shape of bureau data: {bureau.shape}")

        # customers + loans first, then bureau, both keyed on cust_id.
        merged = pd.merge(
            pd.merge(customers, loans, on='cust_id'),
            bureau,
            on='cust_id',
        )

        save_df(df=merged, file_path=self.output_path)
    

In [9]:
# Run the ingestion step end to end: build the configuration, then load,
# merge and save the data. Any failure is logged and then propagated.
try:
    # Load configurations
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()

    # Load data
    data_ingestion = DataIngestion(data_ingestion_config)
    data_ingestion.load_data()

    logger.info("Data ingestion completed successfully.")

except Exception as e:
    logger.error(f"An error occurred during data ingestion: {str(e)}")
    # Bare `raise` re-raises the exception currently being handled.
    raise

[2024-11-06 10:05:48,348] INFO: common : 32] Successfully loaded yaml file: config\config.yaml
[2024-11-06 10:05:48,348] INFO: common : 32] Successfully loaded yaml file: params.yaml
[2024-11-06 10:05:48,362] INFO: common : 32] Successfully loaded yaml file: schema.yaml
[2024-11-06 10:05:48,364] INFO: common : 55] Created directory: artifacts
[2024-11-06 10:05:48,366] INFO: common : 55] Created directory: artifacts/data_ingestion
[2024-11-06 10:05:48,367] INFO: 334133869 : 13] Loading customers data...
[2024-11-06 10:05:48,443] INFO: common : 79] Successfully loaded DataFrame from: data\customers.csv
[2024-11-06 10:05:48,444] INFO: 334133869 : 16] Loading loans data...
[2024-11-06 10:05:48,556] INFO: common : 79] Successfully loaded DataFrame from: data\loans.csv
[2024-11-06 10:05:48,557] INFO: 334133869 : 19] Loading bureau data...
[2024-11-06 10:05:48,579] INFO: common : 79] Successfully loaded DataFrame from: data\bureau_data.csv
[2024-11-06 10:05:48,579] INFO: 334133869 : 22] The S