#### **First, we will implement the content of the different production files inside this research notebook.**

**Workflow**
1. update config.yaml
2. update schema.yaml
3. update params.yaml
4. update the entity
5. update the configuration manager in src config
6. update the components
7. update the pipeline
8. update the main.py
9. update the app.py

In [1]:
%pwd

'c:\\Users\\plvit\\Desktop\\projects-portfolio\\churn-project\\research'

In [2]:
import os
# Step up one directory (from research/ to the project root) so that relative
# paths used later resolve from there.
# NOTE: the path is relative to the current directory, so re-running this cell
# moves up one more level each time.
os.chdir("../")

In [3]:
%pwd # Now we are in the project root directory

'c:\\Users\\plvit\\Desktop\\projects-portfolio\\churn-project'

Creating the **entity** for the data ingestion configuration. It is the return type of the configuration manager's `get_data_ingestion_config` method and the input to our data ingestion component.

In [4]:
from dataclasses import dataclass, field
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data ingestion step.

    The password is left out of the generated ``repr`` so that printing or
    logging a config object does not expose the database credential. The
    constructor signature and equality comparison are unaffected.
    """

    root_dir: Path  # directory where the ingested CSV file is written
    db_host: str  # host passed to the database connection
    db_user: str  # user name passed to the database connection
    db_password: str = field(repr=False)  # connection password, hidden from repr
    db_name: str  # database selected when connecting
    query: str  # SQL statement executed to fetch the data


The **configuration manager** is responsible for reading the configuration files and providing configuration objects. It will give us the file path or database connection details or any other configuration needed for different components of the project.

In [5]:
from churn_project.constants import CONFIG_FILE_PATH, SCHEMA_FILE_PATH, PARAMS_FILE_PATH
from churn_project.utils import read_yaml, create_directories

class ConfigurationManager:
    """Load the project's YAML files and build per-component config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data ingestion settings from the ``data_ingestion`` section.

        Creates the ingestion ``root_dir`` as a side effect so that later
        steps can write into it.

        Returns:
            A ``DataIngestionConfig`` populated from the loaded configuration.
        """
        config = self.config.data_ingestion

        # Make sure the ingestion output directory exists before anything writes to it.
        create_directories([config.root_dir])

        return DataIngestionConfig(
            # The entity declares root_dir as a Path; convert the raw YAML value
            # so the object actually honours its annotation.
            root_dir=Path(config.root_dir),
            db_host=config.db_host,
            db_user=config.db_user,
            db_password=config.db_password,
            db_name=config.db_name,
            query=config.query,
        )


Now we will create the data ingestion **component**

In [6]:
import sys
import os
import csv
from churn_project.logger import logger
from churn_project.utils import get_size
import pymysql
from churn_project.exception import CustomException

In [7]:
class DataIngestion:
    """Fetch rows from a MySQL database and persist them as a CSV file."""

    def __init__(self, config: DataIngestionConfig):
        """Store the ingestion settings used by the other methods."""
        self.config = config

    def fetch_data(self) -> tuple:
        """Run the configured query and return all rows it produces.

        Returns:
            Whatever ``cursor.fetchall()`` yields for the query; presumably a
            tuple of row tuples with PyMySQL's default cursor (confirm if a
            different cursor class is ever configured).

        Raises:
            CustomException: If executing the query or fetching rows fails.
        """
        logger.info("Starting data ingestion process")

        # Opened outside the try block: a failed connection propagates as the
        # driver's own exception and there is nothing to close in that case.
        connection = pymysql.connect(
            host=self.config.db_host,
            user=self.config.db_user,
            password=self.config.db_password,
            database=self.config.db_name,
        )

        try:
            with connection.cursor() as cursor:
                cursor.execute(self.config.query)
                data = cursor.fetchall()
                logger.info("Data fetched successfully from MySQL database.")
                return data
        except Exception as e:
            logger.error(f"Error during data ingestion: {e}")
            raise CustomException(e, sys) from e
        finally:
            # Always release the connection, whether the query succeeded or not.
            connection.close()

    def save_data_to_csv(self, data) -> Path:
        """Write the fetched rows to ``ingested_data.csv`` inside ``root_dir``.

        Only the rows are written; no header line is added.

        Args:
            data: Iterable of rows, each row an iterable of field values
                (for example the result of ``fetch_data``).

        Returns:
            Path of the CSV file that was written.

        Raises:
            CustomException: If the file cannot be written.
        """
        try:
            csv_file_path = Path(self.config.root_dir) / "ingested_data.csv"
            logger.info(f"Saving data to {csv_file_path}")

            # Explicit UTF-8 so non-ASCII values do not depend on the
            # platform's default encoding.
            with csv_file_path.open(mode="w", newline="", encoding="utf-8") as file:
                csv.writer(file).writerows(data)

            logger.info(f"Data saved to {csv_file_path} with size {get_size(csv_file_path)}")
            return csv_file_path
        except Exception as e:
            logger.error(f"Error saving data to CSV: {e}")
            raise CustomException(e, sys) from e
    

In [8]:
# Run the ingestion step end to end: build the config, fetch the rows from the
# database, then save them as a CSV file.
# Exceptions propagate on their own, so no try/except that merely re-raises is needed.
config_manager = ConfigurationManager()
data_ingestion_config = config_manager.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data = data_ingestion.fetch_data()
csv_file_path = data_ingestion.save_data_to_csv(data)


[2025-10-28 19:31:17,130: INFO: utils: yaml file: config\config.yaml loaded successfully]
[2025-10-28 19:31:17,141: INFO: utils: yaml file: config\params.yaml loaded successfully]
[2025-10-28 19:31:17,152: INFO: utils: yaml file: config\schema.yaml loaded successfully]
[2025-10-28 19:31:17,152: INFO: utils: created directory at: artifacts]
[2025-10-28 19:31:17,165: INFO: utils: created directory at: artifacts/data_ingestion]
[2025-10-28 19:31:17,165: INFO: 745233977: Starting data ingestion process]
[2025-10-28 19:31:17,979: INFO: 745233977: Data fetched successfully from MySQL database.]
[2025-10-28 19:31:17,990: INFO: 745233977: Saving data to artifacts\data_ingestion\ingested_data.csv]
[2025-10-28 19:31:18,111: INFO: 745233977: Data saved to artifacts\data_ingestion\ingested_data.csv with size ~ 1185 KB]
