1. Data Ingestion

In [3]:
import os

In [1]:
%pwd

'c:\\Users\\Orçamento\\Desktop\\Bike Sales\\VeloAnalytics\\notebooks'

In [4]:
# ConfigurationManager defaults to Path("config.yaml"), resolved against the working
# directory, so step up one level when that file is not visible from here.
# The guard makes this cell idempotent: once config.yaml is reachable, re-running
# the cell no longer climbs one directory further each time.
if not os.path.exists("config.yaml"):
    os.chdir('../')

In [5]:
%pwd

'c:\\Users\\Orçamento\\Desktop\\Bike Sales\\VeloAnalytics'

Entity

In [11]:
from dataclasses import dataclass
from pathlib import Path

In [19]:
# --- Entity: settings consumed by the data ingestion stage ---
@dataclass(frozen=True)
class DataIngestionConfig:
    """
    Immutable bundle of filesystem locations for the data ingestion stage.

    Instances are frozen, so fields cannot be reassigned after construction.

    Attributes:
        root_dir: Directory dedicated to this stage's artifacts.
        source_zip_file: Zip archive that the stage extracts.
        unzip_dir: Directory the archive contents are extracted into.
    """
    root_dir: Path
    source_zip_file: Path
    unzip_dir: Path

Config

In [None]:
from src.utils import read_yaml, create_directories
from pathlib import Path

In [21]:
class ConfigurationManager:
    """
    Reads the project's YAML configuration and hands out per-stage config objects.
    """

    def __init__(self, config_filepath = Path("config.yaml")):
        """
        Load the main configuration file and ensure the artifacts root exists.

        Args:
            config_filepath: Location of the YAML configuration file. Defaults to
                "config.yaml", resolved against the current working directory.
        """
        self.config = read_yaml(config_filepath)

        # The top-level artifacts folder is shared by all stages
        artifacts_root = Path(self.config.artifacts_root)
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """
        Build the data ingestion settings from the `data_ingestion` config section.

        As a side effect, the stage's root directory is passed to
        `create_directories` before the settings object is built.

        Returns:
            DataIngestionConfig whose `root_dir`, `source_zip_file` and `unzip_dir`
            are the corresponding config values wrapped in `Path`.
        """
        section = self.config.data_ingestion

        # Stage-specific artifact folder (e.g., artifacts/data_ingestion)
        stage_root = Path(section.root_dir)
        create_directories([stage_root])

        return DataIngestionConfig(
            root_dir=stage_root,
            source_zip_file=Path(section.source_zip_file),
            unzip_dir=Path(section.unzip_dir),
        )

Components

In [22]:
import os
import zipfile
from pathlib import Path
from src.logging import logger

class DataIngestion:
    """
    Component that extracts the configured source archive.

    Attributes:
        config: DataIngestionConfig supplying `source_zip_file` and `unzip_dir`.
    """

    def __init__(self, config: DataIngestionConfig):
        """
        Keep a reference to the stage configuration for later use.
        """
        self.config = config

    def unzip_source_file(self):
        """
        Extract every member of `config.source_zip_file` into `config.unzip_dir`.

        The target directory is created first when it is missing. One log line is
        emitted before extraction starts and one after it succeeds.
        """
        logger.info(f"Unzipping source file: {self.config.source_zip_file} into {self.config.unzip_dir}")

        destination = self.config.unzip_dir
        # Create the target directory up front; exist_ok=True tolerates an existing one
        os.makedirs(destination, exist_ok=True)

        archive = zipfile.ZipFile(self.config.source_zip_file, 'r')
        with archive:
            archive.extractall(destination)
            logger.info(f"Successfully unzipped file to {self.config.unzip_dir}")

In [23]:
# --- STAGE 1: DATA INGESTION ---
# Wires the configuration manager and the ingestion component together and runs
# the unzip step. STAGE_NAME is what appears in the completion log line.
STAGE_NAME = "Data Ingestion stage"
try:
    # Load config.yaml and make sure the artifacts root exists
    config = ConfigurationManager()

    # Build the typed settings for the data ingestion stage
    data_ingestion_config = config.get_data_ingestion_config()

    # Hand the settings to the component that performs the extraction
    data_ingestion = DataIngestion(config=data_ingestion_config)

    # Extract the source archive into the configured directory
    data_ingestion.unzip_source_file()
    logger.info(f">>>>>> Stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
except Exception as e:
    # Record the failure with its traceback, then re-raise the active exception
    # unchanged so the cell still fails visibly.
    logger.exception(e)
    raise

[2025-08-27 15:51:01,392: INFO: utils: YAML file loaded successfully: config.yaml]
[2025-08-27 15:51:01,396: INFO: utils: Directory created or already exists: artifacts]
[2025-08-27 15:51:01,398: INFO: utils: Directory created or already exists: artifacts\data_ingestion]
[2025-08-27 15:51:01,400: INFO: 1630349312: Unzipping source file: BI Test.zip into artifacts\data_ingestion\unzipped_data]
[2025-08-27 15:51:01,446: INFO: 1630349312: Successfully unzipped file to artifacts\data_ingestion\unzipped_data]
[2025-08-27 15:51:01,448: INFO: 357816678: >>>>>> Stage Data Transformation stage completed <<<<<<

