In [60]:
import os


In [61]:
# Move the process working directory up one level so that relative paths used
# by later cells resolve against the parent of the current directory.
# NOTE(review): this is not idempotent - every re-run of this cell moves up one
# more level. The FileNotFoundError recorded at the end of this notebook for the
# relative path 'config\\config.yaml' is consistent with the working directory
# not being the project root when the config was read - confirm the cwd before
# running the pipeline cells.
os.chdir("../")


In [62]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings consumed by the data-ingestion stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory for the ingestion stage (read from the "root_dir" config key).
    root_dir: Path
    # Path the downloaded archive is saved to.
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path
    # Google Drive file id interpolated into the download URL.
    gdrive_file_id: str


In [64]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionArtifact:
    """Immutable record of what the data-ingestion stage produced.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Location of the downloaded ZIP archive.
    zip_file_path: Path
    # Directory the archive was extracted into.
    unzip_dir: Path


In [63]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [71]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Read the project configuration files and build per-stage config objects.

    The constructor loads both files through ``read_yaml`` and asks
    ``create_directories`` to prepare the ``artifacts_root`` entry of the main
    config (both helpers live in ``cnnClassifier.utils.common``; presumably
    they parse YAML and create directories respectively - verify there).
    """

    # Google Drive file id used when the caller does not pass one explicitly.
    DEFAULT_GDRIVE_FILE_ID = "1XaNxpHP3XwDyKjEw-1wirLcgLqMRSsV-"

    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH,
    ):
        """Load the config and params files and prepare the artifacts root.

        Args:
            config_filepath: Path of the main configuration file.
            params_filepath: Path of the parameters file.

        Raises:
            Whatever ``read_yaml`` raises when a file cannot be read. A
            relative path is resolved against the current working directory,
            so a wrong cwd surfaces here (e.g. as FileNotFoundError).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config["artifacts_root"]])

    def get_data_ingestion_config(self, gdrive_file_id: str = None) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        Args:
            gdrive_file_id: Google Drive file id of the dataset archive. When
                omitted (``None``), ``DEFAULT_GDRIVE_FILE_ID`` is used, which
                matches the previously hard-coded behaviour.

        Returns:
            A ``DataIngestionConfig`` whose paths come from the
            ``root_dir``, ``local_data_file`` and ``unzip_dir`` config keys.
        """
        section = self.config["data_ingestion"]

        root_dir = Path(section["root_dir"])
        local_data_file = Path(section["local_data_file"])
        unzip_dir = Path(section["unzip_dir"])

        if gdrive_file_id is None:
            gdrive_file_id = self.DEFAULT_GDRIVE_FILE_ID

        # The archive's parent directory is included so the download target
        # location is prepared alongside the stage directories.
        create_directories([root_dir, unzip_dir, local_data_file.parent])

        return DataIngestionConfig(
            root_dir=root_dir,
            local_data_file=local_data_file,
            unzip_dir=unzip_dir,
            gdrive_file_id=gdrive_file_id,
        )

In [72]:
import gdown
import zipfile
from pathlib import Path
# from cnnClassifier.entity.config_entity import DataIngestionConfig
# from cnnClassifier.entity.artifact_entity import DataIngestionArtifact
from cnnClassifier.utils.common import create_directories
from cnnClassifier.logger.logging import logger

In [73]:
class DataIngestion:
    """Download the dataset archive from Google Drive and extract it."""

    def __init__(self, data_ingestion_config: DataIngestionConfig):
        """Store the ingestion settings.

        Args:
            data_ingestion_config: Provides ``gdrive_file_id``,
                ``local_data_file`` and ``unzip_dir`` for the methods below.
        """
        self.config = data_ingestion_config

    def download_from_gdrive(self) -> Path:
        """Download the ZIP identified by the configured Google Drive file id.

        Returns:
            ``self.config.local_data_file``, the path the archive was saved to.

        Raises:
            RuntimeError: If ``gdown.download`` returns ``None``, which is how
                gdown releases that do not raise signal a failed download.
            Exception: Anything ``gdown.download`` itself raises propagates
                unchanged.
        """
        url = f"https://drive.google.com/uc?id={self.config.gdrive_file_id}"
        output_path = str(self.config.local_data_file)

        logger.info(f"Downloading dataset from Google Drive: {url}")
        logger.info(f"Saving to: {output_path}")

        downloaded = gdown.download(url, output_path, quiet=False)
        # Without this check a failed download would be logged as a success
        # and only fail later, during extraction, with a misleading error.
        if downloaded is None:
            raise RuntimeError(
                f"Google Drive download failed for {url} (target: {output_path})"
            )

        logger.info("Download completed successfully.")
        return self.config.local_data_file

    def extract_zip_file(self, zip_path: Path = None) -> Path:
        """Extract a ZIP archive into the configured ``unzip_dir``.

        Args:
            zip_path: Archive to extract. Defaults to
                ``self.config.local_data_file`` when ``None``.

        Returns:
            The directory the archive was extracted into.

        Raises:
            FileNotFoundError: If the archive does not exist.
            zipfile.BadZipFile: If the file is not a valid ZIP archive.
        """
        if zip_path is None:
            zip_path = self.config.local_data_file

        unzip_dir = self.config.unzip_dir
        create_directories([unzip_dir])

        logger.info(f"Extracting: {zip_path}")

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(unzip_dir)

        logger.info(f"Extraction completed: {unzip_dir}")
        return unzip_dir

    def initiate_data_ingestion(self) -> DataIngestionArtifact:
        """Run the download followed by the extraction.

        Returns:
            A ``DataIngestionArtifact`` holding the archive path and the
            extraction directory.
        """
        logger.info("=== Starting Data Ingestion ===")

        zip_path = self.download_from_gdrive()
        unzip_path = self.extract_zip_file(zip_path)

        artifact = DataIngestionArtifact(
            zip_file_path=zip_path,
            unzip_dir=unzip_path
        )

        logger.info("Data Ingestion Artifact created successfully.")
        logger.info(f"{artifact}")

        return artifact

In [74]:
# from cnnClassifier.config.configuration_manager import ConfigurationManager
# from cnnClassifier.components.data_ingestion import DataIngestion
# from cnnClassifier import logger

In [75]:
class DataIngestionTrainingPipeline:
    """Stage 01 driver: builds the ingestion config and runs the ingestion."""

    def __init__(self):
        """The pipeline holds no state; nothing to initialise."""

    def main(self):
        """Run the data-ingestion stage, logging its start and completion.

        Any exception is logged through ``logger.error`` and then re-raised
        to the caller.
        """
        try:
            logger.info("Stage 01: Data Ingestion started")

            ingestion_settings = ConfigurationManager().get_data_ingestion_config()
            DataIngestion(ingestion_settings).initiate_data_ingestion()

            logger.info("Stage 01: Data Ingestion completed")
        except Exception as err:
            logger.error(err)
            raise err

In [77]:
# from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
# from cnnClassifier import logger

# Entry point: run the Stage 01 data-ingestion pipeline end to end.
if __name__ == "__main__":
    try:
        obj = DataIngestionTrainingPipeline()
        obj.main()
    except Exception as e:
        # Log and re-raise so the failure stays visible to whoever ran this.
        # Note: DataIngestionTrainingPipeline.main() already logs the error
        # before re-raising, so a failure inside main() is logged twice.
        logger.error(e)
        raise e


FileNotFoundError: [Errno 2] No such file or directory: 'config\\config.yaml'