In [1]:
import os

In [2]:
os.getcwd()

'e:\\STUDY\\TENSORFLOW\\Projects\\1_CNN_Project\\research'

In [3]:
os.chdir("../")

In [4]:
pwd

'e:\\STUDY\\TENSORFLOW\\Projects\\1_CNN_Project'

In [5]:
# Update src/cnn_classifier/entity/config_entity.py.

from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:

    """
    dataclasses provides a decorator (@dataclass) to automatically generate methods like
    __init__, __repr__, and __eq__ for classes, thus simplifying the creation
     of data containers.

    root_dir, source_url, local_data_file, unzip_dir : arguments to __init__ method.

    """
    root_dir : Path
    source_url : str
    local_data_file : Path
    unzip_dir : Path
    

In [6]:
from cnn_classifier.constants import *
from cnn_classifier.utils.common import read_yaml, create_directories

In [7]:
# Update the src/cnn_classifier/config/configuration.py file.

class ConfigurationManager:

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):

        self.config=read_yaml(config_filepath)
        self.params=read_yaml(params_filepath)

        # Creating directory
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:

        """
        returns the object of DataIngestionConfig class
        """
        config = self.config.data_ingestion

        # Create a directory
        create_directories([config.root_dir])

        # Creating an object of DataIngestionConfig class
        data_ingestion_config=DataIngestionConfig(
            root_dir=config.root_dir,
            source_url=config.source_url,
            local_data_file=config.local_data_file,
            unzip_dir=config.unzip_dir
            )

        return data_ingestion_config

In [None]:
# Update the src/cnn_classifier/components/data_ingestion.py file

import os
import zipfile
import gdown
from cnn_classifier import logger
from cnn_classifier.utils.common import get_size

In [9]:
class DataIngestion:

    def __init__(self, config:DataIngestionConfig):

        # Object of DataIngestionConfig class
        self.config = config


    def download_file(self) ->str :

        """
        fetch data from the url.
        """

        try :
            
            # url from where data will be downloaded
            dataset_url=self.config.source_url
            # name of the filepath (in the local system) where file being downloaded from google drive will be saved.
            zip_download_dir=self.config.local_data_file
            # creating root directory for data ingestion if not already been created
            os.makedirs(name=self.config.root_dir , exist_ok=True)

            file_id=dataset_url.split("/")[-2]
            prefix="https://drive.google.com/uc?/export=download&id="
            # downloading the file from gdrive
            file_url=prefix+file_id
            logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")
            gdown.download(url=file_url,output=zip_download_dir)
            logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")

        except Exception as e:
            raise e
        
    def extract_zip_file(self):

        """
        This method extracts the zip file.
        """
        unzip_dir_path=self.config.unzip_dir

        # Creating the directory where data zip file will be extracted, if not created already
        os.makedirs(name=unzip_dir_path, exist_ok=True)

        with zipfile.ZipFile(file=self.config.local_data_file, mode='r') as zip_ref:
            
            # path : specifies a directory to extract to.
            zip_ref.extractall(path=unzip_dir_path)



In [None]:
# Pipeline : update the src/cnn_classifier/pipeline/Stage01_data_ingestion.py file

try :
    
    config=ConfigurationManager()
    data_ingestion_config=config.get_data_ingestion_config()
    data_ingestion=DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()

except Exception as e:
    raise e



[2025-04-25 10:33:32,826 : INFO : common : yaml file : config\config.yaml loaded successfully]
[2025-04-25 10:33:32,834 : INFO : common : yaml file : params.yaml loaded successfully]
[2025-04-25 10:33:32,836 : INFO : common : Created directory at : artifacts]
[2025-04-25 10:33:32,837 : INFO : common : Created directory at : artifacts/data_ingestion]
[2025-04-25 10:33:32,839 : INFO : 841086673 : Downloading data from https://drive.google.com/file/d/1wLkVKHAOdJHmufkkgP65uGe3WLyu3WsO/view?usp=sharing into file artifacts/data_ingestion/data.zip]


Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1wLkVKHAOdJHmufkkgP65uGe3WLyu3WsO
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1wLkVKHAOdJHmufkkgP65uGe3WLyu3WsO&confirm=t&uuid=5241ee46-196a-4168-89d9-36bc516ab8db
To: e:\STUDY\TENSORFLOW\Projects\1_CNN_Project\artifacts\data_ingestion\data.zip
100%|██████████| 80.3M/80.3M [00:16<00:00, 4.96MB/s]

[2025-04-25 10:33:54,535 : INFO : 841086673 : Downloaded data from https://drive.google.com/file/d/1wLkVKHAOdJHmufkkgP65uGe3WLyu3WsO/view?usp=sharing into file artifacts/data_ingestion/data.zip]



