In [1]:
import os

In [2]:
%pwd

'd:\\Image_Classification_CNN_and_MLFlow\\research'

In [3]:
# Step up one level (from research/ to the project root) so relative paths resolve from there.
# Not idempotent: every re-run of this cell moves up one more directory.
os.chdir(os.pardir)

In [4]:
%pwd

'd:\\Image_Classification_CNN_and_MLFlow'

In [5]:
from dataclasses import dataclass
from pathlib import Path

In [6]:
#entity
@dataclass(frozen=True)
class DataIngestionConfig:
    """Read-only bundle of settings for the data-ingestion step.

    The dataclass is frozen, so fields cannot be reassigned after
    construction. The annotations are not enforced at runtime.
    """

    root_dir: Path         # working directory of the ingestion step
    source_url: str        # share link the dataset archive is downloaded from
    local_data_file: Path  # where the downloaded archive is stored
    unzip_dir: Path        # directory the archive is extracted into

In [12]:
from src.CNNClassifier.constants import *
from src.CNNClassifier.utils.common import read_yaml,create_directories

In [17]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects.

    Args:
        config_file_path: Path of the main configuration YAML. Defaults to
            ``CONFIG_FILE_PATH``.
        params_filepath: Path of the parameters YAML. Defaults to
            ``params.yaml``, resolved against the current working directory.
    """

    def __init__(self, config_file_path=CONFIG_FILE_PATH, params_filepath=Path("params.yaml")):
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_filepath)

        # Ensure the top-level artifacts directory exists before any stage runs.
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` config section.

        Side effect: asks ``create_directories`` to create the stage's root
        directory.

        Returns:
            A DataIngestionConfig holding the section's values exactly as
            they were read from the config file (no type conversion).
        """
        ingestion = self.config.data_ingestion

        create_directories([ingestion.root_dir])

        return DataIngestionConfig(
            root_dir=ingestion.root_dir,
            source_url=ingestion.source_url,
            local_data_file=ingestion.local_data_file,
            unzip_dir=ingestion.unzip_dir,
        )
    




In [24]:
import logging
import zipfile
import gdown
from src.CNNClassifier import logger
from src.CNNClassifier import config

class DataIngestion:
    """Downloads the dataset archive and unpacks it, driven by a DataIngestionConfig."""

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_data(self) -> str:
        """Download the dataset archive from Google Drive with gdown.

        The Drive file id is taken from the second-to-last ``/``-separated
        segment of ``config.source_url``.

        Returns:
            The local path (as a string) the archive was written to.

        Raises:
            RuntimeError: if gdown reports no output file.
            Exception: any other error raised while downloading; it is
                logged and re-raised unchanged.
        """
        try:
            dataset_url = self.config.source_url
            # Hand gdown a plain string path, even if the config holds a Path.
            zipfile_dir = str(self.config.local_data_file)

            # Create the directory of the actual download target instead of
            # a hard-coded location, so a changed config keeps working.
            os.makedirs(os.path.dirname(zipfile_dir) or ".", exist_ok=True)
            logger.info(f"Downloading data from {dataset_url} to {zipfile_dir}")

            # Share links look like .../file/d/<id>/view?usp=sharing.
            file_id = dataset_url.split("/")[-2]
            prefix = "https://drive.google.com/uc?export=download&id="
            downloaded = gdown.download(prefix + file_id, zipfile_dir)
            if downloaded is None:
                # Guard so a missing output file is never reported as success.
                raise RuntimeError(f"gdown could not download {dataset_url}")

            logger.info("Data downloaded successfully")
            return zipfile_dir

        except Exception as e:
            logger.error(f"Data download failed {e}")
            raise

    def extract_zipfile(self):
        """Extract the downloaded archive into ``config.unzip_dir``.

        The target directory is created first if it does not exist.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
            zip_ref.extractall(unzip_path)
        logger.info("Data Extracted successfully from the Zip file")


        

    

In [25]:
# pipeline of all the entities and components

# Run the ingestion stage end to end: load config, download the archive, unpack it.
# Exceptions propagate unchanged, so no try/except wrapper that merely re-raises is needed.
# NOTE: `config` rebinds the name imported by `from src.CNNClassifier import config`.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_data()
data_ingestion.extract_zipfile()


[2024-03-18 18:09:21,491: INFO:common:yaml file: config\config.yaml loaded successfully]
[2024-03-18 18:09:21,491: INFO:common:yaml file: params.yaml loaded successfully]
[2024-03-18 18:09:21,502: INFO:common:created directory at: artifacts]
[2024-03-18 18:09:21,536: INFO:common:created directory at: artifacts/data_ingestion]
[2024-03-18 18:09:21,544: INFO:2444971426:Downloading data from https://drive.google.com/file/d/1Q6afEKxlsDSo-FBg6r1mLP5kRJwwcbZA/view?usp=sharing to artifacts/data_ingestion/data.zip]


Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1Q6afEKxlsDSo-FBg6r1mLP5kRJwwcbZA
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1Q6afEKxlsDSo-FBg6r1mLP5kRJwwcbZA&confirm=t&uuid=eb57f64c-c669-4f36-b2c2-c816313f2367
To: d:\Image_Classification_CNN_and_MLFlow\artifacts\data_ingestion\data.zip
100%|██████████| 49.0M/49.0M [00:02<00:00, 19.5MB/s]

[2024-03-18 18:09:26,754: INFO:2444971426:Data downloaded successfully]



