In [1]:
import os


In [2]:
%pwd

'c:\\Users\\ARSH\\Desktop\\MLOps\\kidney-disease-classification\\research'

In [3]:
# Move from the `research/` notebook folder up to the project root.
# Guarded so that re-running this cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("..")

In [4]:
%pwd

'c:\\Users\\ARSH\\Desktop\\MLOps\\kidney-disease-classification'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataingestionConfig:
    """Immutable bundle of settings used by the data-ingestion stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Directory that holds everything produced by the ingestion stage.
    root_dir: Path
    # Remote location the dataset archive is downloaded from.
    source_url: str
    # Local path the downloaded archive is written to.
    local_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path


In [6]:
from cnnClassifier.constants import CONFIG_FILE_PATH,PARAMS_FILE_PATH
from cnnClassifier.utils.common import create_directories,read_yaml

class ConfigManager:
    """Reads the project's YAML files and hands out per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load the config and params YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_dataingestion_config(self) -> DataingestionConfig:
        """Build the data-ingestion settings from the `data_ingestion` config section.

        The stage's root directory is created as a side effect.

        Returns:
            A `DataingestionConfig` carrying the section's values unchanged.
        """
        ingestion_section = self.config.data_ingestion

        create_directories([ingestion_section.root_dir])

        return DataingestionConfig(
            root_dir=ingestion_section.root_dir,
            source_url=ingestion_section.source_url,
            local_file=ingestion_section.local_file,
            unzip_dir=ingestion_section.unzip_dir,
        )


In [7]:
import os
import re
import zipfile
from urllib.parse import parse_qs, urlparse

import gdown

from cnnClassifier import logger
from cnnClassifier.utils.common import get_file_size

In [8]:
class DataIngestion:
    """Downloads the dataset archive from Google Drive and extracts it locally."""

    def __init__(self, config: "DataingestionConfig"):
        """Keep the ingestion settings (source URL, archive path, extraction directory)."""
        self.config = config

    @staticmethod
    def _extract_file_id(url: str) -> str:
        """Return the Google Drive file ID embedded in ``url``.

        Recognised forms, in order of preference:
          * ``.../d/<id>/...`` (share links)
          * ``...?id=<id>`` (direct ``uc``/``open`` links)
          * the second-to-last ``/``-separated segment, as a last resort

        Raises:
            ValueError: If no candidate ID can be found.
        """
        path_match = re.search(r"/d/([^/?#]+)", url)
        if path_match:
            return path_match.group(1)

        id_values = parse_qs(urlparse(url).query).get("id")
        if id_values:
            return id_values[0]

        # Positional fallback, kept for URLs that match neither known form.
        segments = url.split("/")
        if len(segments) >= 2 and segments[-2]:
            return segments[-2]

        raise ValueError(f"Could not extract a Google Drive file ID from URL: {url}")

    def download_data(self):
        """Download the archive at ``config.source_url`` to ``config.local_file``.

        The parent directory of ``config.local_file`` is created if missing.

        Raises:
            ValueError: If no file ID can be extracted from the source URL.
            RuntimeError: If gdown reports no downloaded file.
        """
        dataset_url = self.config.source_url
        # gdown expects a string path for its output argument, so normalise Path-like values.
        zip_download_path = str(self.config.local_file)

        # Create the directory the archive actually goes to, not a hard-coded one.
        parent_dir = os.path.dirname(zip_download_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        logger.info(f"Downloading data from {dataset_url} to {zip_download_path}")
        file_id = self._extract_file_id(dataset_url)
        downloaded = gdown.download(
            f"https://drive.google.com/uc?id={file_id}", zip_download_path, quiet=False
        )
        # Do not log success when gdown hands back nothing.
        if downloaded is None:
            raise RuntimeError(f"Download failed for {dataset_url}: no file was written.")
        logger.info(f"Data downloaded successfully to {zip_download_path}")

    def unzip_data(self):
        """Extract ``config.local_file`` into ``config.unzip_dir``.

        The extraction directory is created if missing.

        Raises:
            zipfile.BadZipFile: If ``config.local_file`` is not a valid zip archive.
        """
        if not zipfile.is_zipfile(self.config.local_file):
            logger.error(f"{self.config.local_file} is not a valid zip file.")
            raise zipfile.BadZipFile(f"{self.config.local_file} is not a valid zip file.")
        os.makedirs(self.config.unzip_dir, exist_ok=True)
        logger.info("Unzipping data...")
        with zipfile.ZipFile(self.config.local_file, 'r') as zip_ref:
            zip_ref.extractall(self.config.unzip_dir)
            
            
    


In [9]:
# Run the data-ingestion stage end to end: load config, download the archive, extract it.
try:
    config=ConfigManager()
    data_ingestion_config=config.get_dataingestion_config()
    dataingestion=DataIngestion(config=data_ingestion_config)
    dataingestion.download_data()
    dataingestion.unzip_data()
except Exception as e:
    # Log with the traceback, then re-raise so the cell visibly fails
    # instead of letting later cells run against missing data.
    logger.exception(f"Error occurred during data ingestion: {e}")
    raise
    

YAML content loaded: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_url': 'https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view?usp=sharin', 'local_file': 'artifacts/data_ingestion/data.zip', 'unzip_dir': 'artifacts/data_ingestion'}, 'prepare_base_model': {'root_dir': 'artifacts/prepare_base_model', 'base_model_path': 'artifacts/prepare_base_model/base_model.h5', 'updated_base_model_path': 'artifacts/prepare_base_model/updated_model.h5'}, 'training': {'root_dir': 'artifacts/training', 'model_checkpoint': 'artifacts/training/model.h5'}}
YAML content loaded: {'AUGMENTATION': True, 'IMAGE_SIZE': [224, 224, 3], 'BATCH_SIZE': 32, 'INCLUDE_TOP': False, 'CLASSES': 2, 'WEIGHTS': 'imagenet', 'EPOCHS': 4, 'LEARNING_RATE': 0.001}
[2025-08-24 14:44:58,430] INFO in common: Created directory: artifacts
[2025-08-24 14:44:58,431] INFO in common: Created directory: artifacts/data_ingestion
[2025-08-24 14:44:58,431] INFO in 4032984887

Downloading...
From (original): https://drive.google.com/uc?id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3
From (redirected): https://drive.google.com/uc?id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3&confirm=t&uuid=0dd97300-0fbb-4e40-9afa-bdf6ad7a0934
To: c:\Users\ARSH\Desktop\MLOps\kidney-disease-classification\artifacts\data_ingestion\data.zip
100%|██████████| 57.7M/57.7M [00:05<00:00, 11.2MB/s]

[2025-08-24 14:45:07,249] INFO in 4032984887: Data downloaded successfully to artifacts/data_ingestion/data.zip
[2025-08-24 14:45:07,250] INFO in 4032984887: Unzipping data...



