In [1]:
import os
os.chdir("../")

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
  root_dir: Path
  source_url: str
  local_data_file: Path
  unzip_dir: Path

In [3]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
  def __init__(
    self,
    config_filepath = CONFIG_FILE_PATH,
    params_filepath = PARAMS_FILE_PATH
  ):
    self.config = read_yaml(config_filepath)
    self.params = read_yaml(params_filepath)

    create_directories([Path(self.config.artifacts_root)])

  def get_data_ingestion_config(self) -> DataIngestionConfig:
    config = self.config.data_ingestion

    create_directories([Path(config.root_dir)])

    data_ingestion_config = DataIngestionConfig(
      root_dir=config.root_dir,
      source_url=config.source_URL,
      local_data_file=config.local_data_file,
      unzip_dir=config.unzip_dir
    )

    return data_ingestion_config

In [5]:
import os
import zipfile
import gdown
from cnnClassifier import logger
from cnnClassifier.utils.common import get_size

In [None]:
class DataIngestion:
  def __init__(self, config:DataIngestionConfig):
    self.config = config

  def download_file(self) -> str:
    try:
      dataset_url = self.config.source_url
      zip_download_dir = self.config.local_data_file
      os.makedirs("artifacts/data_ingestion", exist_ok=True)
      logger.info(f"Downloading file from :[{dataset_url}] to :[{zip_download_dir}]")

      file_id = dataset_url.split('/')[-2]
      prefix = 'https://drive.google.com/uc?/export=download&id='
      gdown.download(prefix + file_id,zip_download_dir)

      logger.info(f"Download data from [{dataset_url}] to :[{zip_download_dir}] ")
    except Exception as e:
      logger.error(f"Error occurred while downloading file from [{dataset_url}] to :[{zip_download_dir}]")


  def extract_zip_file(self) :
    unzip_path = self.config.unzip_dir
    os.makedirs(unzip_path, exist_ok=True)
    with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
      zip_ref.extractall(unzip_path)

In [13]:
try:
  config = ConfigurationManager()
  data_ingestion_config = config.get_data_ingestion_config()
  data_ingestion = DataIngestion(config=data_ingestion_config)
  data_ingestion.download_file()
  data_ingestion.extract_zip_file()

except Exception as e:
  raise e

[2025-10-20 00:10:27,437: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-10-20 00:10:27,439: INFO: common: yaml file: params.yaml loaded successfully]
[2025-10-20 00:10:27,440: INFO: common: created directory at: artifacts]
[2025-10-20 00:10:27,440: INFO: common: created directory at: artifacts\data_ingestion]
[2025-10-20 00:10:27,440: INFO: 582219750: Downloading file from :[https://drive.google.com/file/d/1h1jzZ307vV6pKkaERxFF4jn9wne9XHp7/view?usp=sharing] to :[artifacts/data_ingestion/data.zip]]


Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1h1jzZ307vV6pKkaERxFF4jn9wne9XHp7
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1h1jzZ307vV6pKkaERxFF4jn9wne9XHp7&confirm=t&uuid=f7c9b135-6b94-493f-bd47-48dd1f36525e
To: d:\Python Projects\Kidney-Disease-Classification\artifacts\data_ingestion\data.zip
100%|██████████| 940M/940M [04:07<00:00, 3.80MB/s] 


[2025-10-20 00:14:38,923: INFO: 582219750: Download data from [https://drive.google.com/file/d/1h1jzZ307vV6pKkaERxFF4jn9wne9XHp7/view?usp=sharing] to :[artifacts/data_ingestion/data.zip] ]
