In [1]:
import os 

In [2]:
%pwd

'/home/ahmed/project/Kidney-Disease-Classification-Deep-learning-project/recsearch'

In [3]:
# Move from the notebook folder up to the project root so that the relative
# 'config_yaml/...' paths used further down resolve.
# Guarded on the presence of the config folder: an unconditional chdir climbs
# one directory further every time this cell is re-run.
if not os.path.isdir('config_yaml'):
    os.chdir('../')

In [4]:
%pwd

'/home/ahmed/project/Kidney-Disease-Classification-Deep-learning-project'

In [4]:
from dataclasses import dataclass 
from pathlib import Path 

@dataclass(frozen=True)
class DataIngestionConfig:
    """Read-only bundle of the settings consumed by the data-ingestion stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    NOTE(review): get_data_ingestion_config passes the YAML values through
    without converting them, so at runtime these fields may hold plain strings
    rather than Path objects -- confirm against read_yaml.
    """
    # Directory that is created before the download starts.
    root_dir: Path
    # URL the dataset is downloaded from (a Google Drive link in this notebook).
    source_url: str
    # Location the downloaded zip archive is written to and later read from.
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path


In [5]:
# Default locations of the YAML config and parameter files; used as the default
# arguments of ConfigerationManager.__init__.
# Both are relative paths, so they resolve against the current working
# directory (the project root once the os.chdir cell above has run).
CONFIG_YAML_FILE = Path(r'config_yaml/config.yaml') 
PARAM_YAML_FILE = Path(r'config_yaml/param.yaml')

from project.utils import create_directories,read_yaml

In [20]:
# Scratch check: pull the file id out of a Google Drive sharing link.
data_url = 'https://drive.google.com/file/d/1RhMp1TyTY4YLVjMhCzAM7SqzMlSnn-Ib/view?usp=sharing'
# Alternative link form kept for reference (the positional lookup below does
# not work for it):
#data_url = 'https://drive.google.com/uc?/export=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3'

# In a '.../file/d/<id>/view?...' link the id is the second-to-last segment.
url_segments = data_url.split('/')
file_id = url_segments[-2]
file_id

'1RhMp1TyTY4YLVjMhCzAM7SqzMlSnn-Ib'

In [21]:
import gdown 
# Scratch download: fetch the archive into the current working directory to
# confirm that the rebuilt direct-download URL works with gdown.
data_url = 'https://drive.google.com/file/d/1RhMp1TyTY4YLVjMhCzAM7SqzMlSnn-Ib/view?usp=sharing'

# The id is the second-to-last path segment of the sharing link.
share_segments = data_url.split('/')
file_id = share_segments[-2]
print(file_id)

download_url = f"https://drive.google.com/uc?export=download&id={file_id}"

# No output path is given, so gdown chooses the file name itself; the call's
# return value is displayed as the cell output.
gdown.download(download_url,fuzzy=True)

1RhMp1TyTY4YLVjMhCzAM7SqzMlSnn-Ib


Downloading...
From (original): https://drive.google.com/uc?id=1RhMp1TyTY4YLVjMhCzAM7SqzMlSnn-Ib
From (redirected): https://drive.google.com/uc?id=1RhMp1TyTY4YLVjMhCzAM7SqzMlSnn-Ib&confirm=t&uuid=162f5a62-2a40-4223-afcb-abd9a9e356cb
To: /home/ahmed/project/Kidney-Disease-Classification-Deep-learning-project/kidney-ct-scan-image-20251122T171015Z-1-001.zip
100%|██████████| 59.1M/59.1M [00:16<00:00, 3.67MB/s]


'kidney-ct-scan-image-20251122T171015Z-1-001.zip'

In [6]:
class ConfigerationManager:
    """Loads the YAML config and parameter files and builds stage configs."""

    def __init__(self, config=CONFIG_YAML_FILE, param=PARAM_YAML_FILE):
        """Read both YAML files and prepare the artifacts root.

        Args:
            config: path handed to ``read_yaml`` for the main configuration.
            param: path handed to ``read_yaml`` for the parameters.
        """
        self.config = read_yaml(config)
        self.param = read_yaml(param)

        # Hand the top-level artifacts location to the project helper
        # (presumably it creates the directory -- see project.utils).
        artifacts_root = self.config.artifacts_root
        create_directories(artifacts_root)

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the DataIngestionConfig from the ``data_ingestion`` section.

        The stage's ``root_dir`` is passed to ``create_directories`` first; the
        four settings are then copied over unchanged.
        """
        ingestion_section = self.config.data_ingestion
        create_directories(ingestion_section.root_dir)

        field_names = ("root_dir", "source_url", "local_data_file", "unzip_dir")
        settings = {name: getattr(ingestion_section, name) for name in field_names}
        return DataIngestionConfig(**settings)


In [7]:
import os
import zipfile
import gdown
from project.logger import logging

class DataIngestion:
    """Download the dataset archive from Google Drive and unpack it locally.

    All locations and the source URL are read from the config object given to
    the constructor (``root_dir``, ``source_url``, ``local_data_file`` and
    ``unzip_dir`` attributes).
    """

    def __init__(self, config: "DataIngestionConfig"):
        self.config = config

    @staticmethod
    def _extract_file_id(url: str) -> str:
        """Return the Google Drive file id contained in ``url``.

        Supports the sharing form (``.../file/d/<id>`` with or without a
        trailing ``/view?...`` segment) and the query form (``...?id=<id>``).

        Raises:
            ValueError: if neither form is found in ``url``.
        """
        # Imported here so the class does not depend on an extra top-level import.
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(url)

        # Query form: https://drive.google.com/uc?export=download&id=<id>
        query_ids = parse_qs(parsed.query).get("id")
        if query_ids:
            return query_ids[0]

        # Sharing form: the id is the path segment right after "d".
        segments = [segment for segment in parsed.path.split("/") if segment]
        if "d" in segments:
            position = segments.index("d")
            if position + 1 < len(segments):
                return segments[position + 1]

        raise ValueError(
            f"Could not extract a Google Drive file id from URL: {url!r}"
        )

    def download_data(self):
        """Download the archive at ``config.source_url`` to ``config.local_data_file``.

        Raises:
            ValueError: if no Drive file id can be found in the source URL.
            RuntimeError: if gdown reports no downloaded file or the target
                file is missing afterwards.
        """
        data_url = self.config.source_url
        # Normalise to a plain string path: the config field is annotated as
        # Path, and gdown is given a string output path here.
        # NOTE(review): gdown appears to treat a non-string output as an open
        # file object -- confirm against the installed gdown version.
        zip_down_dir = os.fspath(self.config.local_data_file)

        # create directories safely (stage root and the archive's own folder)
        os.makedirs(self.config.root_dir, exist_ok=True)
        archive_parent = os.path.dirname(zip_down_dir)
        if archive_parent:
            os.makedirs(archive_parent, exist_ok=True)

        logging.info(f"Downloading data from {data_url} to {zip_down_dir}")

        # extract file id and rebuild the direct-download URL
        file_id = self._extract_file_id(data_url)
        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"

        # download the file; do not report success unless a file really arrived
        downloaded = gdown.download(download_url, zip_down_dir)
        if downloaded is None or not os.path.isfile(zip_down_dir):
            raise RuntimeError(
                f"Download failed: no file was written to {zip_down_dir} "
                f"from {download_url}"
            )

        logging.info("Download completed successfully!")

    def extract_zip_file(self):
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        The target directory is created if it does not exist yet.
        """
        unzip_path = self.config.unzip_dir

        os.makedirs(unzip_path, exist_ok=True)

        with zipfile.ZipFile(self.config.local_data_file, 'r') as f:
            f.extractall(unzip_path)

        logging.info(f"Extraction completed at {unzip_path}")


In [8]:
# Run the data-ingestion stage end to end: load config, download, extract.
try:
    configeration = ConfigerationManager()
    data_ingestion_config = configeration.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_data()
    data_ingestion.extract_zip_file()
except Exception:
    # Log the failure, then re-raise the same exception with a bare `raise`
    # so the notebook still stops on the original error.
    logging.exception("Data ingestion stage failed")
    raise

Downloading...
From (original): https://drive.google.com/uc?export=download&id=1RhMp1TyTY4YLVjMhCzAM7SqzMlSnn-Ib
From (redirected): https://drive.google.com/uc?export=download&id=1RhMp1TyTY4YLVjMhCzAM7SqzMlSnn-Ib&confirm=t&uuid=515ca2bb-9429-4a07-99c9-a2df6034c45b
To: /home/ahmed/project/Kidney-Disease-Classification-Deep-learning-project/artifacts/data_ingestion/data.zip
100%|██████████| 59.1M/59.1M [00:16<00:00, 3.54MB/s]
