In [1]:
import os

In [2]:
# Show the kernel's current working directory.
%pwd

'd:\\Tipto\\End-to-End-Kidney-Disease-Classifier\\notebooks'

In [3]:
os.chdir('../')
%pwd

'd:\\Tipto\\End-to-End-Kidney-Disease-Classifier'

In [4]:
from kidneyClassifier.entity.config_entity import DataIngestionConfig
from kidneyClassifier.utils.common import read_yaml_file, create_directories, get_file_size
from kidneyClassifier.constants import CONFIG_FILE_PATH , PARAMS_FILE_PATH

In [10]:
import sys
import zipfile
from kidneyClassifier.logger import logging
from kidneyClassifier.exception import KidneyException
import gdown
from kidneyClassifier.entity.artifact_entity import DataIngestionArtifact

In [None]:
class DataIngestion:
    """Download the dataset archive and unpack it.

    The config object is read for four attributes only: ``source_url``,
    ``local_data_file``, ``root_dir`` and ``unzip_dir``.
    """

    def __init__(
        self,
        data_ingestion_config: "DataIngestionConfig | None" = None
    ):
        """
        Args:
            data_ingestion_config: Settings for this step. When omitted (or
                None), a fresh ``DataIngestionConfig()`` is created.
        """
        # Build the default here rather than in the signature: a default
        # expression is evaluated once, when the class statement runs, so
        # every instance would otherwise share that single config object.
        if data_ingestion_config is None:
            data_ingestion_config = DataIngestionConfig()
        self.data_ingestion_config = data_ingestion_config

    def download_dataset(self):
        """
        Download dataset from the google drive

        Creates ``root_dir`` if it does not exist yet, then fetches
        ``source_url`` with gdown into ``local_data_file``.

        Raises:
            KidneyException: wraps any exception raised while creating the
                directory or downloading.
        """
        try:
            dataset_url = self.data_ingestion_config.source_url
            zip_download_dir = self.data_ingestion_config.local_data_file
            # exist_ok=True: the directory survives a previous run, and a
            # plain makedirs would raise FileExistsError on every re-run.
            os.makedirs(self.data_ingestion_config.root_dir, exist_ok=True)

            gdown.download(
                url=dataset_url,
                output=zip_download_dir,
                fuzzy=True  # accept share-style links, not only uc?id= URLs
            )
        except Exception as e:
            # Logged at error level, matching initiate_data_ingestion().
            logging.error("Failed to download dataset from the google drive")
            raise KidneyException(e, sys) from e

    def extract_zip_file(self):
        """
        Extract ``local_data_file`` into ``unzip_dir``.

        ``unzip_dir`` is created if missing. Errors from the filesystem or
        from ``zipfile`` propagate unchanged.
        """
        unzip_path = self.data_ingestion_config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.data_ingestion_config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)

    def initiate_data_ingestion(self) -> "DataIngestionArtifact":
        """
        Initiates data ingestion process: download, then extract.

        Returns:
            DataIngestionArtifact: Contains paths to downloaded and extracted data

        Raises:
            KidneyException: wraps any exception raised by either step.
        """
        try:
            self.download_dataset()
            self.extract_zip_file()

            data_ingestion_artifact = DataIngestionArtifact(
                extracted_data_path=self.data_ingestion_config.unzip_dir,
                downloaded_file_path=self.data_ingestion_config.local_data_file
            )

            logging.info("Data ingestion completed successfully")
            logging.info(f"Data Ingestion Artifact: {data_ingestion_artifact}")

            return data_ingestion_artifact

        except Exception as e:
            logging.error(f"Error in data ingestion: {str(e)}")
            raise KidneyException(e, sys)

In [9]:
# Run the ingestion step: download the archive, then unpack it.
try:
    data_ingestion = DataIngestion()
    # Keep the returned artifact so its paths can be inspected or reused
    # instead of being thrown away.
    data_ingestion_artifact = data_ingestion.initiate_data_ingestion()
except KidneyException:
    # initiate_data_ingestion() already raises KidneyException; wrapping it
    # again would only nest one KidneyException inside another.
    raise
except Exception as e:
    raise KidneyException(e, sys) from e

data_ingestion_artifact

Downloading...
From (original): https://drive.google.com/uc?id=1FaDBCJsWu6nqu-LJ6Fp1YoNgV7mcpx_g
From (redirected): https://drive.google.com/uc?id=1FaDBCJsWu6nqu-LJ6Fp1YoNgV7mcpx_g&confirm=t&uuid=5c7bec89-a989-4812-bf2e-ff0c378b2100
To: d:\Tipto\End-to-End-Kidney-Disease-Classifier\artifacts\data_ingestion\data.zip
100%|██████████| 57.7M/57.7M [00:02<00:00, 24.2MB/s]
