In [1]:
import os

In [2]:
%pwd ## project working directory

'd:\\MLOPs Project\\Kidney_Disease_Classification_Ml_Dvc\\research'

In [3]:
# Step up from the notebook's folder to its parent (the project root) so that the
# relative paths used below (config files, artifacts/) resolve.
# Not idempotent: re-running this cell moves up one more level each time.
os.chdir("../")

In [4]:
%pwd

'd:\\MLOPs Project\\Kidney_Disease_Classification_Ml_Dvc'

In [5]:
from dataclasses import dataclass
from pathlib import Path ##This module offers classes representing filesystem paths with semantics appropriate for different operating systems. Path classes are divided between pure paths, which provide purely computational operations without I/O, and concrete paths, which inherit from pure paths but also provide I/O operations.

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    ``frozen=True`` makes instances read-only: assigning to a field after
    construction raises ``dataclasses.FrozenInstanceError``.
    """

    # Directory that holds the data-ingestion artifacts.
    root_dir: Path
    # Link the dataset archive is downloaded from.
    source_URL: str
    # Local path the downloaded archive is written to.
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path
    

In [6]:
from cnnClassifier.constants import * 
from cnnClassifier.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the configuration YAML
                (defaults to ``CONFIG_FILE_PATH``).
            params_filepath: Location of the parameters YAML
                (defaults to ``PARAMS_FILE_PATH``).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes below the artifacts root, so create it up front.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        The stage's ``root_dir`` is created as a side effect.

        Returns:
            A ``DataIngestionConfig`` populated with the section's ``root_dir``,
            ``source_URL``, ``local_data_file`` and ``unzip_dir`` values.
        """
        section = self.config.data_ingestion

        # Make sure the stage's own directory exists before anything is written to it.
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_URL=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )

In [8]:
import os
import zipfile
import gdown
from cnnClassifier import  logger
from cnnClassifier.utils.common import get_size

In [14]:
class DataIngestion:
    """Downloads the dataset archive from Google Drive and unpacks it.

    All locations are taken from the config object passed to the constructor.
    """

    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_URL``, ``local_data_file`` and
                ``unzip_dir`` (normally a ``DataIngestionConfig``). The
                annotation is a string so the class can be defined even when
                ``DataIngestionConfig`` is not yet in scope.
        """
        self.config = config

    @staticmethod
    def _extract_file_id(dataset_url: str) -> str:
        """Return the second-to-last ``/``-separated segment of ``dataset_url``.

        For a share link shaped like
        ``https://drive.google.com/file/d/<id>/view?usp=share_link`` that
        segment is the file id.

        Raises:
            ValueError: If the URL has no such segment or the segment is empty.
        """
        segments = str(dataset_url).split("/")
        if len(segments) < 2 or not segments[-2]:
            raise ValueError(
                f"Cannot extract a Google Drive file id from URL: {dataset_url!r}"
            )
        return segments[-2]

    def download_file(self) -> None:
        """Download the archive at ``config.source_URL`` to ``config.local_data_file``.

        The parent directory of the target file is created if it is missing.

        Raises:
            ValueError: If no file id can be extracted from the source URL.
            RuntimeError: If ``gdown.download`` reports failure by returning ``None``.
        """
        dataset_url = self.config.source_URL
        zip_download_dir = self.config.local_data_file

        # Create the directory the archive is written to, derived from the
        # configured target path rather than a hard-coded location.
        target_dir = os.path.dirname(zip_download_dir)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")

        file_id = self._extract_file_id(dataset_url)
        prefix = 'https://drive.google.com/uc?export=download&id='
        # Pass the output as a plain string so a Path-valued config also works.
        downloaded = gdown.download(prefix + file_id, str(zip_download_dir))
        if downloaded is None:
            # Do not log success when gdown signalled a failed download.
            raise RuntimeError(
                f"gdown could not download {dataset_url} into {zip_download_dir}"
            )

        logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")

    def extract_zip_file(self):
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        The destination directory is created if it does not exist.

        Returns:
            None
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)


In [15]:
#CREATE PIPELINE
try:
    config = ConfigurationManager()   #make sure that directory is change to "os.chdir("../") " other than  you ge t no such file found
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()
except Exception as e:
    raise e

[2023-10-20 19:53:59,133: INFO: common:yaml file: config\config.yaml loaded successfully]
[2023-10-20 19:53:59,136: INFO: common:yaml file: params.yaml loaded successfully]
[2023-10-20 19:53:59,138: INFO: common:created directory at: artifacts]
[2023-10-20 19:53:59,140: INFO: common:created directory at: artifacts/data_ingestion]
[2023-10-20 19:53:59,142: INFO: 4182046305:Downloading data from https://drive.google.com/file/d/1tsyRtl3joEDI0wxU45PfeHb4_t-XRRK1/view?usp=share_link into file artifacts/data_ingestion/data.zip]


Downloading...
From (uriginal): https://drive.google.com/uc?export=download&id=1tsyRtl3joEDI0wxU45PfeHb4_t-XRRK1
From (redirected): https://drive.google.com/uc?export=download&id=1tsyRtl3joEDI0wxU45PfeHb4_t-XRRK1&confirm=t&uuid=7e10191b-7889-4f0f-afc7-26bb28416876
To: d:\MLOPs Project\Kidney_Disease_Classification_Ml_Dvc\artifacts\data_ingestion\data.zip
100%|██████████| 79.0M/79.0M [00:07<00:00, 10.5MB/s]


[2023-10-20 19:54:09,201: INFO: 4182046305:Downloaded data from https://drive.google.com/file/d/1tsyRtl3joEDI0wxU45PfeHb4_t-XRRK1/view?usp=share_link into file artifacts/data_ingestion/data.zip]
