In [1]:
import os

In [2]:
%pwd

'/Users/azharali/Desktop/Projects/Kidney-Disease-Classification-DL/research'

In [3]:
# Move from the notebook folder (research/) up to the project root so that
# project-relative paths such as config/config.yaml and artifacts/ resolve.
# The guard keeps this cell idempotent: an unconditional chdir("../") climbs
# one more level every time the cell is re-run.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'/Users/azharali/Desktop/Projects/Kidney-Disease-Classification-DL'

Dataclasses are a way to make Python classes easier and simpler. Classes are like blueprints for creating objects that can store data and do things with it. For example, you can make a class called Animal that has attributes like name, color, and sound, and methods like eat, sleep, and make_noise.

But to write a class like this by hand, you need a lot of boilerplate code, such as a special method called `__init__` that sets the attributes when you create an object, and another special method called `__repr__` that controls what the object looks like when you print it. Dataclasses help you avoid writing this code by automatically generating these special methods for you. All you have to do is use a decorator called `@dataclass` and list the attributes and their types. For example, the next cell uses it to define `DataIngestionConfig`, a dataclass with four typed fields:

In [13]:
# Entity: defined here in the notebook first; it will be moved into the entity folder later

from pathlib import Path
from dataclasses import dataclass

# dataclass is a decorator 
# Dataclasses are a way to make Python classes easier and simpler.
# Classes are like blueprints for creating objects that can store data and do things with it. 
# For example, you can make a class called Animal that has attributes like name, color, and sound, and methods like eat, sleep, and make_noise.

# But to make a class like this, you have to write a lot of code, such as defining a special method called __init__ that sets the attributes when you create an object, 
# and another special method called __repr__ that controls what the object looks like when you print it. 
# Dataclasses help you avoid writing this code by automatically generating these special methods for you. 
# All you have to do is use a decorator called @dataclass and list the attributes and their types. 

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings for the data-ingestion stage.

    The class is frozen, so fields cannot be reassigned after construction.
    Field order matters: it is the positional order of the generated __init__.
    """

    root_dir: Path          # working directory of the ingestion stage
    source_url: str         # link the dataset archive is downloaded from
    local_data_file: Path   # where the downloaded archive is written
    unzip_dir: Path         # directory the archive is extracted into

In [14]:
from cnnClassifier.constants import * #everything
from cnnClassifier.utils.common import read_yaml, create_directories

Configuration

In [15]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config entities.

    Args:
        config_filepath: location of the main configuration YAML
            (defaults to CONFIG_FILE_PATH).
        params_filepath: location of the parameters YAML
            (defaults to PARAMS_FILE_PATH).
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        # Load both YAML documents; their contents are accessed by attribute below.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Make sure the top-level artifacts directory named in the config exists.
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the DataIngestionConfig from the `data_ingestion` config section.

        Creates the section's root directory as a side effect.

        Returns:
            DataIngestionConfig: settings copied field-by-field from the config.
        """
        section = self.config.data_ingestion

        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_url=section.source_url,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )

Components

In [16]:
import os
import zipfile
import gdown
from cnnClassifier import logger

In [17]:
from cnnClassifier.utils.common import get_size

In [18]:
def get_size(path: Path) -> str:
    """
    Report how large a file is, in whole kilobytes.

    Args:
        path (Path): location of the file to measure

    Returns:
        str: the rounded size, formatted like "~12 KB"
    """

    byte_count = os.stat(path).st_size
    kilobytes = round(byte_count / 1024)
    return "~" + str(kilobytes) + " KB"

In [19]:
class DataIngestion:
    """Downloads the dataset archive from Google Drive and unpacks it.

    Args:
        config: ingestion settings; this class reads its `source_url`,
            `local_data_file` and `unzip_dir` attributes.
    """

    # Direct-download endpoint; the Drive file id is appended to it.
    _DRIVE_DOWNLOAD_PREFIX = "https://drive.google.com/uc?export=download&id="

    def __init__(self, config: "DataIngestionConfig"):
        self.config = config

    @staticmethod
    def _extract_drive_file_id(url: str) -> str:
        """Pull the file id out of a Google Drive link.

        Recognises share links (".../file/d/<id>/view", with or without a
        query string or trailing slash) and links that carry the id as an
        "id=<id>" query parameter. Any other shape falls back to the
        second-to-last "/"-separated segment.
        """
        path, _, query = url.partition("?")
        segments = path.split("/")

        # Share link: the id is the path segment right after "d".
        if "d" in segments:
            position = segments.index("d") + 1
            if position < len(segments) and segments[position]:
                return segments[position]

        # Direct link: the id travels as a query parameter.
        for pair in query.split("#")[0].split("&"):
            key, _, value = pair.partition("=")
            if key == "id" and value:
                return value

        return url.split("/")[-2]

    def download_file(self) -> None:
        """Fetch the archive at `config.source_url` into `config.local_data_file`.

        The destination's parent directory is created first if it is missing.
        Any exception raised while downloading propagates to the caller.
        """
        dataset_url = self.config.source_url
        zip_download_dir = self.config.local_data_file

        # Create the directory the archive will actually be written to,
        # rather than a fixed path that may not match the configuration.
        parent_dir = os.path.dirname(str(zip_download_dir))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")

        file_id = self._extract_drive_file_id(dataset_url)
        # NOTE(review): the destination is passed as a plain string because
        # gdown appears to treat non-str outputs as file objects - confirm.
        gdown.download(self._DRIVE_DOWNLOAD_PREFIX + file_id, str(zip_download_dir))

        logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir} ")

    def extract_zip_file(self) -> None:
        """Extract the archive at `config.local_data_file` into `config.unzip_dir`.

        The target directory is created if it does not exist. Returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
            zip_ref.extractall(unzip_path)

Pipeline

In [20]:
# using above classes and their methods

# Run the ingestion stage end to end: load the configuration, download the
# archive, then unpack it. There is deliberately no try/except here: catching
# an exception only to re-raise it unchanged adds nothing, so any failure is
# left to propagate with its original traceback.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_file()
data_ingestion.extract_zip_file()

[2024-03-06 22:11:19,443: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-03-06 22:11:19,446: INFO: common: yaml file: params.yaml loaded successfully]
[2024-03-06 22:11:19,447: INFO: common: created directory at: artifacts]
[2024-03-06 22:11:19,448: INFO: common: created directory at: artifacts/data_ingestion]
[2024-03-06 22:11:19,449: INFO: 4244723930: Downloading data from https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view into file artifacts/data_ingestion/data.zip]


Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3&confirm=t&uuid=7eb3622c-1ca8-4cdb-890e-08a3b9b147eb
To: /Users/azharali/Desktop/Projects/Kidney-Disease-Classification-DL/artifacts/data_ingestion/data.zip
100%|██████████| 57.7M/57.7M [00:02<00:00, 23.5MB/s]


[2024-03-06 22:11:23,939: INFO: 4244723930: Downloaded data from https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view into file artifacts/data_ingestion/data.zip ]
