In [1]:
import os
# The notebook is kept in the project's "research" folder, while the paths used
# below (e.g. config/config.yaml) are relative to the project root.
# Step up one level only while still inside "research", so that re-running this
# cell does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir('../')

In [2]:
# Show the working directory that the remaining cells will run from.
print(os.getcwd())

d:\Data Science\Python Assignment\End to End Kidney Disease Detection


Update the entity

In [4]:
from dataclasses import dataclass
from pathlib import Path

#Entity that groups all config parameters for this stage; it is used as the return type (the "->" annotation) of the configuration manager's getter
#@dataclass(frozen=True) is a decorator from Python's dataclasses module that makes instances of the class immutable after creation.
# When applied, it provides the following benefits:
# Immutability: Once an object is created, you cannot change its attributes. Any attempt to assign to one of the instance's fields raises an error.
# Hashability: Instances of the class become hashable (i.e., you can use them as keys in dictionaries or add them to sets) as long as all their fields are hashable.
@dataclass(frozen=True)
class PrepareBaseMLConfig:
    """Read-only settings for the "prepare base model" stage.

    Instances are built by ``ConfigurationManager.get_prepapre_base_ml_config``
    from the ``prepare_base_ML`` section of the config file (the three paths)
    and from the params file (the ``params_*`` fields).
    """
    # Stage directory; the configuration manager creates it before building this object.
    root_dir: Path
    # Path taken from ``base_model_path`` in the config file
    # (presumably where the base model is stored -- confirm against the stage code).
    base_model_path: Path
    # Path taken from ``updated_base_model_path`` in the config file
    # (presumably where the modified model is stored -- confirm against the stage code).
    updated_base_model_path: Path
    # Value of IMAGE_SIZE in the params file.
    params_image_size: list
    # Value of LEARNING_RATE in the params file.
    params_learning_rate: float
    # Value of INCLUDE_TOP in the params file.
    params_include_top: bool
    # Value of WEIGHTS in the params file.
    params_weights: str
    # Value of CLASSES in the params file.
    params_classes: int


Update the configuration manager in src config

In [5]:
from DiseaseClassifier.constants import *
from DiseaseClassifier.utils.common import read_yaml, create_directories


    
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config entities."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Read the config and params YAML files and prepare the artifacts root.

        Args:
            config_filepath: location of the config YAML; defaults to the
                project constant CONFIG_FILE_PATH.
            params_filepath: location of the params YAML; defaults to the
                project constant PARAMS_FILE_PATH.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # ``artifacts_root`` is a key of the config file; hand it to
        # create_directories so the top-level artifacts folder is in place.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepapre_base_ml_config(self) -> PrepareBaseMLConfig:
        """Assemble the PrepareBaseMLConfig entity for the prepare-base-model stage.

        The three paths come from the ``prepare_base_ML`` section of the config
        file; the ``params_*`` values come from the params file. The stage's
        ``root_dir`` is passed to create_directories before the entity is built.

        Returns:
            PrepareBaseMLConfig: the populated, frozen config entity.
        """
        stage_section = self.config.prepare_base_ML
        create_directories([stage_section.root_dir])

        hyperparams = self.params
        entity_fields = dict(
            root_dir=Path(stage_section.root_dir),
            base_model_path=Path(stage_section.base_model_path),
            updated_base_model_path=Path(stage_section.updated_base_model_path),
            params_image_size=hyperparams.IMAGE_SIZE,
            params_learning_rate=hyperparams.LEARNING_RATE,
            params_include_top=hyperparams.INCLUDE_TOP,
            params_weights=hyperparams.WEIGHTS,
            params_classes=hyperparams.CLASSES,
        )
        return PrepareBaseMLConfig(**entity_fields)


In [15]:
# Sanity check: print the config file path exposed by the package constants.
from DiseaseClassifier.constants import CONFIG_FILE_PATH
print(CONFIG_FILE_PATH)

config\config.yaml


In [6]:
# Display the constant's repr to see its type (a pathlib path object, per the output below).
CONFIG_FILE_PATH

WindowsPath('config/config.yaml')

In [7]:
# Re-check the working directory, since the relative config path is resolved against it.
import os
print(os.getcwd())

d:\Data Science\Python Assignment\End to End Kidney Disease Detection


In [16]:
import gdown
import zipfile
import os
from DiseaseClassifier import logger
from DiseaseClassifier.utils.common import get_size


class DataIngestion:
    """Downloads the dataset archive from Google Drive and unpacks it.

    The ``config`` object must provide the attributes read by the methods
    below: ``source_file`` (Google Drive share URL), ``local_data_file``
    (where the zip is written) and ``unzip_dir`` (where it is extracted).
    """

    def __init__(self, config: "DataIngestionConfig"):
        # The annotation is quoted so that defining this class does not fail
        # with NameError when DataIngestionConfig has not been defined/imported
        # in the running kernel.
        self.config = config

    def download_file(self) -> str:
        """Download the archive named by ``config.source_file``.

        The parent directory of ``config.local_data_file`` is created if it is
        missing, so the download always has a place to land.

        Returns:
            str: the path the archive was requested to be written to.

        Raises:
            Any exception raised by gdown or the filesystem propagates unchanged.
        """
        dataset_url = self.config.source_file
        zip_download_dir = self.config.local_data_file

        # Derive the folder from the configured target instead of a hard-coded
        # literal, so the directory that gets created is the one actually used.
        destination = os.fspath(zip_download_dir)
        parent_dir = os.path.dirname(destination)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}")

        # The file id is taken as the second-to-last "/"-separated URL segment
        # (assumes a share link shaped like .../d/<id>/view... -- confirm for other link formats).
        file_id = dataset_url.split('/')[-2]
        prefix = "https://drive.google.com/uc?export=download&id="
        gdown.download(prefix + file_id, destination)

        logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}")
        return destination

    def extract_zip_file(self):
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        The target directory is created first if it does not exist.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        # Open the downloaded archive and unpack everything into unzip_path.
        with zipfile.ZipFile(self.config.local_data_file) as zip_ref:
            zip_ref.extractall(unzip_path)

Creating the data pipeline

In [17]:
# Build the data-ingestion stage from the YAML-backed configuration.
# Exceptions propagate as-is; no handler is needed just to re-raise them.
# NOTE(review): the ConfigurationManager defined earlier in this notebook has no
# get_data_ingestion_config method, so this cell depends on a definition from
# elsewhere -- confirm it runs on a fresh kernel.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
# The Google Drive download did not work, so the archive was saved locally and
# moved into the artifacts folder by hand; the two steps below stay disabled.
# data_ingestion.download_file()
# data_ingestion.extract_zip_file()


[2024-09-28 16:55:29,521: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-09-28 16:55:29,525: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-28 16:55:29,527: INFO: common: created directory at: <class 'pathlib.Path'>]
[2024-09-28 16:55:29,529: INFO: common: created directory at: <class 'pathlib.Path'>]
