In [3]:
import os

In [4]:
%pwd
# The output below shows that the notebook is currently running from the research folder, so I need to move
# one level up to reach the project root, i.e. the chicken_disease_classification folder.

'c:\\Users\\gkart\\Desktop\\1-ProjectENDtoEND\\chicken_disease_classification\\research'

In [5]:
# Move from the notebook's "research" folder up to the project root ("../" means one folder up).
# Guarded on the current folder name so that re-running this cell does not climb
# another level every time it is executed.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [6]:
%pwd

'c:\\Users\\gkart\\Desktop\\1-ProjectENDtoEND\\chicken_disease_classification'

In [55]:
# Step -Updating/Creating Entity

# I can create my own custom return type using entity

from dataclasses import dataclass
# Data classes simplify the creation of classes that primarily 
# store data, as they automatically generate special methods like __init__, __repr__, and more.
from pathlib import Path


# The frozen=True argument makes the class immutable, 
# meaning that once an instance of this class is created, its attributes cannot be modified
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings consumed by the data-ingestion stage.

    Attributes:
        root_dir: Directory created for this stage by ConfigurationManager.
        source_url: URL the dataset archive is downloaded from. It is a plain
            string, not a Path: it is handed to ``urlretrieve`` as-is.
        local_data_file: Location the downloaded archive is saved to and later
            opened from as a zip file.
        unzip_dir: Directory the archive is extracted into.
    """

    root_dir: Path
    source_url: str
    local_data_file: Path
    unzip_dir: Path

In [56]:
from chickenDiseaseClassifier.constants import *
from chickenDiseaseClassifier.utils.common import read_yaml,create_directories

In [57]:
# Step - Updating/Creating Configuration Manager

class ConfigurationManager:
    """Reads the project's YAML files and builds the per-stage config entities."""

    def __init__(self, config_file_path = CONFIG_FILE_PATH, params_file_path = PARAM_FILE_PATH):
        """Load both YAML files and create the top-level artifacts directory.

        Args:
            config_file_path: Location of the main configuration YAML.
            params_file_path: Location of the parameters YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_configuration(self) -> DataIngestionConfig:
        """Build the DataIngestionConfig from the ``data_ingestion`` section.

        Also creates the stage's root directory as a side effect.

        Returns:
            A DataIngestionConfig whose filesystem fields are ``Path`` objects,
            matching the entity's declared field types.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            # Kept as a plain string on purpose: wrapping a URL in Path would
            # collapse the "//" after the scheme and break the download.
            source_url=section.source_url,
            local_data_file=Path(section.local_data_file),
            unzip_dir=Path(section.unzip_dir),
        )

In [58]:
# Step - updating/creating Components

import os 
import urllib.request as request #using this urllib.request to download the dataset from the url
import zipfile 
from src.chickenDiseaseClassifier import logger
from src.chickenDiseaseClassifier.utils.common import get_size

class DataIngestion:
    """Downloads the dataset archive and unpacks it, driven by a DataIngestionConfig."""

    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_url``, ``local_data_file`` and ``unzip_dir``.
        """
        self.config = config

    def download_data(self):
        """Download ``config.source_url`` to ``config.local_data_file`` if it is not there yet.

        The archive is first written to a sibling ``.part`` file and only renamed
        to its final name once the download has finished. An interrupted or
        truncated download therefore never leaves a corrupt file behind that a
        later run would mistake for a completed download.

        Raises:
            Whatever ``urllib.request.urlretrieve`` raises (e.g. ``URLError``,
            ``ContentTooShortError``); the partial file is removed first.
        """
        target = self.config.local_data_file
        if os.path.exists(target):
            logger.info(f"file already exsists of size : {get_size(Path(target))}")
            return

        # urlretrieve does not create missing folders, so make sure the parent exists.
        parent_dir = os.path.dirname(target)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        partial = f"{target}.part"
        try:
            # urlretrieve returns (local_path, response_headers); only the headers are needed here.
            _, headers = request.urlretrieve(url=self.config.source_url, filename=partial)
            os.replace(partial, target)
        finally:
            # After a successful rename the partial file is gone; on failure clean it up.
            if os.path.exists(partial):
                os.remove(partial)
        logger.info(f"{target} download ! with following info : \n{headers}")

    def extract_zip_file(self):
        """Extract the downloaded archive into ``config.unzip_dir``, creating it if needed."""
        unzip_path = self.config.unzip_dir
        # exist_ok=True: an already existing directory is not an error.
        os.makedirs(unzip_path, exist_ok=True)

        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)

In [66]:
# Now Creating the pipeline

# Run the data-ingestion stage end to end: load config -> download archive -> extract it.
try:
    configuration_manager = ConfigurationManager()
    data_ingestion_configuration = configuration_manager.get_data_ingestion_configuration()
    data_ingestion = DataIngestion(config = data_ingestion_configuration)
    data_ingestion.download_data()
    data_ingestion.extract_zip_file()
except Exception:
    # Record the failure in the project log, then re-raise with a bare `raise`
    # so the original traceback is preserved unchanged.
    logger.exception("Data ingestion pipeline failed")
    raise

[2023-07-28 14:35:29,416: INFO: common: yaml file : config\config.yaml loaded sucesfully]
[2023-07-28 14:35:29,423: INFO: common: yaml file : params.yaml loaded sucesfully]
[2023-07-28 14:35:29,429: INFO: common: Create directory at : artifacts]
[2023-07-28 14:35:29,435: INFO: common: Create directory at : artifacts/data_ingestion]


[2023-07-28 14:36:00,205: INFO: 412483902: artifacts/data_ingestion/data.zip download ! with following info : 
Connection: close
Content-Length: 11616915
Cache-Control: max-age=300
Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox
Content-Type: application/zip
ETag: "adf745abc03891fe493c3be264ec012691fe3fa21d861f35a27edbe6d86a76b1"
Strict-Transport-Security: max-age=31536000
X-Content-Type-Options: nosniff
X-Frame-Options: deny
X-XSS-Protection: 1; mode=block
X-GitHub-Request-Id: 9212:31F4B4:40D8C:5BE4B:64C384D8
Accept-Ranges: bytes
Date: Fri, 28 Jul 2023 09:05:31 GMT
Via: 1.1 varnish
X-Served-By: cache-del21726-DEL
X-Cache: MISS
X-Cache-Hits: 0
X-Timer: S1690535131.628750,VS0,VE1070
Vary: Authorization,Accept-Encoding,Origin
Access-Control-Allow-Origin: *
Cross-Origin-Resource-Policy: cross-origin
X-Fastly-Request-ID: f8a7bc2f443c6719718592db983548ae143ed4bf
Expires: Fri, 28 Jul 2023 09:10:31 GMT
Source-Age: 1

]


In [None]:
# This was just an experiment notebook. Now I am converting it into modular code, e.g. here I created the entity in the same
# notebook, but now I will create the entity class (i.e. the DataIngestionConfig class) in the entity folder, and likewise
# the other pieces will be moved into their respective folders.

# Nothing new needs to be written when converting to modular code; just copy and paste the same code.