In [1]:
import os

In [2]:
%pwd

'c:\\Users\\haris\\AI\\ML\\Apps\\Chicken_Disease\\research'

In [3]:
# Step up one directory (from research/ to the project root, per the %pwd
# outputs around this cell) so the relative paths used below resolve there.
# NOTE: not idempotent - re-running this cell moves up yet another level.
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\haris\\AI\\ML\\Apps\\Chicken_Disease'

In [43]:
# create entity
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings for the data-ingestion step.

    Built by ``ConfigurationManager.get_data_ingestion_config`` from the
    ``data_ingestion`` section of the loaded config and consumed by
    ``DataIngestion``.
    """
    # NOTE(review): annotations are not enforced at runtime; the values are
    # passed through unchanged from the loaded config, so they may be plain
    # strings rather than Path objects - confirm before relying on Path methods.

    # Directory for this step; ConfigurationManager creates it before use.
    root_dir: Path
    # URL handed to urlretrieve when the archive is downloaded.
    source_URL: str
    # Local path the archive is downloaded to and later opened as a zip file.
    local_data_file: Path
    # Directory the archive is extracted into (created if missing).
    unzip_dir: Path

In [44]:
# importing dependencies
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [45]:
# Update the Configuration Manager
class ConfigurationManager:
    """Load the project's YAML settings and hand out per-step config objects."""

    def __init__(
        self, config_filepath=CONFIG_FILE_PATH, param_filepath=PARAMS_FILE_PATH
    ):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            param_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.param = read_yaml(param_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings, creating its root directory first.

        Returns:
            A ``DataIngestionConfig`` filled from the ``data_ingestion``
            section of the loaded configuration.
        """
        ingestion_section = self.config.data_ingestion
        create_directories([ingestion_section.root_dir])

        # Field values are forwarded unchanged from the config section.
        return DataIngestionConfig(
            root_dir=ingestion_section.root_dir,
            source_URL=ingestion_section.source_URL,
            local_data_file=ingestion_section.local_data_file,
            unzip_dir=ingestion_section.unzip_dir,
        )

In [50]:
# Update the components: the classes and methods that are going to be used in the pipeline
import urllib.request as request
import zipfile
import os
from cnnClassifier import logger
from cnnClassifier.utils.common import get_size


class DataIngestion:
    """Download the dataset archive and unpack it as described by a config.

    Args:
        config: Settings object providing ``source_URL``, ``local_data_file``
            and ``unzip_dir``.
    """

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self):
        """Fetch ``source_URL`` into ``local_data_file`` unless it already exists.

        An existing file is neither re-downloaded nor validated; only its
        size is logged.
        """
        local_file = self.config.local_data_file
        if not os.path.exists(local_file):
            filename, headers = request.urlretrieve(
                url=self.config.source_URL, filename=local_file
            )
            logger.info(
                f"File {filename} downloaded with following info:  \n{headers}")
        else:
            # `filename` is only bound in the download branch above, so the
            # configured path is reported here instead (previously this
            # branch raised UnboundLocalError on every re-run).
            logger.info(
                f"file {local_file} already exists of size: {get_size(Path(local_file))}"
            )

    def extract_zip_file(self):
        """Extract the downloaded archive into ``unzip_dir``.

        The target directory is created when it does not exist yet.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
            zip_ref.extractall(unzip_path)

In [51]:
# create data ingestion pipeline
try:
    # Load settings, then download and unpack the dataset archive.
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()
except Exception as e:
    # Record the failure (with traceback) in the project log, then re-raise
    # with a bare `raise` so the original traceback is left untouched.
    logger.exception(e)
    raise

[2024-01-21 00:39:57,158: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-01-21 00:39:57,164: INFO: common: yaml file: params.yaml loaded successfully]
[2024-01-21 00:39:57,170: INFO: common: created directory at: artifacts]
[2024-01-21 00:39:57,176: INFO: common: created directory at: artifacts/data_ingestion]
[2024-01-21 00:40:04,697: INFO: 1083867211: File artifacts/data_ingestion/data.zip downloaded with following info:  
Connection: close
Content-Length: 11616915
Cache-Control: max-age=300
Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox
Content-Type: application/zip
ETag: "adf745abc03891fe493c3be264ec012691fe3fa21d861f35a27edbe6d86a76b1"
Strict-Transport-Security: max-age=31536000
X-Content-Type-Options: nosniff
X-Frame-Options: deny
X-XSS-Protection: 1; mode=block
X-GitHub-Request-Id: 416E:2800FF:17950C:23F430:65AC1A87
Accept-Ranges: bytes
Date: Sat, 20 Jan 2024 19:10:01 GMT
Via: 1.1 varnish
X-Served-By: cache-del21748-DEL
X-