In [1]:
import os

# Project root that the notebook runs from; the chdir below makes it the
# working directory for every following cell.
# The default is the original machine-specific location. Set the optional
# CHICKEN_PROJECT_ROOT environment variable to run the notebook from a
# different checkout without editing this cell.
project_path = os.environ.get(
    "CHICKEN_PROJECT_ROOT",
    r"C:\Users\user\anaconda3\envs\Chicken-disease-Classification-Project",
)

os.chdir(project_path)
print(f"Moved to: {os.getcwd()}")


Moved to: C:\Users\user\anaconda3\envs\Chicken-disease-Classification-Project


In [2]:
# Display the kernel's current working directory.
%pwd

'C:\\Users\\user\\anaconda3\\envs\\Chicken-disease-Classification-Project'

In [3]:
#os.chdir("../")

In [4]:
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion step.

    Attributes:
        root_dir: Directory that holds the data-ingestion artifacts.
        source_URL: URL the data archive is downloaded from.
        local_data_file: Location the downloaded archive is saved to.
        unzip_dir: Directory the archive is extracted into.
        expected_hash: Optional hash string for the archive; ``None`` when
            the configuration does not provide one.
    """

    root_dir: Path
    source_URL: str
    local_data_file: Path
    unzip_dir: Path
    # Optional: the field defaults to None, so the annotation must allow it.
    expected_hash: Optional[str] = None

In [5]:
from cnnclassifier.constants import *
import os
import time
import hashlib
import zipfile
from pathlib import Path
from urllib import request
from tenacity import retry, stop_after_attempt, wait_fixed
from cnnclassifier.utils.common import  read_yaml, create_directories # Assumed utility file

In [6]:
class ConfigurationManager:
    """Load the YAML configuration and parameter files and build step configs.

    Constructing the manager reads both files and creates the directory named
    by the ``artifacts_root`` key of the configuration.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAM_FILE_PATH
    ):
        """Read the config/params files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main YAML configuration file.
            params_filepath: Location of the YAML parameters file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config['artifacts_root']])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        Creates the section's ``root_dir`` directory as a side effect.

        Returns:
            A ``DataIngestionConfig`` whose filesystem fields are ``Path``
            objects, as declared by the dataclass. ``expected_hash`` is
            ``None`` when the key is absent from the configuration.
        """
        config = self.config['data_ingestion']

        create_directories([config['root_dir']])

        # The YAML values are plain strings; convert them so the fields match
        # the Path annotations on DataIngestionConfig.
        return DataIngestionConfig(
            root_dir=Path(config['root_dir']),
            source_URL=config['source_URL'],
            local_data_file=Path(config['local_data_file']),
            unzip_dir=Path(config['unzip_dir']),
            expected_hash=config.get('expected_hash'),  # optional key
        )


In [7]:
#pip install tenacity


In [8]:
import os
import zipfile
from zipfile import BadZipFile
from urllib import request
from cnnclassifier import logger  # ✅ This is correct

class DataIngestion:
    """Download the configured zip archive and extract it.

    The instance reads ``local_data_file``, ``source_URL``, ``unzip_dir`` and
    (when present) ``expected_hash`` from the config object it is given.
    """

    # Hex-digest length -> hashlib algorithm used to interpret expected_hash.
    # NOTE(review): the configuration does not name the hash algorithm, so it
    # is inferred from the digest length — confirm this matches how the
    # expected hash was produced.
    _HASH_ALGORITHMS = {32: "md5", 40: "sha1", 64: "sha256", 128: "sha512"}

    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def _is_intact_zip(self, file_path) -> bool:
        """Return True when ``file_path`` opens as a zip and passes ``testzip``."""
        try:
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                corrupt = zip_ref.testzip()
            if corrupt:
                raise BadZipFile(f"Corrupted file in zip: {corrupt}")
        except BadZipFile as e:
            logger.warning(f"Corrupted zip file found. Deleting: {file_path}. Reason: {e}")
            return False
        return True

    def _matches_expected_hash(self, file_path) -> bool:
        """Check ``file_path`` against the configured ``expected_hash``.

        Returns True when no hash is configured, when the configured value has
        a length that maps to no known algorithm (a warning is logged and the
        check is skipped), or when the computed digest equals the expected one.
        """
        expected = getattr(self.config, "expected_hash", None)
        if not expected:
            return True

        expected = str(expected).strip().lower()
        algorithm = self._HASH_ALGORITHMS.get(len(expected))
        if algorithm is None:
            logger.warning(
                f"Cannot infer hash algorithm from expected_hash of length "
                f"{len(expected)}; skipping hash verification for {file_path}"
            )
            return True

        digest = hashlib.new(algorithm)
        with open(file_path, "rb") as stream:
            # Read in chunks so large archives are not loaded into memory at once.
            for chunk in iter(lambda: stream.read(1024 * 1024), b""):
                digest.update(chunk)
        return digest.hexdigest() == expected

    def download_file(self):
        """Ensure the archive exists at ``local_data_file``, downloading if needed.

        An existing file is reused only when it is an intact zip and matches
        the configured hash (if any); otherwise it is deleted and fetched
        again. The download is written to a temporary ``.part`` file and moved
        into place only after it succeeds, so a failed transfer never leaves a
        partial archive at the final location.

        Raises:
            ValueError: The downloaded file does not match ``expected_hash``.
            Exception: Any error raised by the download is logged and re-raised.
        """
        file_path = self.config.local_data_file
        url = self.config.source_URL

        if os.path.exists(file_path):
            if self._is_intact_zip(file_path):
                if self._matches_expected_hash(file_path):
                    logger.info(f"File already exists and is a valid zip: {file_path}")
                    return
                logger.warning(f"Existing file does not match expected hash. Deleting: {file_path}")
            os.remove(file_path)

        # urlretrieve does not create missing directories.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        tmp_path = f"{file_path}.part"
        try:
            _, headers = request.urlretrieve(url=url, filename=tmp_path)
            if not self._matches_expected_hash(tmp_path):
                raise ValueError(f"Downloaded file from {url} does not match the expected hash")
            os.replace(tmp_path, file_path)
            logger.info(f"{file_path} downloaded successfully with headers: \n{headers}")
        except Exception as e:
            logger.error(f"Failed to download file from {url}. Error: {e}")
            raise
        finally:
            # After a successful replace the temporary file no longer exists;
            # on any failure this removes the partial download.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def extract_zip_file(self):
        """Extract every member of ``local_data_file`` into ``unzip_dir``.

        The target directory is created when it does not exist.

        Raises:
            BadZipFile: The archive cannot be read; logged and re-raised.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)

        try:
            with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
                zip_ref.extractall(unzip_path)
                logger.info(f"Extracted {self.config.local_data_file} to {unzip_path}")
        except BadZipFile as e:
            logger.error(f"Failed to extract zip file. BadZipFile: {e}")
            raise


In [9]:
# Data-ingestion pipeline: load the configuration, then fetch and unpack the archive.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()
except Exception:
    # Log the full traceback, then let the failure surface in the notebook.
    logger.exception("Error occurred during data ingestion pipeline")
    raise


[2025-05-26 10:46:11,153: INFO: common: YAML file: config\config.yaml loaded successfully]
[2025-05-26 10:46:11,171: INFO: common: YAML file: params.yaml loaded successfully]
[2025-05-26 10:46:11,177: INFO: common: Created directory at: artifacts]
[2025-05-26 10:46:11,182: INFO: common: Created directory at: artifacts/data_ingestion]


[2025-05-26 10:46:11,428: INFO: 1563970791: File already exists and is a valid zip: artifacts/data_ingestion/data.zip]
[2025-05-26 10:46:12,441: INFO: 1563970791: Extracted artifacts/data_ingestion/data.zip to artifacts/data_ingestion]
