In [1]:
import os

In [2]:
%pwd

'f:\\Git Projects\\Venkey-Chicken-Disease-Classification\\research'

In [3]:
# Move from the notebook's research/ folder up to the project root so that
# relative paths (e.g. the config YAML files) resolve from there.
# Guarded on the current folder name so that re-running this cell does not
# climb one directory further each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [11]:
%pwd

'f:\\Git Projects\\Venkey-Chicken-Disease-Classification'

In [24]:
from dataclasses import dataclass, field
from pathlib import Path


@dataclass(frozen=True)
class AWSConfig:
    """Immutable bundle of AWS credentials and region for building an S3 client.

    The secret key is left out of the generated ``repr`` so that displaying or
    logging an instance (easy to do by accident as the last expression of a
    notebook cell) does not expose it. Construction, equality and hashing are
    unaffected.
    """

    access_key_id: str
    # repr=False keeps the secret out of printed / logged representations.
    secret_access_key: str = field(repr=False)
    region_name: str

@dataclass(frozen=True)
class DataIngestionConfig:
    """Read-only settings for the data-ingestion stage.

    The field order is part of the positional constructor signature and must
    stay as declared.
    """

    # Working directory for this stage.
    root_dir: Path
    # S3 bucket that holds the dataset archive.
    bucket_name: str
    # Object key of the archive inside the bucket.
    file_key: str
    # Local path of the zip archive (download target / extraction source).
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path

In [25]:
from ChickenDiseaseClassifier.constants import *
from ChickenDiseaseClassifier.utils.common import read_yaml, create_directories

In [26]:
class ConfigurationManager:
    """Loads the project's YAML settings and hands out typed config objects.

    Constructing the manager reads both YAML files with ``read_yaml`` and
    passes the configured ``artifacts_root`` to ``create_directories``.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Read the config and params files and prepare the artifacts root.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the params YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        Also passes the stage's ``root_dir`` to ``create_directories``.

        Returns:
            A ``DataIngestionConfig`` with path-like entries wrapped in ``Path``.
        """
        ingestion = self.config.data_ingestion
        create_directories([ingestion.root_dir])

        settings = {
            "root_dir": Path(ingestion.root_dir),
            "bucket_name": ingestion.bucket_name,
            "file_key": ingestion.file_key,
            "local_data_file": Path(ingestion.local_data_file),
            "unzip_dir": Path(ingestion.unzip_dir),
        }
        return DataIngestionConfig(**settings)

    def get_aws_config(self) -> AWSConfig:
        """Build the AWS settings from the optional ``aws`` section.

        Returns:
            An ``AWSConfig`` populated from the section's three entries.

        Raises:
            ValueError: If the config has no ``aws`` section or it is ``None``.
        """
        aws_section = getattr(self.config, "aws", None)
        if aws_section is None:
            raise ValueError("AWS config not found. Add 'aws' section to config.yaml or skip S3 download.")

        return AWSConfig(
            access_key_id=aws_section.access_key_id,
            secret_access_key=aws_section.secret_access_key,
            region_name=aws_section.region_name,
        )

In [27]:
import zipfile
from ChickenDiseaseClassifier import logger

In [28]:
class DataIngestion:
    """Fetches the dataset archive (optionally from S3) and unpacks it.

    Args:
        config: Paths and S3 object location for this stage.
        aws_config: Optional object exposing ``access_key_id``,
            ``secret_access_key`` and ``region_name``. When it is ``None``,
            ``download_file_from_s3`` creates the S3 client without explicit
            credentials and lets boto3 resolve them through its default
            credential chain.
    """

    def __init__(self, config: DataIngestionConfig, aws_config=None):
        self.config = config
        self.aws_config = aws_config

    def download_file_from_s3(self):
        """Download the archive from S3 unless the local file already exists.

        Requires ``boto3``, but only when a download is actually performed.
        The parent directory of the destination is created if missing.
        """
        if os.path.exists(self.config.local_data_file):
            logger.info(f"File already exists: {self.config.local_data_file}")
            return

        # Imported lazily so the local-file workflow does not need boto3.
        import boto3

        # Pass explicit credentials only when they were supplied; otherwise
        # boto3 falls back to its own credential resolution instead of this
        # method failing on attribute access against None.
        client_kwargs = {}
        if self.aws_config is not None:
            client_kwargs = {
                "aws_access_key_id": self.aws_config.access_key_id,
                "aws_secret_access_key": self.aws_config.secret_access_key,
                "region_name": self.aws_config.region_name,
            }
        s3_client = boto3.client("s3", **client_kwargs)

        # The download target's folder may not exist yet.
        parent_dir = os.path.dirname(str(self.config.local_data_file))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        s3_client.download_file(
            Bucket=self.config.bucket_name,
            Key=self.config.file_key,
            Filename=str(self.config.local_data_file)
        )
        logger.info(f"{self.config.local_data_file} downloaded from S3.")

    def extract_zip_file(self):
        """Extract the local archive into ``unzip_dir``, creating it if needed.

        Errors raised by ``zipfile.ZipFile`` (for example when the archive is
        missing) propagate to the caller.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
            zip_ref.extractall(unzip_path)
        logger.info(f"Extracted files to: {unzip_path}")


In [29]:
# Run the data-ingestion stage. Exceptions are left to propagate with their
# original traceback; a try/except that only re-raises adds nothing.
cfg = ConfigurationManager()
di_cfg = cfg.get_data_ingestion_config()

# Option A: Local file already present at data_ingestion.local_data_file
# Ensure artifacts/data_ingestion/data.zip exists before running.
data_ingestion = DataIngestion(config=di_cfg, aws_config=None)
# Optionally: If using S3, uncomment the next two lines and ensure get_aws_config works.
# aws_cfg = cfg.get_aws_config()
# data_ingestion = DataIngestion(config=di_cfg, aws_config=aws_cfg); data_ingestion.download_file_from_s3()

data_ingestion.extract_zip_file()


[2025-08-17 10:30:14,563: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-08-17 10:30:14,566: INFO: common: yaml file: params.yaml loaded successfully]
[2025-08-17 10:30:14,568: INFO: common: created directory at: artifacts]
[2025-08-17 10:30:14,569: INFO: common: created directory at: artifacts/data_ingestion]
[2025-08-17 10:30:17,675: INFO: 3976020790: Extracted files to: artifacts\data_ingestion]
