In [1]:
from pathlib import Path
from utils.base_utils import read_yaml

yaml_path = Path("../config/config.yaml")

try:
    config = read_yaml(yaml_path)
    print(config)
except Exception as e:
    print(f"Error: {e}")

2025-12-14 19:42:20,951 | fsd_logger | INFO | base_utils.py:36 | YAML file ../config/config.yaml loaded successfully
{'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_URL': 'https://drive.google.com/file/d/1wUOvq1aDOx8qyERt7L2d2taeaVAiV6Le/view?usp=sharing', 'local_datafile': 'artifacts/data_ingestion/food_spoilage.rar', 'unzip_dir': 'artifacts/data_ingestion'}, 'data_preprocessing': {'root_dir': 'artifacts/data_preprocessing', 'unzip_dir': 'artifacts/data_ingestion', 'reshape_dir': 'artifacts/data_preprocessing', 'image_size': [224, 244]}, 'prepare_base_model': {'root_dir': 'artifacts/prepare_base_model', 'base_model_path': 'artifacts/prepare_base_model/base_model.h5', 'updated_base_model_path': 'artifacts/prepare_base_model/updated_base_model.h5', 'updated_base_model_image_path': 'artifacts/prepare_base_model/updated_base_model.png'}, 'training': {'root_dir': 'artifacts/training', 'trained_model_path': 'artifacts/training/model.h5'}}


In [2]:
from pathlib import Path
from utils.base_utils import create_directories

base_path = Path("../test")
dirs = [base_path / "dir1", base_path / "dir2"]
create_directories(dirs)

2025-12-14 19:42:20,964 | fsd_logger | INFO | base_utils.py:60 | Directory created at ../test/dir1
2025-12-14 19:42:20,970 | fsd_logger | INFO | base_utils.py:60 | Directory created at ../test/dir2


### Initial Testing

In [3]:
import os

In [4]:
%pwd

'/Volumes/Macintosh/mlflow-dvc-food-spoilage-detector/notebooks'

In [5]:
os.chdir("../")

In [6]:
%pwd

'/Volumes/Macintosh/mlflow-dvc-food-spoilage-detector'

In [7]:
import os
import zipfile
import gdown
from utils import logger

In [8]:
# config yamal and keys and this keys are same
# this is return type of a function
# data class allows to define class variable without adding self
# == Entity ==
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir: Path
    source_URL: str
    local_datafile: Path
    unzip_dir: Path

In [9]:
# constants file has the location to config files
# updtate the configuration manager in src config

from utils.base_utils import read_yaml, create_directories
from constants import *


class ConfigurationManager:

    def __init__(self, config_filepath=CONFIG_FILE_PATH, param_path=PARAMS_FILE_PATH):
        # this will retuen config box type dictionay
        self.config = read_yaml(config_filepath)
        self.param = read_yaml(param_path)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        config = self.config.data_ingestion
        create_directories([self.config.artifacts_root])

        data_ingestion_config = DataIngestionConfig(
            root_dir=config.root_dir,
            source_URL=config.source_URL,
            local_datafile=config.local_datafile,
            unzip_dir=config.unzip_dir,
        )
        return data_ingestion_config

In [10]:
import rarfile
import zipfile
import gdown
from utils import logger
from pathlib import Path


# components
class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self) -> str:
        try:
            dataset_url = self.config.source_URL
            zip_donwload_dir = self.config.local_datafile
            os.makedirs("artifacts/data_ingestion", exist_ok=True)
            logger.info(
                f"Donwloaded data from {dataset_url} into file {zip_donwload_dir}"
            )

            file_id = dataset_url.split("/")[-2]
            prefix_url = "https://drive.google.com/uc?/export=download&id="
            gdown.download(prefix_url + file_id, zip_donwload_dir)

        except Exception as e:
            raise e

    def extract_zip_file(self):
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        file_path = self.config.local_datafile

        if file_path.endswith('.zip'):
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(unzip_path)
        elif file_path.endswith('.rar'):
            with rarfile.RarFile(file_path) as rar_ref:
                rar_ref.extractall(unzip_path)
        else:
            raise Exception(
                "File format not supported for extraction. Only .zip and .rar are supported."
            )

In [11]:
# Pipeleline
try:
    # Initilize the ConfigurationManager
    config = ConfigurationManager()
    # Get the config yaml file details
    data_ingestion_config = config.get_data_ingestion_config()
    # Initilize the DataIngestion
    data_ingestion = DataIngestion(config=data_ingestion_config)
    # Call download file
    data_ingestion.download_file()
    # Call unzip file
    data_ingestion.extract_zip_file()
except Exception as e:
    raise e

2025-12-14 19:42:21,168 | fsd_logger | INFO | base_utils.py:36 | YAML file config/config.yaml loaded successfully
2025-12-14 19:42:21,172 | fsd_logger | INFO | base_utils.py:36 | YAML file params.yaml loaded successfully
2025-12-14 19:42:21,172 | fsd_logger | INFO | base_utils.py:60 | Directory created at artifacts
2025-12-14 19:42:21,173 | fsd_logger | INFO | base_utils.py:60 | Directory created at artifacts
2025-12-14 19:42:21,173 | fsd_logger | INFO | 2780561084.py:17 | Donwloaded data from https://drive.google.com/file/d/1wUOvq1aDOx8qyERt7L2d2taeaVAiV6Le/view?usp=sharing into file artifacts/data_ingestion/food_spoilage.rar


Downloading...
From: https://drive.google.com/uc?/export=download&id=1wUOvq1aDOx8qyERt7L2d2taeaVAiV6Le
To: /Volumes/Macintosh/mlflow-dvc-food-spoilage-detector/artifacts/data_ingestion/food_spoilage.rar
100%|██████████| 19.9M/19.9M [00:14<00:00, 1.35MB/s]
