In [1]:
import os

In [2]:
%pwd

'c:\\Users\\PMLS\\Desktop\\Jupyter notebook\\Campusx Codes\\Deep-Learning-Project\\cnnclassifier\\notebooks'

In [3]:
# Move from the notebooks/ folder up to the project root so that the relative
# paths used below (config/config.yaml, params.yaml, data/...) and the `src`
# package imports resolve.
# Guarded so that re-running this cell does not climb one more directory level.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\PMLS\\Desktop\\Jupyter notebook\\Campusx Codes\\Deep-Learning-Project\\cnnclassifier'

## 1. Update config.yaml

```yaml
data_artifacts_root: data


data_ingestion:
  root_dir: data/zipped
  source_URL: https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view?usp=sharing
  local_data_file: data/zipped/data.zip
  unzip_dir: data/raw

```

## 3. Update params.yaml

Don't leave the params.yaml file empty: reading an empty file will throw an error. Until real parameters are needed, fill it with a dummy key-value pair.
```yaml
key: val
```

## 3.1 Update src/constants/\_\_init\_\_.py

In [5]:
from pathlib import Path

# Locations of the two YAML files read by ConfigurationManager.
# Both are relative paths, so they resolve against the current working
# directory (the notebook changes into the project root before using them).
CONFIG_FILE_PATH = Path("config/config.yaml")
PARAMS_FILE_PATH = Path("params.yaml")

## 4. Update the entity

In [11]:
# Entity for the data_ingestion section of config.yaml: one field per key, with its expected type

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Settings for the data-ingestion stage.

    The fields mirror the keys of the ``data_ingestion`` section of
    config.yaml. ``frozen=True`` makes instances read-only after creation.
    """

    # Directory that holds the downloaded archive (config: data/zipped).
    root_dir: Path
    # Google Drive share link of the dataset archive.
    source_URL: str
    # Path the downloaded zip file is written to (config: data/zipped/data.zip).
    local_data_file: Path
    # Directory the archive is extracted into (config: data/raw).
    unzip_dir: Path

## 5. Update the configuration manager src/config/configuration.py

In [12]:
from src.constants import *
from src.utils.common import read_yaml, create_directories
from src.logger import CustomLogger

class ConfigurationManager:
    """Read the project's YAML files and build per-stage config entities.

    On construction the config and params files are read with ``read_yaml``
    and ``create_directories`` is called for the ``data_artifacts_root``
    entry of the config.
    """

    def __init__(
        self,
        logger: CustomLogger,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and prepare the artifacts root.

        Args:
            logger: Logger handed to ``read_yaml`` and ``create_directories``.
            config_filepath: Path of the main config file.
            params_filepath: Path of the params file.
        """
        self.logger = logger

        # The config is read first, then the params; both results are kept on the instance.
        self.config = read_yaml(config_filepath, logger=logger)
        self.params = read_yaml(params_filepath, logger=logger)

        artifacts_root = self.config.data_artifacts_root
        create_directories([artifacts_root], logger=logger)

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ``DataIngestionConfig`` from the ``data_ingestion`` section.

        ``create_directories`` is called for the section's ``root_dir`` before
        the entity is built. Values are passed through exactly as they appear
        in the loaded config (no type conversion is done here).
        """
        section = self.config.data_ingestion

        create_directories([section.root_dir], logger=self.logger)

        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_URL=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )

## 6. Update the components \[data preprocessing, model training, and so on\]

In [None]:
import os
import zipfile
import gdown
from src.logger import CustomLogger

class DataIngestion:
    """Download the dataset archive and unpack it.

    All locations come from the config object given at construction:
    ``source_URL``, ``root_dir``, ``local_data_file`` and ``unzip_dir``.
    """

    def __init__(self, config: "DataIngestionConfig"):
        self.config = config

    def download_file(self, download_logger: "CustomLogger") -> None:
        """Fetch the archive from ``source_URL`` into ``local_data_file``.

        Args:
            download_logger: Logger that receives the progress and error messages.

        Raises:
            RuntimeError: If ``gdown.download`` reports failure by returning ``None``.
            Exception: Any other error is logged and re-raised unchanged.
        """
        # Read outside the try block so the except handler can always format them.
        dataset_url = self.config.source_URL
        zip_download_dir = self.config.local_data_file

        try:
            os.makedirs(self.config.root_dir, exist_ok=True)
            download_logger.save_logs(
                msg=f"Downloading data from {dataset_url} into file {zip_download_dir}",
                log_level="info",
            )

            # Assumes a share link of the form .../file/d/<file_id>/view?...,
            # where the id is the second-to-last path segment.
            file_id = dataset_url.split("/")[-2]
            prefix = "https://drive.google.com/uc?export=download&id="

            # gdown only treats a str output as a file path, so coerce in case
            # the config carries a pathlib.Path.
            downloaded = gdown.download(prefix + file_id, str(zip_download_dir))
            if downloaded is None:
                # Do not log success when gdown signals that nothing was downloaded.
                raise RuntimeError(f"gdown could not download {dataset_url}")

            download_logger.save_logs(
                msg=f"Data downloaded from {dataset_url} into file {zip_download_dir} successfully",
                log_level="info",
            )

        except Exception as e:
            download_logger.save_logs(
                msg=f"Error in downloading data from {dataset_url} into file {zip_download_dir}: {e}",
                log_level="error",
            )
            raise

    def extract_zip_file(self, extract_logger: "CustomLogger") -> None:
        """Extract ``local_data_file`` into ``unzip_dir``, creating it if needed.

        Args:
            extract_logger: Logger that receives the success and error messages.

        Raises:
            Exception: Any error raised while creating the directory or
                extracting is logged and re-raised unchanged.
        """
        # Read outside the try block so the except handler can always format them.
        zip_path = self.config.local_data_file
        unzip_path = self.config.unzip_dir

        try:
            os.makedirs(unzip_path, exist_ok=True)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(unzip_path)

            extract_logger.save_logs(
                msg=f"Data extracted from {zip_path} into {unzip_path} Successfully",
                log_level="info",
            )

        except Exception as e:
            extract_logger.save_logs(
                msg=f"Error in extracting data from {zip_path} into {unzip_path}: {e}",
                log_level="error",
            )
            raise

## 7. Update the pipeline

In [14]:
from src.logger import create_log_path, CustomLogger
import logging

def _make_logger(name: str, level: int = logging.INFO):
    """Set up one logger and return ``(log_file_path, logger)``.

    The log file path comes from ``create_log_path(name)``; the logger is a
    ``CustomLogger`` named ``name`` writing to that path, with its level set
    to ``level``.
    """
    log_file_path = create_log_path(name)
    logger = CustomLogger(logger_name=name, log_filename=log_file_path)
    logger.set_log_level(level=level)
    return log_file_path, logger


# One log file per concern: download, extraction, and the overall ingestion run.
download_log_file_path, download_data_logger = _make_logger("download_dataset")
extract_log_file_path, extract_data_logger = _make_logger("extract_dataset")
data_log_file_path, data_logger = _make_logger("data_ingestion")


# Run the stage end to end: read config, download the archive, then extract it.
try:
    config = ConfigurationManager(data_logger)
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file(download_logger=download_data_logger)
    data_ingestion.extract_zip_file(extract_logger=extract_data_logger)
except Exception as e:
    data_logger.save_logs(msg = f"Error in Data Ingestion {e}", log_level = "error")
    # Bare raise re-raises the active exception unchanged.
    raise

Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3
From (redirected): https://drive.google.com/uc?%2Fexport=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3&confirm=t&uuid=c1eed570-f0f5-4a62-afe9-cd626a6ad2f4
To: c:\Users\PMLS\Desktop\Jupyter notebook\Campusx Codes\Deep-Learning-Project\cnnclassifier\data\zipped\data.zip
100%|██████████| 57.7M/57.7M [02:39<00:00, 362kB/s]


## 8. Update the main.py

In [None]:
# Build one (log file path, logger) pair per name, in the order listed.
# Each logger writes to its own file and is set to INFO level.
_log_setup = {}
for _log_name in ("download_dataset", "extract_dataset", "data_ingestion", "pipeline"):
    _log_path = create_log_path(_log_name)
    _log_obj = CustomLogger(logger_name=_log_name, log_filename=_log_path)
    _log_obj.set_log_level(level=logging.INFO)
    _log_setup[_log_name] = (_log_path, _log_obj)

download_log_file_path, download_data_logger = _log_setup["download_dataset"]
extract_log_file_path, extract_data_logger = _log_setup["extract_dataset"]
data_log_file_path, data_logger = _log_setup["data_ingestion"]
pipeline_log_file_path, pipeline_logger = _log_setup["pipeline"]

# Run the stage, bracketing it with start/completed banners in the pipeline log.
# NOTE(review): STAGE_NAME and DataIngestionTrainingPipeline are not defined in
# this notebook; presumably main.py imports them from the pipeline module.
try:
    pipeline_logger.save_logs(msg=f">>>>>>>>>>>> stage {STAGE_NAME} started <<<<<<<<<<<<", log_level="info")

    obj = DataIngestionTrainingPipeline(
        download_data_logger=download_data_logger,
        extract_data_logger=extract_data_logger,
        data_logger=data_logger,
    )
    obj.main()

    pipeline_logger.save_logs(msg=f">>>>>>>>>>>> stage {STAGE_NAME} completed <<<<<<<<<<<<\n\nx============x", log_level="info")
except Exception as e:
    # Record the failure in the pipeline log, then let it propagate.
    pipeline_logger.save_logs(msg=f"Error in {STAGE_NAME} {e}", log_level="error")
    raise e


## 9. Update the dvc.yaml

## 10. app.py