In [1]:
import os

In [2]:
pwd

'd:\\PW_DS\\YOLO\\End-To-End-Waste-Detection-Project-using-Yolo-v5\\research'

In [3]:
# Step up from the research/ folder to the project root.
# Guarded on the current folder name so that re-running this cell does not
# keep climbing one directory higher on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir('..')

In [4]:
pwd

'd:\\PW_DS\\YOLO\\End-To-End-Waste-Detection-Project-using-Yolo-v5'

## Entity

In [5]:
from dataclasses import dataclass
from pathlib import Path

In [20]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings consumed by the data-ingestion step.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory into which the Drive download writes its "data.zip".
    root_dir: Path
    # Directory into which the downloaded archive is extracted.
    feature_path: Path
    # Source URL of the dataset. Kept as ``str`` (not ``Path``) because the
    # ingestion code splits it on "/" to pull out the Drive file id.
    data_URL: str
    # Location of the zip archive that the extraction step opens.
    data_path: Path


## Config Manager

In [19]:
from waste_detection.utils.common import  read_yaml, create_directories
from waste_detection.constants import *

In [32]:
class ConfigManager:
    """Reads the config and params YAML files and builds config entities.

    The loaded objects are kept on ``self.config`` and ``self.params``.
    The code relies on ``read_yaml`` returning an object that supports
    attribute access (e.g. ``config.artifacts_root``).
    """

    def __init__(
        self,
        config_file_path = CONFIG_FILE_PATH,
        params_file_path = PARAMS_FILE_PATH):
        """Load both YAML files and prepare the artifacts root.

        Args:
            config_file_path: Location of the main configuration YAML.
            params_file_path: Location of the parameters YAML.
        """
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        # presumably creates the directory if it is missing — confirm in utils.common
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ``DataIngestionConfig`` from the ``data_ingestion`` section.

        Also passes the section's ``root_dir`` to ``create_directories``
        before the entity is returned.

        Returns:
            DataIngestionConfig populated with ``root_dir``, ``feature_path``,
            ``data_URL`` and ``data_path`` exactly as read from the config.
        """
        ingestion_section = self.config.data_ingestion
        create_directories([ingestion_section.root_dir])

        return DataIngestionConfig(
            root_dir=ingestion_section.root_dir,
            feature_path=ingestion_section.feature_path,
            data_URL=ingestion_section.data_URL,
            data_path=ingestion_section.data_path,
        )

## Components

In [23]:
import os
import sys
import zipfile
from urllib import request

import gdown

from waste_detection.exception import CustomException
from waste_detection.logger import  logging

In [38]:
class DataIngestion:
    """Downloads the dataset archive and unpacks it.

    All locations come from the ``DataIngestionConfig`` passed in:
    ``data_URL`` (source), ``root_dir`` (Drive download folder),
    ``data_path`` (zip archive) and ``feature_path`` (extraction folder).
    """

    def __init__(
        self,
        config: "DataIngestionConfig"):
        self.config = config

    def download_data_from_github(self):
        """Download ``config.data_URL`` to ``config.data_path`` via plain HTTP.

        The download is skipped when ``config.data_path`` already exists.
        Returns None.
        """
        if not os.path.exists(self.config.data_path):
            filename, headers = request.urlretrieve(
                url = self.config.data_URL,
                filename = self.config.data_path
            )

            logging.info(f"{filename} download! with following info: \n {headers}")

        else:
            logging.info(f"File already exists.")

    def download_data_from_drive(self):
        """Download the dataset archive from Google Drive with ``gdown``.

        ``config.data_URL`` must be a share link whose second-to-last
        "/"-separated segment is the file id
        (e.g. ``https://drive.google.com/file/d/<id>/view``).
        The archive is written to ``<config.root_dir>/data.zip``.

        Raises:
            CustomException: wraps any error raised during the download.
        """
        try:
            dataset_url = self.config.data_URL
            zip_download_dir = self.config.root_dir
            os.makedirs(zip_download_dir, exist_ok=True)
            data_file_name = "data.zip"
            # NOTE(review): extract_zip_file() opens config.data_path, so that
            # setting must point at this same <root_dir>/data.zip — confirm in config.
            zip_file_path = os.path.join(zip_download_dir, data_file_name)
            logging.info(f"Downloading data from {dataset_url} into file {zip_file_path}")

            file_id = dataset_url.split("/")[-2]
            # Direct-download endpoint; the query string starts right after "?".
            prefix = 'https://drive.google.com/uc?export=download&id='
            gdown.download(prefix+file_id,zip_file_path)

            logging.info(f"Downloaded data from {dataset_url} into file {zip_file_path}")

        except Exception as e:
            raise CustomException(e, sys) from e

    def extract_zip_file(self):
        """Extract the archive at ``config.data_path`` into ``config.feature_path``.

        The target directory is created when missing. Returns None.
        """
        unzip_path = self.config.feature_path
        os.makedirs(unzip_path,exist_ok=True)
        with zipfile.ZipFile(self.config.data_path, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)

        logging.info(f"{self.config.data_path} unzipped to {unzip_path}")

## Pipeline

In [39]:
# Run the ingestion stage end to end: load config, download the archive, unpack it.
try:
    config = ConfigManager()
    ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(ingestion_config)

    # Alternative source (disabled): data_ingestion.download_data_from_github()
    data_ingestion.download_data_from_drive()
    data_ingestion.extract_zip_file()
except Exception as err:
    # Re-raise as the project's exception type, passing sys as the original did.
    raise CustomException(err, sys)

Downloading...
From (original): https://drive.google.com/uc?/export=download&id=1ECfl3dtYyfivY8kYPq7RHUBTjC-2vf61
From (redirected): https://drive.google.com/uc?/export=download&id=1ECfl3dtYyfivY8kYPq7RHUBTjC-2vf61&confirm=t&uuid=636a2a92-695d-4600-b90f-bf14291acbd7
To: d:\PW_DS\YOLO\End-To-End-Waste-Detection-Project-using-Yolo-v5\artifacts\data_ingestion\data.zip
100%|██████████| 40.1M/40.1M [00:07<00:00, 5.39MB/s]
