In [1]:
# `os` is used by later cells to change the working directory and to test
# whether the download / extraction targets already exist.
import os
# Display the directory the kernel is currently running in.
%pwd

'f:\\ml_projects\\text_summarizer\\notbooks'

In [2]:
os.chdir("../")
%pwd

'f:\\ml_projects\\text_summarizer'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion step.

    Instances are built by ``ConfigurationManager.get_data_ingestion_config``
    from the ``data_ingestion`` section of the loaded configuration and are
    consumed by ``DataIngestion``. The dataclass stores values exactly as
    given; the annotations below are not enforced or converted at runtime.
    """
    # Directory for this step; it is created before the config object is built.
    root_dir: Path
    # Passed as the first argument to ``gdown.download`` (what to fetch).
    source: str
    # Path the download is written to, and later opened as a zip archive.
    local_data_file: Path
    # Directory the archive contents are extracted into.
    unzip_dir: Path

In [8]:
from text_summarizer.constants import *
from text_summarizer.utils.common import create_directories,read_yaml


class ConfigurationManager:
    """Loads the project configuration and builds config objects from it."""

    def __init__(self,config_filepath = CONFIG_FILEPATH) -> None:
        """Load the configuration via ``read_yaml`` and create the artifacts root.

        Args:
            config_filepath: Location of the configuration file; defaults to
                the project-wide ``CONFIG_FILEPATH`` constant.
        """
        self.config = read_yaml(config_filepath)
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the ingestion root directory and return the ingestion settings.

        Returns:
            A ``DataIngestionConfig`` populated from the ``data_ingestion``
            section of the loaded configuration.
        """
        section = self.config.data_ingestion
        ingestion_root = section.root_dir
        create_directories([ingestion_root])

        return DataIngestionConfig(
            root_dir=ingestion_root,
            source=section.source,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )


In [9]:
import gdown
import sys
from zipfile import ZipFile
from text_summarizer.utils.logger import logger
from text_summarizer.utils.exception import CustomException

In [10]:
class DataIngestion:
    """Downloads the dataset archive and unpacks it, skipping work already done."""

    def __init__(self,config:DataIngestionConfig):
        """Keep the ingestion settings used by the download and extract steps."""
        self.config = config

    def download_file(self):
        """Download ``config.source`` to ``config.local_data_file`` if it is missing.

        When the target file already exists nothing is downloaded and only a
        log message is written.

        Raises:
            CustomException: wraps any error raised during the download,
                including the case where the call returned but no file
                was written.
        """
        try:
            target = self.config.local_data_file
            if not os.path.exists(target):
                gdown.download(self.config.source, target)
                # Only report success when the file is really on disk.
                # NOTE(review): some gdown versions appear to return None on
                # failure instead of raising - confirm for the pinned version.
                if not os.path.exists(target):
                    raise FileNotFoundError(
                        f"Download finished but no file was written to {target}"
                    )
                logger.info("Zip file downloaded successfully")
            else:
                logger.info("Zip file is already Existed")
        except Exception as e:
            raise CustomException(e,sys) from e

    def extract_file(self):
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        Extraction is skipped only when ``unzip_dir`` is an existing,
        non-empty directory. An empty directory is not treated as a finished
        extraction, so a pre-created target still gets populated.

        Raises:
            CustomException: wraps any error raised while opening or
                extracting the archive.
        """
        try:
            unzip_dir = self.config.unzip_dir
            already_extracted = os.path.isdir(unzip_dir) and bool(os.listdir(unzip_dir))
            if already_extracted:
                logger.info("File is Already Extracted")
            else:
                with ZipFile(self.config.local_data_file,'r') as zip_ref:
                    zip_ref.extractall(unzip_dir)
                logger.info('Zip file extracted Successfully')
        except Exception as e:
            raise CustomException(e,sys) from e
    

In [11]:
# Run the ingestion step end to end: load the configuration, download the
# archive, then extract it.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config = data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_file()
except CustomException:
    # The download/extract methods already wrap their failures; re-raise
    # untouched so the original error is not buried under a second wrapper.
    raise
except Exception as e:
    raise CustomException(e,sys) from e

 [ 2024-01-08 16:34:43,922 ] - 25 - common - artifacts Directory Created Successfully
<__main__.ConfigurationManager object at 0x000002900925BA30>
 [ 2024-01-08 16:34:43,926 ] - 25 - common - artifacts/data_ingestion Directory Created Successfully


Downloading...
From: https://github.com/Ankitzanzmera/data_repository/raw/main/summarizer-data.zip
To: f:\ml_projects\text_summarizer\artifacts\data_ingestion\data.zip
100%|██████████| 7.90M/7.90M [00:04<00:00, 1.93MB/s]

 [ 2024-01-08 16:34:50,964 ] - 9 - 838712992 - Zip file downloaded successfully





 [ 2024-01-08 16:34:51,277 ] - 22 - 838712992 - Zip file extracted Successfully
