In [1]:
from dataclasses import dataclass
from pathlib import Path

# @dataclass automatically creates an __init__ method, so you don’t have to manually define it. This method will take each of the fields (like root_dir, source_url, etc.) as an argument, making it easier to initialize the class with configuration values.



@dataclass
class DataIngestionConfig:
    """Settings consumed by the data-ingestion stage.

    Declared as a dataclass so that `__init__`, `__repr__` and `__eq__` are
    generated from the fields below. Fields are accepted positionally in the
    order listed, or by keyword. Instances remain mutable.
    """

    root_dir: Path         # directory ConfigurationManager creates for the stage
    source_url: str        # URL the archive is downloaded from
    local_data_file: Path  # path the downloaded archive is written to
    unzip_dir: Path        # directory the archive is extracted into

In [2]:
import os

# Show which directory the kernel was started in; the relative paths used
# further down are resolved against it.
print(Path.cwd())

/Users/likhit/Documents/Coding/MLOps/TextSummarization/research


In [3]:
# The kernel starts in the notebook's own folder, while the `src.*` imports in
# the following cells are resolved from the directory that contains `src`.
# Step up one level only when `src` is not already visible, so that re-running
# this cell does not keep climbing out of the project.
if not os.path.isdir("src"):
    os.chdir("../")
print(os.getcwd())

/Users/likhit/Documents/Coding/MLOps/TextSummarization


In [4]:
from src.text_Summarizer.constants import *
from src.text_Summarizer.utils.common import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects.

    Both files are parsed with `read_yaml` at construction time and kept on
    `self.config` and `self.params`.
    """

    def __init__(self, config_path=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Parse the config and params files.

        Args:
            config_path: Path of the main config file; defaults to CONFIG_FILE_PATH.
            params_filepath: Path of the params file; defaults to PARAMS_FILE_PATH.
        """
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_filepath)

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ingestion settings from the `data_ingestion` config section.

        The section's `root_dir` is handed to `create_directories` before the
        config object is assembled.

        Returns:
            A DataIngestionConfig whose `root_dir`, `local_data_file` and
            `unzip_dir` are wrapped in `Path`; `source_url` is passed through
            unchanged.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_url=section.source_url,
            local_data_file=Path(section.local_data_file),
            unzip_dir=Path(section.unzip_dir),
        )


In [6]:
import os
import urllib.request as request
import zipfile
from src.text_Summarizer.logging import logger
from urllib.request import urlretrieve

In [7]:
class DataIngestion:
    """Downloads the dataset archive and unpacks it.

    Reads `source_url`, `local_data_file` and `unzip_dir` from the config
    object it is constructed with.
    """

    def __init__(self, config: "DataIngestionConfig"):
        """Keep a reference to the ingestion settings.

        Args:
            config: Object exposing `source_url`, `local_data_file` and
                `unzip_dir`.
        """
        self.config = config

    def download_file(self):
        """Fetch `source_url` into `local_data_file` unless that file exists.

        The parent directory of `local_data_file` is created when missing.
        The payload is written to a sibling `<name>.part` file first and only
        renamed to its final name once the transfer has finished, so an
        interrupted download is not mistaken for a complete one by the
        existence check on a later run.

        Raises:
            Any error raised by `urlretrieve` (for example
            `urllib.error.URLError`). The `.part` file is removed before the
            error propagates.
        """
        destination = Path(self.config.local_data_file)

        # `Path.parent` is "." for a bare file name, so this also works when
        # the target has no directory component (os.makedirs("") would raise).
        destination.parent.mkdir(parents=True, exist_ok=True)

        if destination.exists():
            logger.info("File has already been downloaded")
            return

        partial = destination.with_name(destination.name + ".part")
        try:
            urlretrieve(
                url=str(self.config.source_url),
                filename=str(partial),
            )
            os.replace(partial, destination)
        finally:
            # After a successful rename there is nothing left to remove; on
            # failure this discards the incomplete payload.
            if partial.exists():
                partial.unlink()

        logger.info(f"The file has downloaded into {self.config.local_data_file}")

    def zip_extract(self):
        """Extract every member of `local_data_file` into `unzip_dir`.

        `unzip_dir` is created if it does not exist yet.

        Raises:
            FileNotFoundError: If `local_data_file` does not exist.
            zipfile.BadZipFile: If `local_data_file` is not a valid zip archive.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
            zip_ref.extractall(unzip_path)


In [8]:

# Load the YAML settings and build the data-ingestion config object.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()

# Run the stage: download the archive first, then unpack it.
data_ingestion=DataIngestion(config=data_ingestion_config)
data_ingestion.download_file()
data_ingestion.zip_extract()


[2024-11-01 11:05:22,829: INFO: common yaml file : config/config.yaml loaded successfully]
[2024-11-01 11:05:22,832: INFO: common yaml file : params.yaml loaded successfully]
[2024-11-01 11:05:22,833: INFO: common Create directory as airfacts/data_ingestion]
[2024-11-01 11:05:23,750: INFO: 2878926953 The file has downloaded into artifacts/data_ingestion/data.zip]


In [9]:
# Confirm the working directory the ingestion run above executed in.
print(Path.cwd())

/Users/likhit/Documents/Coding/MLOps/TextSummarization
