In [1]:
import os

In [2]:
%pwd  

'd:\\MLOPS\\ML-Approach-for-Predict-Cancellation-Prevent-Loss-with-MLflow\\research'

In [3]:
# The kernel starts in research/ (see the %pwd output above); step up one level
# to the project root. NOTE(review): the path is relative, so re-running this
# cell climbs another directory — restart the kernel before re-executing.
os.chdir("../")

In [4]:
%pwd 

'd:\\MLOPS\\ML-Approach-for-Predict-Cancellation-Prevent-Loss-with-MLflow'

In [5]:
from dataclasses import dataclass
from pathlib import Path 

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings consumed by the data-ingestion step.

    Fields are positional in this order: ``root_dir``, ``source_URL``,
    ``local_data_file``, ``unzip_dir``. Only ``unzip_dir`` has a default,
    which is the current directory (``Path(".")``). Instances are frozen,
    so assigning to a field after construction raises an error.
    """

    root_dir: Path
    source_URL: str
    local_data_file: Path
    unzip_dir: Path = Path(".")

In [6]:
from src.constants import *
from src.utils.common import *

In [7]:
class ConfigurationManager:
    """Load the project configuration and build per-stage config objects from it.

    Constructing the manager reads the file at ``config_filepath`` with
    ``read_yaml`` and passes the configured ``artifacts_root`` to
    ``create_directories``.
    """

    def __init__(self, config_filepath = CONFIG_FILE_PATH):
        self.config = read_yaml(config_filepath)
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Return the ``DataIngestionConfig`` for the ``data_ingestion`` section.

        The section's ``root_dir`` is passed to ``create_directories`` first.
        Field values are handed over exactly as read from the configuration;
        no conversion (e.g. to ``Path``) is applied here.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_URL=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )

In [8]:
import os
import urllib.request as request
import zipfile
from src.utils.logger import logging
from src.utils.exception import CustomException
from src.utils.common import *
import sys

In [9]:
class DataIngestion:
    """Fetch the dataset archive described by the configuration and unpack it.

    ``config`` must expose ``source_URL``, ``local_data_file`` and ``unzip_dir``.
    """

    # Google hosts whose share links are rewritten to the direct-download endpoint.
    _DRIVE_HOSTS = ("drive.google.com", "docs.google.com")

    def __init__(self, config: "DataIngestionConfig"):
        self.config = config

    @staticmethod
    def _resolve_download_url(source_url: str) -> str:
        """Return the URL that should actually be fetched for ``source_url``.

        Google Drive share links (``.../d/<id>/...`` or ``...?id=<id>``) become
        ``https://drive.google.com/uc?id=<id>``. Any URL on another host is
        returned unchanged, so plain direct-download links work as well.
        """
        # Imported locally so the notebook's import cell does not need to change.
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(source_url)
        if (parsed.hostname or "").lower() not in DataIngestion._DRIVE_HOSTS:
            return source_url

        segments = [part for part in parsed.path.split("/") if part]
        if "d" in segments[:-1]:
            # .../file/d/<id>/view style link: the id follows the "d" segment.
            file_id = segments[segments.index("d") + 1]
        else:
            query_ids = parse_qs(parsed.query).get("id")
            # Last resort: the previous "second-to-last segment" rule.
            file_id = query_ids[0] if query_ids else source_url.split("/")[-2]
        return f"https://drive.google.com/uc?id={file_id}"

    def download_file(self):
        """Download ``source_URL`` to ``local_data_file`` unless it already exists.

        The payload is written to ``<local_data_file>.part`` and moved into place
        only after the transfer finished, so an interrupted download is never
        mistaken for a complete file on the next run. Returns None; errors from
        ``urlretrieve`` propagate to the caller.
        """
        target = self.config.local_data_file
        if os.path.exists(target):
            logging.info(f"File already exists of size : {get_size(Path(target))}")
            return

        download_url = self._resolve_download_url(self.config.source_URL)

        # Mirror extract_zip_file: make sure the destination directory is there.
        os.makedirs(os.path.dirname(os.fspath(target)) or ".", exist_ok=True)

        partial = f"{os.fspath(target)}.part"
        try:
            _, headers = request.urlretrieve(url=download_url, filename=partial)
            os.replace(partial, target)
        finally:
            # After a successful move nothing is left; after a failure this
            # removes the truncated temporary file.
            if os.path.exists(partial):
                os.remove(partial)

        logging.info(f"{target} download with following info: \n {headers}")

    def extract_zip_file(self):
        """Unpack ``local_data_file`` into ``unzip_dir``, creating it if needed.

        Returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)

        with zipfile.ZipFile(self.config.local_data_file, "r") as archive:
            archive.extractall(unzip_path)


In [10]:
# Run the ingestion stage end to end: load the configuration, download the
# archive (skipped when the local file already exists), then extract it.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()
except Exception as e:
    # Re-raise every failure as the project's CustomException; `sys` is passed
    # along, presumably so it can inspect the active traceback — TODO confirm.
    raise CustomException(e,sys)    

[ 2024-04-01 19:52:13,139 ] 31 root - INFO - config\config.yaml yaml file loaded successfully
[ 2024-04-01 19:52:13,142 ] 52 root - INFO - Created directory at : artifacts
[ 2024-04-01 19:52:13,143 ] 52 root - INFO - Created directory at : artifacts/data_ingestion
[ 2024-04-01 19:52:15,504 ] 13 root - INFO - artifacts/data_ingestion/data.zip download with following info: 
 X-GUploader-UploadID: ABPtcPoaouQPrwi6djy-MkvGEE0mWwoqONxTCo9OzvgAxqXWaTKMGs8V1x-2Z9nDHE2JErh3N-N8rL4CgQ
Content-Type: application/octet-stream
Content-Security-Policy: sandbox
Content-Security-Policy: default-src 'none'
Content-Security-Policy: frame-ancestors 'none'
X-Content-Security-Policy: sandbox
Cross-Origin-Opener-Policy: same-origin
Cross-Origin-Embedder-Policy: require-corp
Cross-Origin-Resource-Policy: same-site
X-Content-Type-Options: nosniff
Content-Disposition: attachment; filename="Hotel_Booking_Cancellation.zip"
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: false
Access-Control-Allo