In [1]:
# Config entity creation started
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of the filesystem locations used by data ingestion."""

    # Directory created before ingestion; the download is written here.
    root_dir: Path
    # Location of the zip archive that is opened for extraction.
    local_data_file: Path
    # Directory the archive contents are extracted into.
    unzip_dir: Path


In [2]:
from ensure import ensure_annotations
from box import ConfigBox
from box.exceptions import BoxValueError

In [3]:
# Configuration manager creation started
from constants import CONFIG_FILE_PATH
from utils.common import create_directories,read_yaml

class ConfigurationManger:
    """Load the project configuration and build per-stage config objects.

    The spelling of the class name is kept unchanged because callers
    reference it by this name.
    """

    def __init__(self,config_filepath=CONFIG_FILE_PATH):
        """Read the configuration file and create the artifacts root directory.

        Args:
            config_filepath: Location handed to ``read_yaml``; defaults to
                ``CONFIG_FILE_PATH``.
        """
        self.config = read_yaml(config_filepath)
        create_directories([self.config.artifacts_root])

    def get_dataingestion_config(self)->DataIngestionConfig:
        """Build the ``DataIngestionConfig`` from the ``data_ingestion`` section.

        Creates ``root_dir`` as a side effect.

        Returns:
            DataIngestionConfig: config whose three fields are ``Path`` objects.
        """
        config = self.config.data_ingestion
        create_directories([config.root_dir])
        # DataIngestionConfig annotates every field as Path, but the values read
        # from the config are presumably plain strings (TODO confirm against
        # read_yaml). Path() is a no-op on an existing Path, so coercing here is
        # safe either way and makes the dataclass honour its declared types.
        return DataIngestionConfig(
            root_dir=Path(config.root_dir),
            local_data_file=Path(config.local_data_file),
            unzip_dir=Path(config.unzip_dir),
        )

In [19]:
# Component creation started
import os
import zipfile
from utils.common import create_directories
from logger.logger import logging
from exception.exception import customexception
import sys
from pathlib import Path
from kaggle.api.kaggle_api_extended import KaggleApi
from dotenv import load_dotenv

class DataIngestion:
    """Download a Kaggle competition archive and unpack it locally.

    All locations come from the ``DataIngestionConfig`` given to the
    constructor: ``root_dir`` receives the download, ``local_data_file`` is the
    path the archive is stored under, and ``unzip_dir`` receives the extracted
    files.
    """

    # Kaggle competition whose data is fetched; override in a subclass to reuse
    # the component for another competition.
    COMPETITION = "playground-series-s3e8"
    # File names that survive the clean-up pass in ``extract_zipfile``.
    REQUIRED_FILES = ("train.csv", "test.csv")

    def __init__(self,config:DataIngestionConfig):
        self.config = config

    def download_file(self):
        """Download the competition archive and store it at ``local_data_file``.

        Raises:
            customexception: wraps any error raised while authenticating,
                downloading or moving the archive.
        """
        try:
            root_dir = self.config.root_dir
            local_data_file = self.config.local_data_file
            create_directories([root_dir])

            # Make credentials kept in a .env file visible to the Kaggle client.
            # The previous `os.environ[X] = os.getenv(X)` lines were a no-op when
            # the variable was set and a TypeError when it was not.
            # NOTE(review): KaggleApi.authenticate() is expected to read
            # KAGGLE_USERNAME / KAGGLE_KEY from the environment - confirm.
            load_dotenv()
            api = KaggleApi()
            api.authenticate()
            logging.info("Authentication succesful")

            logging.info(f"Downloading data into the {local_data_file}")
            api.competition_download_files(self.COMPETITION, path=root_dir)

            # The archive is looked up as "<competition>.zip" inside root_dir and
            # moved to the configured location, which is the path
            # extract_zipfile() opens.
            downloaded_zip = os.path.join(root_dir, f"{self.COMPETITION}.zip")
            if os.path.isfile(downloaded_zip):
                target_parent = os.path.dirname(local_data_file)
                if target_parent:
                    os.makedirs(target_parent, exist_ok=True)
                # os.replace overwrites an archive left by an earlier run;
                # os.rename raises on Windows when the target already exists.
                os.replace(downloaded_zip, local_data_file)
            elif not os.path.isfile(local_data_file):
                raise FileNotFoundError(
                    f"Expected downloaded archive at {downloaded_zip}"
                )
        except Exception as e:
            # The exception used to be constructed but never raised, which hid
            # every download failure from the caller.
            # NOTE(review): assumes customexception subclasses Exception - confirm.
            raise customexception(e, sys) from e

    def extract_zipfile(self):
        """Extract the archive into ``unzip_dir`` and prune ``root_dir``.

        After extraction every regular file directly inside ``root_dir`` whose
        name is not listed in ``REQUIRED_FILES`` is deleted (this includes the
        archive itself when it lives in ``root_dir``). Sub-directories are left
        untouched.
        """
        unzip_path = Path(self.config.unzip_dir)
        create_directories([unzip_path])
        with zipfile.ZipFile(self.config.local_data_file,'r') as zip_ref:
            zip_ref.extractall(unzip_path)

        root_dir = self.config.root_dir
        for file_ in os.listdir(root_dir):
            file_path = os.path.join(root_dir, file_)
            # os.remove cannot delete directories, so only regular files are
            # pruned; this keeps a nested unzip_dir from aborting the clean-up.
            if file_ not in self.REQUIRED_FILES and os.path.isfile(file_path):
                os.remove(file_path)

        logging.info("Unzipping of data completed")
        
        

In [20]:
# for _file in os.listdir("artifacts/data_ingestion"):
#     current_path = os.path.join("artifacts/data_ingestion",_file)
#     new_path= os.path.join("artifacts/data_ingestion","data.zip")
#     os.rename(current_path,new_path)

In [21]:
# req_files = ["train.csv","test.csv"]
# for file_ in os.listdir("artifacts/data_ingestion"):
#     if not file_ in req_files:
#         file_path = os.path.join("artifacts/data_ingestion",file_)
#         os.remove(file_path)

In [22]:
pwd

'e:\\Programming\\Projects\\Machine Learning Projects\\GemStonePricePrediction'

In [23]:
#cd ..

In [24]:
# Run the data-ingestion stage end to end: build the config, download the
# archive, then extract it.
try:
    logging.info("Data Ingestion Started")
    config = ConfigurationManger()
    data_ingestion_config = config.get_dataingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zipfile()
    logging.info("Data Ingestion Completed")
except Exception:
    # Record the failure with its traceback before propagating it. A bare
    # `raise` re-raises the active exception unchanged, whereas the previous
    # `raise e` handler did nothing except add a frame to the traceback.
    # NOTE(review): assumes `logging` from logger.logger exposes the stdlib
    # logging API (it is already used for logging.info) - confirm.
    logging.exception("Data Ingestion Failed")
    raise