### Example of why to use ConfigBox

In [None]:
from box import ConfigBox

In [3]:
# Plain dict used to contrast key lookup with attribute access.
example1 = {
    "key1": "Value1",
    "Key2": "Value2",
}

In [4]:
example1['key1']

'Value1'

In [5]:
# Demonstrates that a plain dict has no attribute-style access. The expected
# AttributeError is caught and printed so the notebook still runs top to bottom.
try:
    example1.key1
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: 'dict' object has no attribute 'key1'

In [6]:
# Wrap the dict in ConfigBox so its keys can also be read as attributes (example1.key1).
# Note: this rebinds `example1` from a plain dict to a ConfigBox.
example1 = ConfigBox(example1)

In [7]:
example1.key1

'Value1'

### Example of why to use ensure_annotations

In [10]:
def mul(a:int,b:int) -> int:
    """Return ``a * b``.

    The annotations are hints only; nothing in this function checks them at
    runtime.
    """
    product = a * b
    return product

In [11]:
# No runtime check on the annotations: 2 * "4" repeats the string instead of failing.
mul(2,"4")

'44'

In [12]:
# NOTE(review): ensure_annotations is not imported in any visible cell —
# presumably `from ensure import ensure_annotations`; confirm and add the import.
@ensure_annotations
def mul(a:int,b:int) -> int:
    """Return ``a * b``; the decorator rejects arguments that do not match the annotations."""
    product = a * b
    return product

In [13]:
# Expected to fail: "4" is a str but `b` is annotated as int. The error is
# caught and printed so the notebook still runs top to bottom.
try:
    mul(2,"4")
except Exception as err:  # EnsureError in practice; its import is not visible in this notebook
    print(f"{type(err).__name__}: {err}")

EnsureError: Argument b of type <class 'str'> to <function mul at 0x0000026CA25788B0> does not match annotation type <class 'int'>

### Experimenting with the Data ingestion phase

In [1]:
import os

In [2]:
pwd

'd:\\ML projects\\Learning\\AIML Projects\\E2EdataScience\\research'

In [3]:
# Move from the notebook's `research/` folder up to the project root.
# Guarded so that re-running this cell does not climb one directory further
# each time it is executed.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
pwd

'd:\\ML projects\\Learning\\AIML Projects\\E2EdataScience'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class DataIngestionConfig:
    """Settings consumed by the data-ingestion stage.

    NOTE(review): the fields are annotated as Path, but the visible code fills
    them straight from the loaded config without converting — confirm the
    actual runtime types.
    """
    # Directory created for this stage before the config object is built.
    root_dir: Path
    # URL the archive is downloaded from.
    source_URL: str
    # Download destination; also the zip file that is later opened for extraction.
    local_data_file: Path
    # Directory the archive is extracted into.
    unzip_dir: Path

In [6]:
pwd

'd:\\ML projects\\Learning\\AIML Projects\\E2EdataScience'

In [None]:
from src.E2Edatascience.constants import *
from src.E2Edatascience.utils.common import *

class ConfigurationManager:
    """Reads the project's config, params and schema files and builds stage configs.

    Constructing an instance reads the three files and creates the artifacts
    root directory named in the config.
    """

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH
                 ):
        # Files are read in this order: config, then params, then schema.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Create the artifacts root up front.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the ingestion root directory and return the stage's settings.

        Returns:
            A DataIngestionConfig populated from the ``data_ingestion`` section
            of the loaded config.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_URL=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir,
        )




In [None]:
# Component
import urllib.request as request
from src.E2Edatascience import logger
import zipfile


class DataIngestion:
    """Downloads the source archive and unpacks it, driven by a DataIngestionConfig."""

    def __init__(self,config:DataIngestionConfig):
        self.config = config

    def download_file(self):
        """Fetch ``source_URL`` into ``local_data_file`` unless that file already exists."""
        target = self.config.local_data_file
        if os.path.exists(target):
            logger.info("File already exists")
            return

        # urlretrieve returns (local filename, response headers); only the name is logged.
        filename, _ = request.urlretrieve(
            url=self.config.source_URL,
            filename=target,
        )
        logger.info(f"File downloaded {filename}")

    def extract_zip_file(self):
        """Extract every member of ``local_data_file`` into ``unzip_dir``, creating it if needed."""
        destination = self.config.unzip_dir
        os.makedirs(destination, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as archive:
            archive.extractall(destination)

        

In [11]:
# Run the data-ingestion stage end to end: load the configuration, download
# the archive, then extract it. Exceptions are left to propagate so the
# notebook shows the full traceback.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_file()
data_ingestion.extract_zip_file()


[2025-03-07 11:40:42,818: INFO : common : yaml file: config\config.yaml loaded Successfully]
[2025-03-07 11:40:42,820: INFO : common : yaml file: params.yaml loaded Successfully]
[2025-03-07 11:40:42,822: INFO : common : yaml file: schema.yaml loaded Successfully]
[2025-03-07 11:40:42,823: INFO : common : Created directory at artifacts]
[2025-03-07 11:40:42,823: INFO : common : Created directory at artifacts/data_ingestion]
[2025-03-07 11:40:43,023: INFO : 3339967956 : File downloaded artifacts/data_ingestion/data.zip]
