In [2]:
import os

In [70]:
%pwd

'/config/workspace'

In [4]:
# Step up to the parent directory (presumably the project root, so that
# relative paths such as config/config.yaml resolve -- TODO confirm).
# NOTE: a relative chdir is not idempotent; re-running this cell climbs one
# more level each time.
os.chdir("../")

In [17]:
%pwd

'/config/workspace'

In [71]:
from dataclasses import dataclass
from pathlib import Path

In [72]:
from constant  import *

In [73]:
import os
from box.exceptions import BoxValueError
import yaml
from src.bankniftyprediction.logging import logger
from ensure import ensure_annotations
from box import ConfigBox
from pathlib import Path
from typing import Any

@ensure_annotations
def read_yaml(path_to_yaml: Path) -> ConfigBox:
    """Load a YAML file and wrap its parsed content in a ConfigBox.

    Args:
        path_to_yaml (Path): location of the YAML file to read.

    Raises:
        ValueError: if ConfigBox rejects the parsed content with a
            BoxValueError; reported as "yaml file is empty" and chained to
            the original error.
        Exception: any other error raised while opening or parsing the file
            propagates unchanged.

    Returns:
        ConfigBox: the parsed content, usable with attribute-style access
            (e.g. ``config.artifacts_root``).
    """
    try:
        with open(path_to_yaml) as yaml_file:
            content = yaml.safe_load(yaml_file)
        # Logged before the ConfigBox conversion, so this line is emitted even
        # when the conversion below fails.
        logger.info(f"yaml file: {path_to_yaml} loaded successfully")
        return ConfigBox(content)
    except BoxValueError as exc:
        # Keep the ValueError contract callers rely on, but preserve the cause.
        raise ValueError("yaml file is empty") from exc
    


@ensure_annotations
def create_directories(path_to_directories: list, verbose=True):
    """Make sure every directory in the given list exists.

    Args:
        path_to_directories (list): directory paths to create. Missing parents
            are created as well and existing directories are left untouched.
        verbose (bool, optional): when truthy, log one line per directory.
            Defaults to True.
    """
    for path in path_to_directories:
        os.makedirs(path, exist_ok=True)
        if not verbose:
            continue
        logger.info(f"created directory at: {path}")



@ensure_annotations
def get_size(path: Path) -> str:
    """Report a file's size in whole kilobytes.

    Args:
        path (Path): file whose size is measured.

    Returns:
        str: text of the form "~ <n> KB", where n is the size in bytes
            divided by 1024 and passed through round().
    """
    size_in_bytes = os.path.getsize(path)
    size_in_kb = round(size_in_bytes / 1024)
    return f"~ {size_in_kb} KB"

    



In [74]:
# CONFIG_FILE_PATH is not defined in this notebook; presumably it is supplied
# by the wildcard `from constant import *` above -- verify.
config_filepath = CONFIG_FILE_PATH

In [75]:
config_filepath

PosixPath('config/config.yaml')

In [76]:
# Parse the YAML into a ConfigBox so its sections can be inspected below.
config = read_yaml(config_filepath)

[2024-01-22 18:13:12,923: INFO: 1892379423: yaml file: config/config.yaml loaded successfully]


In [77]:
config

ConfigBox({'artifacts_root': 'artifact', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'data': '1RELIANCE.csv', 'local_data_file': 'artifacts/data_ingestion/1RELIANCE.csv'}, 'data_validation': {'root_dir': 'artifacts/data_validation', 'STATUS_FILE': 'artifacts/data_validation/status.txt', 'ALL_REQUIRED_FILES': ['train', 'test', 'validation']}, 'data_transformation': {'root_dir': 'artifacts/data_transformation', 'data_path': 'artifacts/data_ingestion/samsum_dataset', 'tokenizer_name': 'google/pegasus-cnn_dailymail'}, 'model_trainer': {'root_dir': 'artifacts/model_trainer', 'data_path': 'artifacts/data_transformation/samsum_dataset', 'model_ckpt': 'google/pegasus-cnn_dailymail'}, 'model_evaluation': {'root_dir': 'artifacts/model_evaluation', 'data_path': 'artifacts/data_transformation/samsum_dataset', 'model_path': 'artifacts/model_trainer/pegasus-samsum-model', 'tokenizer_path': 'artifacts/model_trainer/tokenizer', 'metric_file_name': 'artifacts/model_evaluation/metrics.csv

In [35]:
# Create the top-level directory named by `artifacts_root`.
# NOTE(review): the displayed config has artifacts_root = 'artifact' while
# every stage path starts with 'artifacts/' -- likely a typo in config.yaml;
# confirm which spelling is intended.
create_directories([config.artifacts_root])

[2024-01-22 17:24:27,905: INFO: 1892379423: created directory at: artifact]


In [78]:
# Narrow `config` to its data_ingestion section. This rebinds the name, so the
# cell cannot be re-run on its own: the section itself has no `data_ingestion`
# key. Re-run the read_yaml cell first.
config=config.data_ingestion

In [38]:
# `config` is the data_ingestion section at this point, so this creates that
# stage's root_dir.
create_directories([config.root_dir])

[2024-01-22 17:26:20,957: INFO: 1892379423: created directory at: artifacts/data_ingestion]


In [83]:
class ConfigurationManager:
    """Reads the project configuration and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load the YAML configuration and create the artifacts root directory.

        Args:
            config_filepath: path of the main configuration YAML.
            params_filepath: accepted but not read anywhere in this class.
                NOTE(review): confirm whether params loading is still planned.
        """
        self.config = read_yaml(config_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the `data_ingestion` section.

        Creates the section's `root_dir` directory as a side effect.

        Returns:
            DataIngestionConfig: `root_dir`, `data` and `local_data_file`
                copied as-is from the configuration section.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            data=section.data,
            local_data_file=section.local_data_file,
        )
    

In [84]:
DataIngestionConfig

__main__.DataIngestionConfig

In [92]:
class DataIngestion:
    """Places the raw data file where the ingestion stage expects to find it."""

    # The annotation is quoted so the class can be defined before the cell that
    # declares DataIngestionConfig has been run.
    def __init__(self, config: "DataIngestionConfig"):
        """Keep the ingestion settings.

        Args:
            config (DataIngestionConfig): supplies `data` (the source file)
                and `local_data_file` (the destination).
        """
        self.config = config

    def move_file(self):
        """Move `config.data` to `config.local_data_file` unless it already exists.

        Source and destination come from the configuration rather than from
        machine-specific absolute paths, so relative entries are resolved
        against the current working directory.

        Raises:
            FileNotFoundError: if the source file does not exist (raised by
                shutil.move).
        """
        # Imported locally: the notebook only imports shutil in a later cell.
        import shutil

        destination = self.config.local_data_file
        if os.path.exists(destination):
            logger.info(f"File already exists of size: {get_size(Path(self.config.local_data_file))}")
            return

        # Moving straight to local_data_file keeps the destination in sync
        # with the existence check above, so a second call is a no-op.
        shutil.move(self.config.data, destination)

        
    
   

In [46]:
import shutil

In [86]:
# Rebinds `config` again, this time to a ConfigurationManager; constructing it
# re-reads the YAML and creates the artifacts root directory.
config = ConfigurationManager()

[2024-01-22 18:15:22,107: INFO: 1892379423: yaml file: config/config.yaml loaded successfully]
[2024-01-22 18:15:22,109: INFO: 1892379423: created directory at: artifact]


In [93]:
# Run the data-ingestion stage end to end. No try/except wrapper is used:
# a handler that only re-raises adds nothing, and any failure should surface
# with its original traceback.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.move_file()

[2024-01-22 18:19:12,113: INFO: 1892379423: yaml file: config/config.yaml loaded successfully]
[2024-01-22 18:19:12,115: INFO: 1892379423: created directory at: artifact]
[2024-01-22 18:19:12,116: INFO: 1892379423: created directory at: artifacts/data_ingestion]


In [69]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Instances are built by ConfigurationManager.get_data_ingestion_config from
    the `data_ingestion` section of the YAML configuration.

    NOTE(review): that method passes the config values through unchanged, so
    the Path-annotated fields appear to hold plain strings at runtime; a
    dataclass does not coerce them -- confirm whether Path objects are wanted.
    """
    root_dir: Path  # directory created for this stage's artifacts
    data: str  # data file name as given in the config (e.g. '1RELIANCE.csv')
    local_data_file: Path  # location checked for the ingested data file