In [1]:
# namedtuple is a factory function that creates a class that can be used to create objects with named fields.
from collections import namedtuple

In [2]:
# A plain tuple: its values are only addressable by position.
(12,256,62)
# namedtuple(...) returns a new class, not an instance, so the variable it is
# assigned to names a class that still has to be called to create objects.
# With a namedtuple, each field in the tuple gets a name.

(12, 256, 62)

In [3]:
# Record type holding the five data-ingestion settings, one named field each.
DataIngestionConfig = namedtuple(
    typename="DataIngestionConfig",
    field_names=[
        "dataset_download_url",
        "tgz_download_dir",
        "raw_data_dir",
        "ingested_train_dir",
        "ingested_test_dir",
    ],
)

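1. Download URL (`dataset_download_url`)
2. Download folder for the compressed file (`tgz_download_dir`)
3. Extract folder for the extracted files (`raw_data_dir`)
4. Train dataset folder (`ingested_train_dir`)
5. Test dataset folder (`ingested_test_dir`)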

In [4]:
# Example instance; every field is passed by keyword so each value is tied to its name.
data_ingestion_config = DataIngestionConfig(
    dataset_download_url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
    tgz_download_dir='data/raw',
    raw_data_dir='data/raw',
    ingested_train_dir='data/processed/train',
    ingested_test_dir='data/processed/test',
)

In [5]:
data_ingestion_config.raw_data_dir

'data/raw'

In [6]:
import yaml
import os

In [7]:
# Relative location of the settings file, joined with the platform's path separator.
config_file_path = os.path.join("config", "config.yaml")

In [8]:
config_file_path

'config\\config.yaml'

In [9]:
# config_file_path is relative, so the answer depends on the current working directory.
os.path.exists(config_file_path)

False

In [10]:
os.pardir

'..'

In [11]:
os.getcwd()

'd:\\Scripting\\machine_learning_project\\notebook'

In [12]:
# Step up to the project root so that relative paths such as config/config.yaml
# resolve. The target is derived from the current working directory instead of a
# machine-specific absolute path, and the guard keeps the cell idempotent:
# re-running it does not climb a second level.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir(os.pardir)

In [13]:
os.getcwd()

'd:\\Scripting\\machine_learning_project'

In [14]:
os.path.exists(config_file_path)

True

In [15]:
# Parse the YAML settings file into plain Python objects.
config_info = None  # remains None if opening or parsing the file raises
with open(file=config_file_path, mode="rb") as yaml_file:
    config_info = yaml.safe_load(stream=yaml_file)

In [16]:
config_info["data_ingestion_config"]

{'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
 'raw_data_dir': 'raw_data',
 'tgz_download_dir': 'tgz_data',
 'ingested_dir': 'ingested_data',
 'ingested_train_dir': 'train',
 'ingested_test_dir': 'test'}

In [17]:
def read_yaml_file(file_path:str)->dict:
    """Read a YAML file and return its parsed content.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file content.

    Raises:
        Exception: Any error raised while opening or parsing the file
            propagates to the caller unchanged.
    """
    # No try/except here: a handler that only re-raises the caught exception
    # adds nothing, so errors are simply allowed to propagate.
    with open(file_path, 'rb') as yaml_file:
        return yaml.safe_load(yaml_file)

In [18]:
config = read_yaml_file(config_file_path)

In [1]:
from housing.constant import *

In [20]:
TRAINING_PIPELINE_CONFIG_KEY

'training_pipeline_config'

In [21]:
config[TRAINING_PIPELINE_CONFIG_KEY][TRAINING_PIPELINE_ARTIFACT_DIR_KEY]

'artifact'

In [22]:
# Artifact directory: ROOT_DIR / <pipeline name> / <artifact dir name>, with the
# last two parts read from the training-pipeline section of the config.
training_pipeline_config = config_info[TRAINING_PIPELINE_CONFIG_KEY]
artifact_dir = os.path.join(
    ROOT_DIR,
    training_pipeline_config[TRAINING_PIPELINE_NAME_KEY],
    training_pipeline_config[TRAINING_PIPELINE_ARTIFACT_DIR_KEY],
)

In [23]:
artifact_dir

'd:\\Scripting\\machine_learning_project\\housing\\artifact'

In [2]:
from housing.config.configuration import Configuration

In [3]:
# Build the project's Configuration object from the YAML settings file.
# NOTE(review): the path is absolute and machine-specific — consider deriving it
# from the project root. The name `config` is also used for a plain dict in an
# earlier cell; confirm the reuse is intentional.
config = Configuration("d:\\Scripting\\machine_learning_project\\config\\config.yaml")

In [4]:
config

<housing.config.configuration.Configuration at 0x1f2b7d73ba8>

In [5]:
config.get_data_ingestion_config()

DataIngestionConfig(dataset_download_url='https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz', tgz_download_dir='d:\\Scripting\\machine_learning_project\\notebook\\housing\\artifact\\data_ingestion\\2022-08-26_03-21-04\\tgz_data', raw_data_dir='d:\\Scripting\\machine_learning_project\\notebook\\housing\\artifact\\data_ingestion\\2022-08-26_03-21-04\\raw_data', ingested_train_dir='d:\\Scripting\\machine_learning_project\\notebook\\housing\\artifact\\data_ingestion\\2022-08-26_03-21-04\\ingested_data\\train', ingested_test_dir='d:\\Scripting\\machine_learning_project\\notebook\\housing\\artifact\\data_ingestion\\2022-08-26_03-21-04\\ingested_data\\test')

In [35]:
config.get_training_pipeline_config().artifact_dir

'd:\\Scripting\\machine_learning_project\\housing\\artifact'

In [6]:
DATA_INGESTION_ARTIFACT_DIR

'data_ingestion'

In [1]:
# Raw string literal: in a normal literal the backslashes of a Windows path start
# escape sequences, and \S, \m and \c are invalid ones that trigger a warning.
# The resulting string value is unchanged.
file_path = r'D:\Scripting\machine_learning_project\config'

In [3]:
os.listdir(file_path)[0]

'config.yaml'

In [1]:
# Imported here because this cell runs first on a fresh kernel and nothing
# earlier in the notebook imports sys.
import sys

# Returns a 3-tuple (type, value, traceback) describing the exception currently
# being handled; all three are None when no exception is being handled.
sys.exc_info()

(None, None, None)

In [3]:
from collections import namedtuple
# Record type with four named fields describing the outcome of data ingestion.
DataIngestionArtifact = namedtuple(
    typename="DataIngestionArtifact",
    field_names=[
        "train_file_path",
        "test_file_path",
        "is_ingested",
        "message",
    ],
)

In [9]:
# Throwaway instance with placeholder strings, used to try out _asdict() below.
experiment = DataIngestionArtifact(
    train_file_path='xyz',
    test_file_path='asdf',
    is_ingested='true',
    message='hello',
)

In [10]:
test = experiment._asdict()

In [11]:
test

OrderedDict([('train_file_path', 'xyz'),
             ('test_file_path', 'asdf'),
             ('is_ingested', 'true'),
             ('message', 'hello')])

In [12]:
# Wrap each field value in a one-element list, keeping the field order.
test_dict: dict = {field: [test[field]] for field in test}

In [13]:
test_dict

{'train_file_path': ['xyz'],
 'test_file_path': ['asdf'],
 'is_ingested': ['true'],
 'message': ['hello']}

In [16]:
# Print each one-element value list of test_dict on its own line.
for x in test_dict.values():
    print(x)

['xyz']
['asdf']
['true']
['hello']


In [1]:
from housing.constant import *

In [2]:
TRAINING_PIPELINE_NAME_KEY

'pipeline_name'