In [25]:
from collections import namedtuple


### Explanation of Named Tuple

In [26]:
# For data ingestion, we have to specify the following configuration values:

# Download Url
# Download folder (compressed file)
# Extract Folder ( Extracted file)
# Train dataset folder
# Test dataset folder

In [27]:
# config. of source data
# Immutable record type; the field names below are listed in positional order.
DataIngestionConfig = namedtuple(
    "DataIngestionConfig",
    [
        "dataset_download_url",
        "tgz_download_dir",
        "raw_data_dir",
        "ingested_train_dir",
        "ingested_test_dir",
    ],
)

In [28]:
# Sample instance: every field is filled with the placeholder string "demo".
data_ingestion_config = DataIngestionConfig(
    dataset_download_url="demo",
    tgz_download_dir="demo",
    raw_data_dir="demo",
    ingested_train_dir="demo",
    ingested_test_dir="demo",
)

In [29]:
# this is a named tuple: every stored value is labelled with a field name, so we can see what is saved where
# named tuples are immutable, so this config can't be changed after it is created
data_ingestion_config

DataIngestionConfig(dataset_download_url='demo', tgz_download_dir='demo', raw_data_dir='demo', ingested_train_dir='demo', ingested_test_dir='demo')

### How to read .yaml files

In [30]:
import yaml

In [31]:
import os

In [32]:
os.getcwd()
# shows the current working directory (the folder that relative paths are resolved against)

'c:\\Users\\rkt7k\\Desktop\\iNeuron Data Science\\Projects iNeuron\\MLPROJ'

In [33]:
# now we need to locate that yaml file, so first step is to change the directory
# Instead of a machine-specific absolute path, walk upwards from the current
# working directory until we reach the folder that contains config/config.yaml
# (the project root), then make that folder the working directory.
project_root = os.getcwd()
while not os.path.isfile(os.path.join(project_root, "config", "config.yaml")):
    parent_dir = os.path.dirname(project_root)
    if parent_dir == project_root:
        # Reached the filesystem root without finding the config file.
        raise FileNotFoundError(
            "Could not find config/config.yaml in the current directory or any parent directory"
        )
    project_root = parent_dir
os.chdir(project_root)

In [34]:
os.getcwd()

'c:\\Users\\rkt7k\\Desktop\\iNeuron Data Science\\Projects iNeuron\\MLPROJ'

In [35]:
# to check files of current directory
os.listdir()

['.dockerignore',
 '.git',
 '.github',
 '.gitignore',
 '.vscode',
 'app.py',
 'build',
 'config',
 'dist',
 'Dockerfile',
 'housing',
 'Housing_Logs',
 'housinig_predictor.egg-info',
 'Initial Steps Done.txt',
 'LICENSE',
 'notebook',
 'Project Structure.png',
 'README.md',
 'requirements.txt',
 'setup.py',
 'venv']

In [36]:
# creating a file path for yaml file

# os.path.join builds the file path with the separator of the current operating system;
# the result is a relative path, so it is resolved against the current working directory
config_file_path = os.path.join("config", "config.yaml")
config_file_path

'config\\config.yaml'

In [37]:
# to check if file is available or not
os.path.exists(config_file_path)

True

In [38]:
# opening yaml file
# Start from None so the name is bound before the file is opened and parsed.
config_info = None
with open(config_file_path, mode="rb") as config_stream:
    config_info = yaml.safe_load(config_stream)

In [39]:
config_info

{'training_pipeline_config': {'pipeline_name': 'housing',
  'artifact_dir': 'artifact'},
 'data_ingestion_config': {'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
  'raw_data_dir': 'raw_data',
  'tgz_download_dir': 'tgz_data',
  'ingested_dir': 'ingested_data',
  'ingested_train_dir': 'train',
  'ingested_test_dir': 'test'},
 'data_validation_config': {'schema_dir': 'config',
  'schema_file_name': 'schema.yaml',
  'report_file_name': 'report.json',
  'report_page_file_name': 'report.html'},
 'data_transformation_config': {'add_bedroom_per_room': True,
  'transformed_dir': 'transformed_data',
  'transformed_train_dir': 'train',
  'transformed_test_dir': 'test',
  'preprocessing_dir': 'preprocessed',
  'preprocessed_object_file_name': 'preprocessed.pkl'},
 'model_trainer_config': {'trained_model_dir': 'trained_model',
  'model_file_name': 'model.pkl',
  'base_accuracy': 0.6,
  'model_config_dir': 'config',
  'model_confi

In [40]:
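# making a function to read yaml file
# NOTE(review): the `from tkinter import E` below is not used in any visible cell and
# looks like an accidental editor auto-import - confirm and remove it.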

from tkinter import E


def read_yaml_file(file_path : str) -> dict:
    """
    Reads a YAML file and returns the contents as a dictionary.

    Parameters
    ----------
    file_path : str
        Path of the YAML file to read.

    Returns
    -------
    dict
        The object produced by ``yaml.safe_load`` for the file contents.

    Raises
    ------
    Any exception raised by ``open`` (e.g. FileNotFoundError) or by
    ``yaml.safe_load`` propagates to the caller unchanged.
    """
    # Imported locally on purpose: a later cell of this notebook rebinds the
    # global name `yaml` to the loaded config, which would otherwise make this
    # function fail when it is called (or re-run) afterwards.
    import yaml

    # Binary mode lets the YAML loader handle the text encoding itself.
    with open(file_path, 'rb') as yaml_file:
        return yaml.safe_load(yaml_file)

In [41]:
# NOTE(review): this assignment rebinds the global name `yaml` from the imported
# module to the loaded config dictionary. After this cell runs, global lookups such as
# `yaml.safe_load` resolve to the dict and fail, so earlier cells cannot be re-run
# without importing yaml again. Later cells index `yaml[...]`, so renaming this
# variable requires updating those cells as well.
yaml = read_yaml_file(config_file_path)
yaml

{'training_pipeline_config': {'pipeline_name': 'housing',
  'artifact_dir': 'artifact'},
 'data_ingestion_config': {'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
  'raw_data_dir': 'raw_data',
  'tgz_download_dir': 'tgz_data',
  'ingested_dir': 'ingested_data',
  'ingested_train_dir': 'train',
  'ingested_test_dir': 'test'},
 'data_validation_config': {'schema_dir': 'config',
  'schema_file_name': 'schema.yaml',
  'report_file_name': 'report.json',
  'report_page_file_name': 'report.html'},
 'data_transformation_config': {'add_bedroom_per_room': True,
  'transformed_dir': 'transformed_data',
  'transformed_train_dir': 'train',
  'transformed_test_dir': 'test',
  'preprocessing_dir': 'preprocessed',
  'preprocessed_object_file_name': 'preprocessed.pkl'},
 'model_trainer_config': {'trained_model_dir': 'trained_model',
  'model_file_name': 'model.pkl',
  'base_accuracy': 0.6,
  'model_config_dir': 'config',
  'model_confi

In [44]:
from housing.constant import *

In [45]:
TRAINING_PIPELINE_CONFIG_KEY

'training_pipeline_config'

###### Now we know that the YAML reader returns its output as a dictionary. When we need to read the value stored under TRAINING_PIPELINE_CONFIG_KEY, we can simply use that constant as the key, since the key name is already hardcoded in the constants module.

In [46]:
yaml[TRAINING_PIPELINE_CONFIG_KEY]

{'pipeline_name': 'housing', 'artifact_dir': 'artifact'}