In [13]:
from pathlib import Path



# ---------- Paths (IMPORTANT) ----------

# ---------- Root Path ----------
# Repository root from /notebooks
PROJECT_ROOT = Path.cwd().parent
print("PROJECT_ROOT:", PROJECT_ROOT.exists(), "| is:", PROJECT_ROOT)
# ---------- Sub Paths ----------
# Directory for outputs
OUT_ROOT = PROJECT_ROOT / "out"
print("OUT_ROOT:", OUT_ROOT.exists(), "| is:", OUT_ROOT)
# Directory for configuration
CONFIG_ROOT = PROJECT_ROOT / "config"
print("CONFIG_ROOT:", CONFIG_ROOT.exists(), "| is:", CONFIG_ROOT)
# ---------- Yaml Paths ----------
# Paths to specific YAML configuration files
pipeline_yaml_path = CONFIG_ROOT / "pipeline" / "classification_pipeline_config.yml"
print("pipeline_yaml_path exists?", pipeline_yaml_path.exists(), "| is:",pipeline_yaml_path)
# Path to dataset configuration YAML file
dataset_yaml_path = CONFIG_ROOT / "dataset" / "dataset_config.yml"
print("dataset_yaml_path exists?", dataset_yaml_path.exists(), "| is:",dataset_yaml_path)



PROJECT_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024
OUT_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out
CONFIG_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config
pipeline_yaml_path exists? True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
dataset_yaml_path exists? True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\dataset\dataset_config.yml


In [14]:
from phm_america_2024.core.logging_utils_core import get_logger, build_log_file, init_logging

log = get_logger(__name__)
log.info("=== CONFIG SMOKE TEST START ===")

log_file = build_log_file(output_root=OUT_ROOT, run_name="test_loader")
print("Log file path:", log_file)

init_logging(log_file, level="DEBUG")


[15:29:11] [INFO] LOG_PHM_NORTH_AMERICA_2024.__main__ - === CONFIG SMOKE TEST START ===
[15:29:11] [DEBUG] LOG_PHM_NORTH_AMERICA_2024 - Logging initialized. log_file=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out\logs\test_loader_20260206_152911.log level=DEBUG


Log file path: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out\logs\test_loader_20260206_152911.log


<Logger LOG_PHM_NORTH_AMERICA_2024 (DEBUG)>

In [17]:
from phm_america_2024.config.load_loader_config import load_and_resolve, load_yaml, find_unresolved_placeholders
# ---------- 1) Load dataset_config.yml (if present) ----------
dataset_vars = {}
if dataset_yaml_path.exists():
    ds_cfg = load_yaml(dataset_yaml_path) # Returns dict with keys: datasets, variables
    dataset_id = "phm_north_america_2024_train"  # change if your id is different
    ds = ds_cfg["datasets"][dataset_id] # Get the dataset config for the specified dataset_id

    print("ds keys:", ds.keys())
    print("ds paths keys:", ds["paths"].keys())

    dataset_vars = {
        "dataset_path_x_train": ds["paths"]["x_train"],
        "dataset_path_y_train": ds["paths"]["y_train"],
    }
    print("Dataset vars loaded from dataset_config.yml:", dataset_vars)
else:
    log.warning("dataset_config.yml not found. Using notebook vars only.")

[15:30:34] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_yaml: path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\dataset\dataset_config.yml
[15:30:34] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_yaml: loaded keys=['version', 'datasets']


ds keys: dict_keys(['description', 'paths', 'csv_params'])
ds paths keys: dict_keys(['x_train', 'y_train'])
Dataset vars loaded from dataset_config.yml: {'dataset_path_x_train': 'data/raw/train/X_Train.csv', 'dataset_path_y_train': 'data/raw/train/Y_Train.csv'}


In [18]:
# ---------- 2) Notebook overrides (always win) ----------
runtime_vars = dict(dataset_vars)
runtime_vars.update({
    #"dataset_path_x_train": str(PROJECT_ROOT / "data/raw/train/X_train.csv"),  # override example
    #"dataset_path_y_train": str(PROJECT_ROOT / "data/raw/train/y_train.csv"),  # override example
    "dataset_path": str(PROJECT_ROOT / "data/raw/train/X_train.csv"), # override example
    "target_col": None,
    "time_col": None,
    "id_cols": None, # override example
})
runtime_vars["output_root"] = str(OUT_ROOT)

print("Runtime vars:", runtime_vars)


Runtime vars: {'dataset_path_x_train': 'data/raw/train/X_Train.csv', 'dataset_path_y_train': 'data/raw/train/Y_Train.csv', 'dataset_path': 'K:\\00_Code\\Manutenzione\\Project_MPPR-AI_B_PHM_America_2024\\data\\raw\\train\\X_train.csv', 'target_col': None, 'time_col': None, 'id_cols': None, 'output_root': 'K:\\00_Code\\Manutenzione\\Project_MPPR-AI_B_PHM_America_2024\\out'}


In [26]:
# ---------- 3) Load + resolve pipeline YAML ----------
loaded = load_and_resolve(pipeline_yaml_path, runtime_vars=runtime_vars)

log.info("Top keys (raw): %s", list(loaded.raw.keys()))
log.info("Top keys (resolved): %s", list(loaded.resolved.keys()))
log.info("Merged variables keys: %s", sorted(list(loaded.variables.keys())))
log.info("YAML loaded from: %s", loaded.source_path)

print("Loaded config sample (raw):", loaded.raw.keys() )
print("Resolved config sample:", loaded.resolved.keys() )
print("Merged variables sample:", loaded.variables.keys() )
print("YAML loaded from:", loaded.source_path )



[15:45:13] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_and_resolve: start path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
[15:45:13] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_yaml: path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
[15:45:13] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_yaml: loaded keys=['version', 'pipeline', 'runtime', 'stages']
[15:45:13] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - merge_variables: yaml_vars=['dataset_path', 'id_cols', 'target_col', 'time_col'] runtime_vars=['dataset_path', 'dataset_path_x_train', 'dataset_path_y_train', 'id_cols', 'output_root', 'target_col', 'time_col']
[15:45:13] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - _

Loaded config sample (raw): dict_keys(['version', 'pipeline', 'runtime', 'stages'])
Resolved config sample: dict_keys(['version', 'pipeline', 'runtime', 'stages'])
Merged variables sample: dict_keys(['dataset_path', 'target_col', 'time_col', 'id_cols', 'dataset_path_x_train', 'dataset_path_y_train', 'output_root'])
YAML loaded from: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml


In [20]:

# ----------- 4) Check for unresolved placeholders ----------
has_unresolved =  find_unresolved_placeholders(loaded.resolved)
log.info("Unresolved placeholders found: %s", has_unresolved)

has_unresolved, n = find_unresolved_placeholders(loaded.resolved)
log.info("Unresolved placeholders? %s count=%d", has_unresolved, n)

[15:34:03] [INFO] LOG_PHM_NORTH_AMERICA_2024.__main__ - Unresolved placeholders found: (False, 0)
[15:34:03] [INFO] LOG_PHM_NORTH_AMERICA_2024.__main__ - Unresolved placeholders? False count=0
