In [1]:
from pathlib import Path



# ---------- Paths (IMPORTANT) ----------

# ---------- Root Path ----------
# Repository root from /notebooks
PROJECT_ROOT = Path.cwd().parent
print("PROJECT_ROOT:", PROJECT_ROOT.exists(), "| is:", PROJECT_ROOT)
# ---------- Sub Paths ----------
# Directory for outputs
OUT_ROOT = PROJECT_ROOT / "out"
print("OUT_ROOT:", OUT_ROOT.exists(), "| is:", OUT_ROOT)
# Directory for configuration
CONFIG_ROOT = PROJECT_ROOT / "config"
print("CONFIG_ROOT:", CONFIG_ROOT.exists(), "| is:", CONFIG_ROOT)
# ---------- Yaml Paths ----------
# Paths to specific YAML configuration files
pipeline_yaml_path = CONFIG_ROOT / "pipeline" / "classification_pipeline_config.yml"
print("pipeline_yaml_path exists?", pipeline_yaml_path.exists(), "| is:",pipeline_yaml_path)
# Path to dataset configuration YAML file
dataset_yaml_path = CONFIG_ROOT / "dataset" / "dataset_config.yml"
print("dataset_yaml_path exists?", dataset_yaml_path.exists(), "| is:",dataset_yaml_path)



PROJECT_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024
OUT_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out
CONFIG_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config
pipeline_yaml_path exists? True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
dataset_yaml_path exists? True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\dataset\dataset_config.yml


In [2]:
from phm_america_2024.core.logging_utils_core import get_logger, build_log_file, init_logging

log = get_logger(__name__)
log.info("=== CONFIG SMOKE TEST START ===")

log_file = build_log_file(output_root=OUT_ROOT, run_name="test_loader")
print("Log file path:", log_file)

init_logging(log_file, level="DEBUG")


[10:49:01] [DEBUG] LOG_PHM_NORTH_AMERICA_2024 - Logging initialized. log_file=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out\logs\test_loader_20260206_104901.log level=DEBUG


Log file path: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out\logs\test_loader_20260206_104901.log


<Logger LOG_PHM_NORTH_AMERICA_2024 (DEBUG)>

In [3]:
from phm_america_2024.config.load_loader_config import load_and_resolve, load_yaml, find_unresolved_placeholders
# ---------- 1) Load dataset_config.yml (if present) ----------
dataset_vars = {}
if dataset_yaml_path.exists():
    ds_cfg = load_yaml(dataset_yaml_path)
    dataset_id = "phm_north_america_2024_train"  # change if your id is different
    ds = ds_cfg["datasets"][dataset_id]

    print("ds keys:", ds.keys())
    print("ds paths keys:", ds["paths"].keys())

    dataset_vars = {
        "dataset_path_x_train": ds["paths"]["x_train"],
        "dataset_path_y_train": ds["paths"]["y_train"],
    }
    #log.info("Loaded dataset_config.yml dataset_id=%s train=%s", dataset_id, dataset_vars["dataset_path"])
else:
    log.warning("dataset_config.yml not found. Using notebook vars only.")

[10:49:07] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_yaml: path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\dataset\dataset_config.yml
[10:49:07] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_yaml: loaded keys=['version', 'datasets']


ds keys: dict_keys(['description', 'paths', 'csv_params'])
ds paths keys: dict_keys(['x_train', 'y_train'])


In [7]:
# ---------- 2) Notebook overrides (always win) ----------
runtime_vars = dict(dataset_vars)
runtime_vars.update({
    "dataset_path_x_train": str(PROJECT_ROOT / "data/raw/train/X_train.csv"),  # override example
    "dataset_path_y_train": str(PROJECT_ROOT / "data/raw/train/y_train.csv"),  # override example
    "target_col": None,
    "time_col": None,
    "id_cols": None,
})
runtime_vars["output_root"] = str(OUT_ROOT)

print("Runtime vars:", runtime_vars)


Runtime vars: {'dataset_path_x_train': 'K:\\00_Code\\Manutenzione\\Project_MPPR-AI_B_PHM_America_2024\\data\\raw\\train\\X_train.csv', 'dataset_path_y_train': 'K:\\00_Code\\Manutenzione\\Project_MPPR-AI_B_PHM_America_2024\\data\\raw\\train\\y_train.csv', 'target_col': None, 'time_col': None, 'id_cols': None, 'output_root': 'K:\\00_Code\\Manutenzione\\Project_MPPR-AI_B_PHM_America_2024\\out'}


In [9]:
# ---------- 3) Load + resolve pipeline YAML ----------
loaded = load_and_resolve(pipeline_yaml_path, runtime_vars=runtime_vars)

log.info("Top keys (raw): %s", list(loaded.raw.keys()))
log.info("Top keys (resolved): %s", list(loaded.resolved.keys()))
log.info("Merged variables keys: %s", sorted(list(loaded.variables.keys())))
log.info("YAML loaded from: %s", loaded.source_path)



[11:07:56] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_and_resolve: start path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
[11:07:56] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_yaml: path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
[11:07:57] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - load_yaml: loaded keys=['version', 'pipeline', 'runtime', 'stages']
[11:07:57] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - merge_variables: yaml_vars=['dataset_path', 'id_cols', 'target_col', 'time_col'] runtime_vars=['dataset_path_x_train', 'dataset_path_y_train', 'id_cols', 'output_root', 'target_col', 'time_col']
[11:07:57] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - _resolve_string: 

In [10]:

# ----------- 4) Check for unresolved placeholders ----------
has_unresolved =  find_unresolved_placeholders(loaded.resolved)
log.info("Unresolved placeholders found: %s", has_unresolved)

has_unresolved, n = find_unresolved_placeholders(loaded.resolved)
log.info("Unresolved placeholders? %s count=%d", has_unresolved, n)

[11:10:51] [INFO] LOG_PHM_NORTH_AMERICA_2024.__main__ - Unresolved placeholders found: (True, 2)
[11:10:51] [INFO] LOG_PHM_NORTH_AMERICA_2024.__main__ - Unresolved placeholders? True count=2
