In [1]:
from pathlib import Path

# ---------- Path setup ----------------------------------------------
def _report_path(label, path):
    """Print ``label``, whether ``path`` exists, and the path itself."""
    print(label, path.exists(), "| is:", path)


# Repository root: this notebook is expected to run from <repo>/notebooks,
# so the parent of the current working directory is taken as the root.
PROJECT_ROOT = Path.cwd().parent
_report_path("PROJECT_ROOT:", PROJECT_ROOT)

# Top-level sub-directories of the repository.
OUT_ROOT = PROJECT_ROOT.joinpath("out")        # directory for outputs
_report_path("OUT_ROOT:", OUT_ROOT)
CONFIG_ROOT = PROJECT_ROOT.joinpath("config")  # directory for configuration
_report_path("CONFIG_ROOT:", CONFIG_ROOT)

# YAML configuration files: one for the pipeline, one for the dataset.
pipeline_yaml_path = CONFIG_ROOT.joinpath("pipeline", "classification_pipeline_config.yml")
_report_path("pipeline_yaml_path exists?", pipeline_yaml_path)
dataset_yaml_path = CONFIG_ROOT.joinpath("dataset", "dataset_config.yml")
_report_path("dataset_yaml_path exists?", dataset_yaml_path)

PROJECT_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024
OUT_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out
CONFIG_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config
pipeline_yaml_path exists? True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
dataset_yaml_path exists? True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\dataset\dataset_config.yml


In [2]:

#################################### Part 0 — Logging

from phm_america_2024.core.logging_utils_core import build_log_file, init_logging

# Ask the project helper for this run's log file, based on the output root.
log_file = build_log_file(
    output_root=OUT_ROOT,
    run_name="classification_manual_run",
)

# Initialise logging at DEBUG level. The call is kept as the cell's last
# expression so that its return value is displayed as the cell output.
init_logging(
    log_file=log_file,
    level="DEBUG",
)


[20:07:46] [DEBUG] LOG_PHM_NORTH_AMERICA_2024 - Logging initialized. log_file=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out\logs\classification_manual_run_20260211_200746.log level=DEBUG


<Logger LOG_PHM_NORTH_AMERICA_2024 (DEBUG)>

In [3]:
# Part 1 — Load the configuration (dataset + pipeline) and resolve variables

from phm_america_2024.pipelines.pipeline_context import build_context

# The paths are handed to build_context as plain strings.
ctx = build_context(
    dataset_config_path=str(dataset_yaml_path),
    pipeline_config_path=str(pipeline_yaml_path),
    output_root=str(OUT_ROOT),
)

# Expose the pieces of the context that the following cells use.
cfg, variables, output_root = ctx.cfg, ctx.variables, ctx.output_root

# Quick check: display a few of the resolved variables as the cell result.
tuple(variables[key] for key in ("join_key", "label_col", "x_train_path"))


[20:07:51] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - Loading YAML: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\dataset\dataset_config.yml
[20:07:51] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - Loading YAML: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
[20:07:51] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.core.seeds_utils_core - Global seed set: 42
[20:07:51] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.pipelines.pipeline_context - Context built. output_root=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out
[20:07:51] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.pipelines.pipeline_context - Variables keys=['join_key', 'label_col', 'output_root', 'time_col', 'x_test_path', 'x_train_path', 'x_validation_path', 'y_train_path']


('id', 'faulty', 'data/raw/train/X_train.csv')

In [4]:
# Part 2 — Run Stage 2

from phm_america_2024.stages.stage2_understanding_runner_stages import run_stage2

# Per the original author's note: stage2_out contains X_sample/Y_sample
# (a sample), and artifacts are written under out/.
stage2_out = run_stage2(
    cfg,
    variables,
    output_root,
)


[20:07:56] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.stages.stage2_understanding_runner_stages - [run_stage2] data from the root of the yaml: version [] and pipeline [] (cfg):
{
  "pipeline": {
    "name": "classification_pipeline",
    "objective": "End-to-end CRISP-ML pipeline for supervised classification (categorical target). Stages 2→5. Generates report artifacts (PNG/JSON) and persists trained models.\n",
    "task": "classification",
    "variables": {
      "join_key": "id",
      "label_col": "faulty",
      "time_col": "${time_col}",
      "x_test_path": "data/raw/test/X_test.csv",
      "x_train_path": "data/raw/train/X_train.csv",
      "x_validation_path": "data/raw/validation/X_validation.csv",
      "y_train_path": "data/raw/train/Y_train.csv"
    }
  },
  "runtime": {
    "output_root": "out",
    "overwrite_artifacts": true,
    "random_seed": 42
  },
  "stages": {
    "stage2_understanding": {
      "dataset_input": {
        "csv_params": {
          "decima