In [1]:
from pathlib import Path

# ---------- Root Path ----------------------------------------------
def find_project_root(start, marker="config"):
    """Return the repository root, searching upwards from ``start``.

    The first directory among ``start`` and its ancestors that contains a
    ``marker`` sub-directory is returned. This keeps the notebook working
    whether the kernel's working directory is ``<repo>/notebooks`` or the
    repository root itself.

    Parameters
    ----------
    start : str or pathlib.Path
        Directory to start searching from (normally ``Path.cwd()``).
    marker : str, default "config"
        Name of a sub-directory expected to exist directly under the root.

    Returns
    -------
    pathlib.Path
        The matching directory, or ``start.parent`` when no ancestor
        contains ``marker`` (the original "notebook lives in /notebooks"
        assumption).
    """
    start = Path(start)
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    # Nothing matched: keep the historical behaviour instead of failing.
    return start.parent


# Repository root, independent of whether the kernel started in /notebooks
PROJECT_ROOT = find_project_root(Path.cwd())
print("PROJECT_ROOT:", PROJECT_ROOT.exists(), "| is:", PROJECT_ROOT)
# -------------------------------------------------------------------


# ---------- Sub Paths ----------
OUT_ROOT = PROJECT_ROOT / "out"  # Directory for outputs
print("OUT_ROOT:", OUT_ROOT.exists(), "| is:", OUT_ROOT)
CONFIG_ROOT = PROJECT_ROOT / "config"  # Directory for configuration
print("CONFIG_ROOT:", CONFIG_ROOT.exists(), "| is:", CONFIG_ROOT)

# ---------- Yaml Paths ----------
# Path to the pipeline configuration YAML file
pipeline_yaml_path = CONFIG_ROOT / "pipeline" / "classification_pipeline_config.yml"
print("pipeline_yaml_path exists?", pipeline_yaml_path.exists(), "| is:", pipeline_yaml_path)
# Path to the dataset configuration YAML file
dataset_yaml_path = CONFIG_ROOT / "dataset" / "dataset_config.yml"
print("dataset_yaml_path exists?", dataset_yaml_path.exists(), "| is:", dataset_yaml_path)

PROJECT_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024
OUT_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out
CONFIG_ROOT: True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config
pipeline_yaml_path exists? True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
dataset_yaml_path exists? True | is: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\dataset\dataset_config.yml


In [2]:

# ---------- Part 0: Logging ----------

from phm_america_2024.core.logging_utils_core import build_log_file, init_logging

# Build the log file for this run from OUT_ROOT and the run name, then
# initialise logging against it at DEBUG level. The init call stays as the
# cell's last expression so that its return value is displayed.
log_file = build_log_file(
    run_name="classification_manual_run",
    output_root=OUT_ROOT,
)
init_logging(
    level="DEBUG",
    log_file=log_file,
)


[19:15:24] [DEBUG] LOG_PHM_NORTH_AMERICA_2024 - Logging initialized. log_file=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out\logs\classification_manual_run_20260207_191524.log level=DEBUG


<Logger LOG_PHM_NORTH_AMERICA_2024 (DEBUG)>

In [3]:
# ---------- Part 1: Load configuration (dataset + pipeline) and resolve variables ----------

from phm_america_2024.pipelines.pipeline_context import build_context

# The Path objects are passed to build_context as plain strings.
ctx = build_context(
    output_root=str(OUT_ROOT),
    pipeline_config_path=str(pipeline_yaml_path),
    dataset_config_path=str(dataset_yaml_path),
)

# Pull out the pieces of the context that the following cells pass around.
cfg, variables, output_root = ctx.cfg, ctx.variables, ctx.output_root

# Quick check: display a few of the resolved variables.
tuple(variables[name] for name in ("join_key", "label_col", "x_train_path"))


[19:15:26] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - Loading YAML: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\dataset\dataset_config.yml
[19:15:26] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.config.load_loader_config - Loading YAML: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\config\pipeline\classification_pipeline_config.yml
[19:15:26] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.core.seeds_utils_core - Global seed set: 42
[19:15:26] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.pipelines.pipeline_context - Context built. output_root=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out
[19:15:26] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.pipelines.pipeline_context - Variables keys=['join_key', 'label_col', 'output_root', 'time_col', 'x_test_path', 'x_train_path', 'x_validation_path', 'y_train_path']


('id', 'faulty', 'data/raw/train/X_train.csv')

In [4]:
# ---------- Part 2: Run Stage 2 ----------

from phm_america_2024.stages.stage2_understanding_runner_stages import run_stage2

# stage2_out contains X_sample/Y_sample (a sample) and the artifacts in out/.
stage2_out = run_stage2(
    cfg,
    variables,
    output_root,
)


[19:15:30] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.stages.stage2_understanding_runner_stages - [run_stage2] cfg is: {'version': '1.0', 'pipeline': {'name': 'classification_pipeline', 'task': 'classification', 'objective': 'End-to-end CRISP-ML pipeline for supervised classification (categorical target). Stages 2→5. Generates report artifacts (PNG/JSON) and persists trained models.\n', 'variables': {'x_train_path': 'data/raw/train/X_train.csv', 'y_train_path': 'data/raw/train/Y_train.csv', 'x_test_path': 'data/raw/test/X_test.csv', 'x_validation_path': 'data/raw/validation/X_validation.csv', 'join_key': 'id', 'label_col': 'faulty', 'time_col': '${time_col}'}}, 'runtime': {'random_seed': 42, 'output_root': 'out', 'overwrite_artifacts': True}, 'stages': {'stage2_understanding': {'enabled': True, 'objective': 'Load CSV + describe structure + data quality assessment + EDA. Stage 2 MUST NOT modify data (report-only).\n', 'dataset_input': {'source_type': 'csv', 'path': 'data/raw/tra

In [5]:
# ---------- Part 3: Run Stage 3 ----------
from phm_america_2024.stages.stage3_preparation_runner_stages import run_stage3

stage3_out = run_stage3(
    cfg,
    variables,
    output_root,
)

# Display the shapes of the X_train and X_valid entries of the stage output.
tuple(stage3_out[split].shape for split in ("X_train", "X_valid"))


[19:15:36] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.stages.stage3_preparation_runner_stages - === STAGE 3 START [run_stage3] ===
[19:15:36] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.stages.stage3_preparation_runner_stages - Stage3 resolved paths: x_path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\data\raw\train\X_train.csv (exists=True) y_path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\data\raw\train\Y_train.csv (exists=True)
[19:15:36] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.data.load_utils_data - read_csv: path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\data\raw\train\X_train.csv mode=full
[19:15:37] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.data.load_utils_data - read_csv(full): shape=(742625, 8)
[19:15:37] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.data.load_utils_data - read_csv: path=K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\data\raw\train\Y_train.csv mode=full
[19:15:37

((594100, 7), (148525, 7))

In [6]:
# ---------- Part 4: Run Stage 4 ----------
from phm_america_2024.stages.stage4_modeling_runner_stages import run_stage4

# Stage 4 takes the Stage 3 output as its input.
stage4_out = run_stage4(
    cfg,
    stage3_out,
    output_root,
)

# Display the name of the selected model and the path it was saved to.
tuple(stage4_out[key] for key in ("best_model_name", "best_model_path"))


[19:17:31] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.stages.stage4_modeling_runner_stages - === STAGE 4 START ===
[19:17:31] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.models.model_registry_models - Enabled estimators: ['decision_tree_classifier', 'random_forest_classifier']
[19:17:31] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.models.train_service_models - Training: decision_tree_classifier
[19:17:40] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.models.train_service_models - Trained: decision_tree_classifier
[19:17:40] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.models.train_service_models - Training: random_forest_classifier
[19:18:00] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.models.train_service_models - Trained: random_forest_classifier
[19:18:01] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.models.evaluate_service_models - Eval: acc=0.9971 f1_weighted=0.9971
[19:18:01] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.models.e

('random_forest_classifier',
 'K:\\00_Code\\Manutenzione\\Project_MPPR-AI_B_PHM_America_2024\\out\\models\\best_model.joblib')

In [7]:
# ---------- Part 5: Run Stage 5 ----------
from phm_america_2024.stages.stage5_evaluation_runner_stages import run_stage5

# Stage 5 takes both the Stage 3 and Stage 4 outputs.
stage5_out = run_stage5(
    cfg,
    stage3_out,
    stage4_out,
    output_root,
)

# Display whatever Stage 5 returned.
stage5_out


[19:18:15] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.stages.stage5_evaluation_runner_stages - === STAGE 5 START ===
[19:18:15] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.interpretation.explain_service_interpretation - feature_importance computed: top_k=30
[19:18:44] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.interpretation.explain_service_interpretation - permutation_importance computed: top_k=30 n_repeats=10
[19:18:44] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.interpretation.explain_service_interpretation - confusion_matrix computed normalize=true
[19:18:44] [DEBUG] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.core.helpers_utils_core - ensure_dir: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out\figures\stage5_evaluation_and_interpretation
[19:18:44] [INFO] LOG_PHM_NORTH_AMERICA_2024.phm_america_2024.reporting.plots_utils_reporting - Saved figure: K:\00_Code\Manutenzione\Project_MPPR-AI_B_PHM_America_2024\out\figures\stage5_evaluation_and

{'interpretation_json': 'K:\\00_Code\\Manutenzione\\Project_MPPR-AI_B_PHM_America_2024\\out\\interpretation.json'}