# Fase 06 — Packaging (versión simplificada)

Esta versión refleja el diseño final simplificado:

- Cada variante F05 aporta **un único modelo oficial**.
- Se copian directamente los **datasets F04** (ya etiquetados).
- No se construye replay desde F02.
- No se seleccionan modelos manualmente.

El objetivo es componer un paquete sellado y reproducible.

In [1]:
import os

# Identifier of this pipeline phase; the rest of the notebook uses it to
# locate the phase parameters and to name the metadata file.
PHASE = "06_packaging"

# Variant to run: taken from the ACTIVE_VARIANT environment variable,
# falling back to "v601" when the variable is not set.
VARIANT = os.getenv("ACTIVE_VARIANT", "v601")

## Bootstrap del proyecto

In [2]:
import sys
import os
from pathlib import Path
import json
from datetime import datetime, timezone
from time import perf_counter
import shutil
import yaml
import pyarrow.parquet as pq

# Locate the project root: the nearest directory, starting at the current
# working directory and climbing at most nine parents, that contains an
# entry named "mlops4ofp".
SCRIPT_PATH = Path.cwd().resolve()
_root_candidates = (SCRIPT_PATH, *SCRIPT_PATH.parents)[:10]
ROOT = next(
    (candidate for candidate in _root_candidates if (candidate / "mlops4ofp").exists()),
    None,
)
if ROOT is None:
    raise RuntimeError("No se pudo localizar project root")

# Put the project root first on sys.path so that `mlops4ofp` is importable.
sys.path.insert(0, str(ROOT))
print("Project root:", ROOT)


Project root: /Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp


## Cargar parámetros F06

In [3]:
from mlops4ofp.tools.params_manager import ParamsManager
from mlops4ofp.tools.traceability import write_metadata
from mlops4ofp.tools.run_context import assemble_run_context
from mlops4ofp.tools.artifacts import get_git_hash

# Select the active variant and resolve its working directory.
pm = ParamsManager(PHASE, ROOT)
pm.set_current(VARIANT)
variant_root = pm.current_variant_dir()

# Read the variant parameters; the F05 parent variants drive every later step.
params_text = (variant_root / "params.yaml").read_text(encoding="utf-8")
params = yaml.safe_load(params_text)

parent_variants_f05 = params["parent_variants_f05"]
print("Parents F05:", parent_variants_f05)


Parents F05: ['v501', 'v502', 'v503', 'v504']


## Resolver linaje y validar régimen temporal

In [4]:
def _read_yaml(path):
    """Parse a UTF-8 YAML file with ``yaml.safe_load``.

    Returns an empty dict when the document is empty, so callers can use
    ``.get``/indexing without a ``None`` check.
    """
    return yaml.safe_load(path.read_text(encoding="utf-8")) or {}


def _parse_created_at(raw):
    """Normalize a registry ``created_at`` value to a timezone-aware datetime.

    Parameters:
        raw: value read from the F03 registry. It may be missing/empty, an
            ISO-8601 string (optionally ending in "Z"), or a ``datetime`` /
            ``date`` object when PyYAML already resolved an unquoted timestamp.

    Returns:
        A timezone-aware ``datetime``. Missing values map to ``datetime.min``
        (UTC) so they sort as the oldest entries.
    """
    if not raw:
        return datetime.min.replace(tzinfo=timezone.utc)
    if isinstance(raw, datetime):
        parsed = raw
    else:
        # str() also covers datetime.date values produced by PyYAML.
        parsed = datetime.fromisoformat(str(raw).replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        # NOTE(review): naive timestamps are assumed to be UTC — confirm with
        # whatever writes variants.yaml. Without this, sorting a mix of naive
        # and aware datetimes raises TypeError.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


# lineage: variants involved per phase; f05_to_f04: per-F05 parent and target;
# temporal_by_f03: temporal parameters collected for each F03 ancestor.
lineage = {"f05": set(parent_variants_f05), "f04": set(), "f03": set()}
f05_to_f04 = {}
temporal_by_f03 = {}

# Optional F03 registry; only its per-variant "created_at" is used below.
f03_registry_path = ROOT / "executions" / "03_preparewindowsds" / "variants.yaml"
f03_registry = {}
if f03_registry_path.exists():
    f03_registry = _read_yaml(f03_registry_path).get("variants", {}) or {}

# --- F05 -> F04: validate each parent holds exactly one model -------------
for v05 in parent_variants_f05:
    f05_dir = ROOT / "executions" / "05_modeling" / v05
    v04 = _read_yaml(f05_dir / "params.yaml")["parent_variant"]

    # Named f05_models_dir (not models_dir) so that re-running this cell
    # cannot clobber the package-level `models_dir` used by later cells.
    f05_models_dir = f05_dir / "models"
    if not f05_models_dir.exists():
        raise FileNotFoundError(f"No existe carpeta models/ en F05 {v05}")

    model_dirs = [d for d in f05_models_dir.iterdir() if d.is_dir()]
    if len(model_dirs) == 0:
        raise RuntimeError(f"F05 {v05} no contiene ningún modelo en models/")
    if len(model_dirs) > 1:
        raise RuntimeError(f"F05 {v05} contiene múltiples modelos (se espera solo uno)")

    model_dir = model_dirs[0]
    model_summary_path = model_dir / "model_summary.json"
    if not model_summary_path.exists():
        raise FileNotFoundError(f"No existe model_summary.json en {model_dir}")

    model_summary = json.loads(model_summary_path.read_text(encoding="utf-8"))
    prediction_name = model_summary.get("prediction_name")
    if not prediction_name:
        raise RuntimeError(f"F05 {v05} no contiene prediction_name en model_summary.json")

    lineage["f04"].add(v04)
    f05_to_f04[v05] = {
        "f04": v04,
        "prediction_name": prediction_name,
    }

# --- F04 -> F03 ------------------------------------------------------------
# Sets are iterated in sorted order so every run visits variants identically.
for v04 in sorted(lineage["f04"]):
    f04_params = _read_yaml(
        ROOT / "executions" / "04_targetengineering" / v04 / "params.yaml"
    )
    lineage["f03"].add(f04_params["parent_variant"])

# --- Temporal parameters of every F03 ancestor ----------------------------
for v03 in sorted(lineage["f03"]):
    f03_dir = ROOT / "executions" / "03_preparewindowsds" / v03
    f03_params = _read_yaml(f03_dir / "params.yaml")

    # Tu is read from the F03 metadata when present, otherwise from params.
    f03_metadata_path = f03_dir / "03_preparewindowsds_metadata.json"
    tu_value = None
    if f03_metadata_path.exists():
        f03_metadata = json.loads(f03_metadata_path.read_text(encoding="utf-8"))
        tu_value = f03_metadata.get("Tu")
    if tu_value is None:
        tu_value = f03_params.get("Tu")

    created_at_raw = (f03_registry.get(v03, {}) or {}).get("created_at")

    temporal_by_f03[v03] = {
        "Tu": tu_value,
        "OW": f03_params.get("OW"),
        "PW": f03_params.get("PW"),
        "LT": f03_params.get("LT"),
        "created_at": _parse_created_at(created_at_raw),
    }

# All F03 ancestors must agree on OW, PW and LT (Tu is resolved separately).
ow_values = {t["OW"] for t in temporal_by_f03.values()}
pw_values = {t["PW"] for t in temporal_by_f03.values()}
lt_values = {t["LT"] for t in temporal_by_f03.values()}

if len(ow_values) != 1 or len(pw_values) != 1 or len(lt_values) != 1:
    raise RuntimeError(
        "Las variantes F05 no comparten el mismo régimen temporal (OW, PW, LT): "
        f"{temporal_by_f03}"
    )

# Newest F03 first. The sort is stable and temporal_by_f03 was filled in
# sorted variant order, so ties on created_at resolve deterministically.
ordered_f03 = sorted(
    temporal_by_f03.items(),
    key=lambda item: item[1]["created_at"],
    reverse=True,
)

# Tu comes from the most recent F03 variant that defines it (None if none do).
tu_value = next(
    (temporal["Tu"] for _v03, temporal in ordered_f03 if temporal["Tu"] is not None),
    None,
)

resolved_temporal = {
    "Tu": tu_value,
    "OW": next(iter(ow_values)),
    "PW": next(iter(pw_values)),
    "LT": next(iter(lt_values)),
}

# Persist the resolved regime back into this variant's params.yaml.
params["temporal"] = resolved_temporal
with open(variant_root / "params.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(params, f, sort_keys=False)

print("Régimen común:", resolved_temporal)

Régimen común: {'Tu': 10.0, 'OW': 600, 'PW': 100, 'LT': 100}


In [5]:
# Map each F04 variant in the lineage to the prediction it targets and
# materialize that mapping as objectives.json inside the package.
objectives = {}

# Iterate in sorted order: lineage["f04"] is a set, and relying on its
# iteration order would make the key order of objectives.json vary per run.
for v04 in sorted(lineage["f04"]):
    f04_dir = ROOT / "executions" / "04_targetengineering" / v04
    f04_params = yaml.safe_load(
        (f04_dir / "params.yaml").read_text(encoding="utf-8")
    ) or {}

    prediction_name = f04_params.get("prediction_name")
    if not prediction_name:
        # Fallback: look for the name under "params" in the F04 metadata file.
        f04_metadata_path = f04_dir / "04_targetengineering_metadata.json"
        if f04_metadata_path.exists():
            f04_metadata = json.loads(f04_metadata_path.read_text(encoding="utf-8"))
            prediction_name = (f04_metadata.get("params") or {}).get("prediction_name")

    if not prediction_name:
        raise RuntimeError(
            f"No se pudo resolver prediction_name para F04 {v04} desde params/metadata"
        )

    objectives[v04] = {
        "prediction_name": prediction_name,
    }

objectives_path = variant_root / "objectives.json"
objectives_path.write_text(json.dumps(objectives, indent=2), encoding="utf-8")
print("Objetivos materializados:", objectives_path)

Objetivos materializados: /Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp/executions/06_packaging/v601/objectives.json


## Materializar objetivos (F04)

## Copiar datasets F04

In [6]:
# Copy the labelled dataset of every F04 variant in the lineage into the
# package, under datasets/<variant>__dataset.parquet.
datasets_dir = variant_root / "datasets"
datasets_dir.mkdir(exist_ok=True)

dataset_paths = []

# Sorted iteration keeps dataset_paths (later written to the metadata) in the
# same order on every run; lineage["f04"] is a set with no stable order.
for v04 in sorted(lineage["f04"]):
    src = ROOT / "executions" / "04_targetengineering" / v04 / "04_targetengineering_dataset.parquet"
    if not src.is_file():
        # Fail with the variant name instead of a bare copyfile error.
        raise FileNotFoundError(f"No existe el dataset F04 para {v04}: {src}")

    dst = datasets_dir / f"{v04}__dataset.parquet"
    shutil.copyfile(src, dst)
    dataset_paths.append(str(dst))

print("Datasets copiados:", dataset_paths)


Datasets copiados: ['/Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp/executions/06_packaging/v601/datasets/v401__dataset.parquet', '/Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp/executions/06_packaging/v601/datasets/v403__dataset.parquet', '/Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp/executions/06_packaging/v601/datasets/v400__dataset.parquet', '/Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp/executions/06_packaging/v601/datasets/v402__dataset.parquet']


## Copiar modelos oficiales de cada F05

In [7]:
# Copy the single official model of every F05 parent into the package, under
# models/<prediction_name>__<model folder name>.
models_dir = variant_root / "models"
models_dir.mkdir(exist_ok=True)

selected_models = []
# Destination folder name -> F05 variant copied there during this run.
copied_from = {}

for v05 in parent_variants_f05:
    prediction_name = f05_to_f04[v05]["prediction_name"]

    model_root = ROOT / "executions" / "05_modeling" / v05 / "models"
    model_dirs = [d for d in model_root.iterdir() if d.is_dir()]

    if len(model_dirs) != 1:
        raise RuntimeError(f"F05 {v05} debe contener exactamente un modelo")

    src = model_dirs[0]
    dst = models_dir / f"{prediction_name}__{src.name}"

    # Two parents mapping to the same folder would silently overwrite each
    # other below while both still appear in selected_models; stop instead.
    if dst.name in copied_from:
        raise RuntimeError(
            f"Colisión de modelos: F05 {copied_from[dst.name]} y F05 {v05} "
            f"generan el mismo destino {dst.name}"
        )
    copied_from[dst.name] = v05

    # A leftover copy from a previous run is replaced so re-runs are idempotent.
    if dst.exists():
        shutil.rmtree(dst)

    shutil.copytree(src, dst)

    selected_models.append({"source_f05": v05, "model_id": src.name, "prediction_name": prediction_name})

print("Modelos copiados:", selected_models)


Modelos copiados: [{'source_f05': 'v501', 'model_id': 'battery_active_power_any-to-80_100', 'prediction_name': 'battery_active_power_any-to-80_100'}, {'source_f05': 'v502', 'model_id': 'battery_active_power_set_response_any-to-80_100', 'prediction_name': 'battery_active_power_set_response_any-to-80_100'}, {'source_f05': 'v503', 'model_id': 'pvpcs_active_power_any-to-80_100', 'prediction_name': 'pvpcs_active_power_any-to-80_100'}, {'source_f05': 'v504', 'model_id': 'ge_active_power_any-to-80_100', 'prediction_name': 'ge_active_power_any-to-80_100'}]


## Generar metadata F06

In [8]:
    # --------------------------------------------------
    # Metadata F06 + Trazabilidad (ESCRITURA ÚNICA)
    # --------------------------------------------------

# Destination of the F06 metadata record for this variant.
metadata_path = variant_root / f"{PHASE}_metadata.json"

# Start from the variant parameters and overlay what this run resolved;
# keys already present in params keep their position but take the new value.
enriched_params = dict(params)
enriched_params.update(
    temporal=resolved_temporal,
    models=selected_models,
    objectives=list(objectives),
    datasets=dataset_paths,
)

# Artifacts produced by this phase, recorded as plain strings.
package_outputs = [str(path) for path in (models_dir, datasets_dir, objectives_path)]

write_metadata(
    stage=PHASE,
    variant=VARIANT,
    parent_variant=None,
    parent_variants=parent_variants_f05,
    inputs=dataset_paths,
    outputs=package_outputs,
    params=enriched_params,
    metadata_path=metadata_path,
)

print("[OK] Metadata completa guardada (incluye models)")
print("[DONE] F06 completada")


[OK] Metadata completa guardada (incluye models)
[DONE] F06 completada
