# Fase 06 — Packaging (versión simplificada)

Esta versión refleja el diseño final simplificado:

- Cada variante F05 aporta **un único modelo oficial**.
- Se copian directamente los **datasets F04** (ya etiquetados).
- No se construye replay desde F02.
- No se seleccionan modelos manualmente.

El objetivo es componer un paquete sellado y reproducible.

In [1]:
import os

# Phase identifier: used to select this phase's variant directory and to name
# the metadata file written at the end of the notebook.
PHASE = "06_packaging"

# Variant to package: the ACTIVE_VARIANT environment variable when it is set,
# otherwise the default "v601".
VARIANT = os.getenv("ACTIVE_VARIANT", "v601")

## Bootstrap del proyecto

In [2]:
import sys
import os
from pathlib import Path
import json
from datetime import datetime, timezone
from time import perf_counter
import shutil
import yaml
import pyarrow.parquet as pq

# Locate the project root: the closest directory, starting at the current
# working directory and walking upwards, that contains an "mlops4ofp" entry.
SCRIPT_PATH = Path.cwd().resolve()

# At most 10 candidates are inspected (the cwd plus up to 9 ancestors).
ROOT = SCRIPT_PATH
levels_left = 10
while levels_left > 0 and not (ROOT / "mlops4ofp").exists():
    ROOT = ROOT.parent
    levels_left -= 1
if levels_left == 0:
    raise RuntimeError("No se pudo localizar project root")

# Put the project root first on the import path so `mlops4ofp` is importable.
sys.path.insert(0, str(ROOT))
print("Project root:", ROOT)


Project root: /Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp


## Cargar parámetros F06

In [3]:
from mlops4ofp.tools.params_manager import ParamsManager
from mlops4ofp.tools.run_context import assemble_run_context
from mlops4ofp.tools.artifacts import get_git_hash

# Select the active F06 variant and read its parameter file.
pm = ParamsManager(PHASE, ROOT)
pm.set_current(VARIANT)
variant_root = pm.current_variant_dir()

with open(variant_root / "params.yaml", "r", encoding="utf-8") as f:
    # An empty YAML document loads as None; normalise it to an empty mapping
    # so the validation below reports the real problem.
    params = yaml.safe_load(f) or {}

# The F05 parents drive every later cell, so fail here with a clear message
# instead of letting a bad value surface as an unrelated error downstream.
# A bare string (e.g. `parent_variants_f05: v501`) is rejected on purpose:
# iterating it would yield single characters rather than variant ids.
parent_variants_f05 = params.get("parent_variants_f05")
if not isinstance(parent_variants_f05, list) or not parent_variants_f05:
    raise ValueError(
        f"params.yaml de F06 {VARIANT} debe definir 'parent_variants_f05' "
        f"como lista no vacía (valor actual: {parent_variants_f05!r})"
    )

print("Parents F05:", parent_variants_f05)


Parents F05: ['v501']


## Resolver linaje y validar régimen temporal

In [4]:
# Resolve the lineage F05 -> F04 -> F03 of every parent variant and verify
# that all of them share a single temporal regime (Tu, OW, PW).
lineage = {"f05": set(parent_variants_f05), "f04": set(), "f03": set()}
regimes = set()

# Per-F05 lookup consumed by the model-copy cell. It is rebuilt from scratch
# on every execution: creating it lazily via globals() kept entries from
# earlier runs alive in the kernel and left the name undefined when the
# parent list was empty.
f05_to_f04 = {}

for v05 in parent_variants_f05:
    f05_root = ROOT / "executions" / "05_modeling" / v05
    f05_params = yaml.safe_load(
        (f05_root / "params.yaml").read_text(encoding="utf-8")
    )
    v04 = f05_params["parent_variant"]

    # Locate the model inside models/ (exactly one is expected). A distinct
    # name is used so this loop does not overwrite `models_dir`, which the
    # model-copy cell uses for the F06 destination directory.
    f05_models_dir = f05_root / "models"
    if not f05_models_dir.exists():
        raise FileNotFoundError(f"No existe carpeta models/ en F05 {v05}")

    model_dirs = [d for d in f05_models_dir.iterdir() if d.is_dir()]
    if len(model_dirs) == 0:
        raise RuntimeError(f"F05 {v05} no contiene ningún modelo en models/")
    if len(model_dirs) > 1:
        raise RuntimeError(f"F05 {v05} contiene múltiples modelos (se espera solo uno)")

    model_dir = model_dirs[0]
    model_summary_path = model_dir / "model_summary.json"
    if not model_summary_path.exists():
        raise FileNotFoundError(f"No existe model_summary.json en {model_dir}")

    model_summary = json.loads(model_summary_path.read_text(encoding="utf-8"))
    prediction_name = model_summary.get("prediction_name")
    if not prediction_name:
        raise RuntimeError(f"F05 {v05} no contiene prediction_name en model_summary.json")

    lineage["f04"].add(v04)
    f05_to_f04[v05] = {
        "f04": v04,
        "prediction_name": prediction_name,
    }

# Each F04 variant names its F03 parent.
for v04 in lineage["f04"]:
    f04_params = yaml.safe_load(
        (ROOT / "executions" / "04_targetengineering" / v04 / "params.yaml").read_text(
            encoding="utf-8"
        )
    )
    v03 = f04_params["parent_variant"]
    lineage["f03"].add(v03)

# Collect the temporal regime declared by every F03 ancestor.
for v03 in lineage["f03"]:
    f03_params = yaml.safe_load(
        (ROOT / "executions" / "03_preparewindowsds" / v03 / "params.yaml").read_text(
            encoding="utf-8"
        )
    )

    # NOTE(review): the recorded run prints Tu as None, i.e. "Tu" was absent
    # from (or null in) the F03 params. Confirm that "Tu" is the key F03
    # actually writes.
    regime = (
        f03_params.get("Tu"),
        f03_params.get("OW"),
        f03_params.get("PW"),
    )
    regimes.add(regime)

# No regime at all means there were no parents to resolve; report that
# separately from a genuine mismatch between parents.
if not regimes:
    raise RuntimeError("No se pudo resolver ningún régimen temporal (parent_variants_f05 vacío)")
if len(regimes) != 1:
    raise RuntimeError("Régimen temporal inconsistente entre F05")

print("Régimen común:", next(iter(regimes)))

Régimen común: (None, 600, 100)


## Copiar datasets F04

In [5]:
# Copy the labelled dataset of every F04 ancestor into this variant's
# datasets/ folder and record the destination paths for the metadata.
datasets_dir = variant_root / "datasets"
datasets_dir.mkdir(exist_ok=True)

dataset_paths = []

# Iterate in sorted order: lineage["f04"] is a set of strings, whose iteration
# order can change between interpreter sessions (string hash randomisation).
# Sorting keeps dataset_paths, and the metadata built from it, stable across
# re-runs.
for v04 in sorted(lineage["f04"]):
    src = ROOT / "executions" / "04_targetengineering" / v04 / "04_targetengineering_dataset.parquet"
    if not src.is_file():
        # Same exception type copyfile would raise, but naming the F04 variant.
        raise FileNotFoundError(f"No existe el dataset de F04 {v04}: {src}")

    dst = datasets_dir / f"{v04}__dataset.parquet"
    shutil.copyfile(src, dst)
    dataset_paths.append(str(dst))

print("Datasets copiados:", dataset_paths)


Datasets copiados: ['/Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp/executions/06_packaging/v601/datasets/v401__dataset.parquet']


## Copiar modelos oficiales de cada F05

In [6]:
# Copy the single official model of every F05 parent into this variant's
# models/ folder, named "<prediction_name>__<model folder name>".
models_dir = variant_root / "models"
models_dir.mkdir(exist_ok=True)

selected_models = []

# Destination folder name -> F05 variant that produced it during this run.
# Used to detect two parents that would land on the same folder.
copied_from = {}

for v05 in parent_variants_f05:
    prediction_name = f05_to_f04[v05]["prediction_name"]

    model_root = ROOT / "executions" / "05_modeling" / v05 / "models"
    model_dirs = [d for d in model_root.iterdir() if d.is_dir()]

    if len(model_dirs) != 1:
        raise RuntimeError(f"F05 {v05} debe contener exactamente un modelo")

    src = model_dirs[0]
    dst = models_dir / f"{prediction_name}__{src.name}"

    # Without this guard the rmtree below would silently delete a model that
    # was copied moments ago from another F05 parent, while selected_models
    # would still list both.
    if dst.name in copied_from:
        raise RuntimeError(
            f"Colisión de destino '{dst.name}' entre F05 {copied_from[dst.name]} y F05 {v05}"
        )

    # Anything already at dst is a leftover from a previous execution of this
    # cell; replace it so the copy reflects the current F05 contents.
    if dst.exists():
        shutil.rmtree(dst)

    shutil.copytree(src, dst)
    copied_from[dst.name] = v05

    selected_models.append({"source_f05": v05, "model_id": src.name, "prediction_name": prediction_name})

print("Modelos copiados:", selected_models)


Modelos copiados: [{'source_f05': 'v501', 'model_id': 'battery_active_power_any-to-80_100', 'prediction_name': 'battery_active_power_any-to-80_100'}]


## Generar metadata F06

In [7]:
# Assemble the F06 metadata record, write it as JSON inside the variant
# directory, and show it as the cell output.

# Lineage sets become sorted lists so they serialise to JSON in a fixed order.
sorted_lineage = {}
for phase_key, variant_ids in lineage.items():
    sorted_lineage[phase_key] = sorted(variant_ids)

metadata = dict(
    phase=PHASE,
    variant=VARIANT,
    git_commit=get_git_hash(),
    created_at=datetime.now(timezone.utc).isoformat(),
    # The lineage cell guarantees exactly one regime; take that element.
    temporal=tuple(regimes)[0],
    lineage=sorted_lineage,
    models=selected_models,
    # NOTE(review): these are the absolute destination paths recorded by the
    # dataset-copy cell, so the file embeds machine-specific locations.
    datasets=dataset_paths,
)

metadata_path = variant_root / f"{PHASE}_metadata.json"
metadata_json = json.dumps(metadata, indent=2)
metadata_path.write_text(metadata_json, encoding="utf-8")

metadata


{'phase': '06_packaging',
 'variant': 'v601',
 'git_commit': '65221144f645c2a2fb6b754715f43626dfc18096',
 'created_at': '2026-02-16T21:48:11.779957+00:00',
 'temporal': (None, 600, 100),
 'lineage': {'f05': ['v501'], 'f04': ['v401'], 'f03': ['v301']},
 'models': [{'source_f05': 'v501',
   'model_id': 'battery_active_power_any-to-80_100',
   'prediction_name': 'battery_active_power_any-to-80_100'}],
 'datasets': ['/Users/juancarlosduenaslopez/Documents/mlops/mlops4ofp/executions/06_packaging/v601/datasets/v401__dataset.parquet']}