In [None]:
import h5py
import pandas as pd
from datetime import datetime
import os

# ================== CONFIG ==================
# Template HDF5 to read from, CSV that selects the traces, and the folder
# where generated versions are written.
HDF5_PLANTILLA = r"../data/REDNE_plantilla.hdf5"
CSV_FILTRO = r"../data/REDNE.csv"
OUTPUT_DIR = r"../data/Versiones"

# Create the destination folder if it is not there yet (no error if it is).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Timestamp (naive local time, second resolution) embedded in the output
# file name.
_ahora = datetime.now()
fecha = _ahora.strftime("%Y%m%d_%H%M%S")
OUTPUT_HDF5 = os.path.join(OUTPUT_DIR, f"REDNE_{fecha}.hdf5")

# ================== LOAD CSV ==================
# Read the filter table and force the trace-name column to text so that
# names compare as strings further down.
df = pd.read_csv(CSV_FILTRO).assign(
    trace_name=lambda tabla: tabla["trace_name"].astype(str)
)

# Distinct trace names listed in the CSV.
trazas_filtradas = set(df["trace_name"])

print(f"Trazas a incluir: {len(trazas_filtradas)}")

# ================== HDF5 PROCESSING ==================
# Copy every selected trace found under /data in the template into a new
# HDF5 file. Each copied dataset keeps the template's attributes and then
# gets one attribute per CSV column (stored as text), which overrides any
# template attribute with the same name.

# First CSV row for each trace name, located once up front so the loop does
# not rescan the whole DataFrame for every trace. Rows keep all columns
# (including "trace_name") so the values are stringified exactly as before.
filas_unicas = df.drop_duplicates(subset="trace_name", keep="first")
posicion_fila = {
    nombre: pos for pos, nombre in enumerate(filas_unicas["trace_name"])
}

# Every CSV column except the key itself becomes a dataset attribute.
columnas_attrs = [col for col in df.columns if col != "trace_name"]

with h5py.File(HDF5_PLANTILLA, "r") as f_in, \
     h5py.File(OUTPUT_HDF5, "w") as f_out:

    # Create the /data group in the output and grab the matching input group.
    grp_out = f_out.create_group("data")
    grp_in = f_in["data"]

    # Sorted so the processing order (and the log) is the same on every run;
    # the iteration order of a set of strings can change between runs.
    for trace in sorted(trazas_filtradas):

        # Traces listed in the CSV but absent from the template are reported
        # and skipped rather than aborting the whole export.
        if trace not in grp_in:
            print(f" Traza no encontrada: {trace}")
            continue

        dset_in = grp_in[trace]

        # -------- Copy dataset (gzip-compressed in the output) --------
        dset_out = grp_out.create_dataset(
            trace,
            data=dset_in[:],
            compression="gzip",
        )

        # -------- Copy original attributes --------
        for k, v in dset_in.attrs.items():
            dset_out.attrs[k] = v

        # -------- Overwrite / add attributes from the CSV --------
        fila = filas_unicas.iloc[posicion_fila[trace]]
        for col in columnas_attrs:
            dset_out.attrs[col] = str(fila[col])

        print(f"Copiada: {trace}")

print("\n Archivo HDF5 creado:")
print(OUTPUT_HDF5)
