In [30]:
!pip install h2o
import sys
import os
import numpy as np
import pandas as pd
import h2o
from h2o.estimators import H2ODeepLearningEstimator
from h2o.grid.grid_search import H2OGridSearch



In [7]:
# Verifica si estás en Google Colab
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    # En Colab, usamos gdown
    import gdown
    !gdown https://drive.google.com/uc?id=14NHeV1fvE2HXMaghHAszf_92iTBVNJHL
    !gdown  https://drive.google.com/uc?id=1g56JSd46v1HjJkhpIw7Qkal--VDd7cpS
    !gdown  https://drive.google.com/uc?id=1YA9HYeOKHMYvt9M4LAaLkO8mpJjOXktV

    data = np.load("subset_vmic10.npy")
    y = np.load("y_subset_vmic10.npy")

else:
    # En entorno local, carga desde disco
    dir_path = "/Users/rpezoa/experiment_data/data_fo/data_vmic10/"
    if os.path.exists(dir_path):
        print("Archivo cargado localmente:", dir_path + "subset_vmic10.npy")
        data = np.load(dir_path + "subset_vmic10.npy")
        y = np.load(dir_path + "y_subset_vmic10.npy")
        data_observed = np.load("/Users/rpezoa/experiment_data/data_fo/data/X_observed.npy")

    else:
        print("No se encontró el archivo local. Debes descargarlo manualmente.")

Downloading...
From: https://drive.google.com/uc?id=14NHeV1fvE2HXMaghHAszf_92iTBVNJHL
To: /content/subset_vmic10.npy
100% 16.0M/16.0M [00:00<00:00, 54.7MB/s]
Downloading...
From: https://drive.google.com/uc?id=1g56JSd46v1HjJkhpIw7Qkal--VDd7cpS
To: /content/y_subset_vmic10.npy
100% 720k/720k [00:00<00:00, 9.74MB/s]
Downloading...
From: https://drive.google.com/uc?id=1YA9HYeOKHMYvt9M4LAaLkO8mpJjOXktV
To: /content/X_observed.npy
100% 973k/973k [00:00<00:00, 5.39MB/s]


In [33]:
# 2. Seleccionar variables objetivo (TEFF, LOGG, LOGMDOT)
y_selected = y[:, [0, 1, 7]]  # Columnas 0 (TEFF), 1 (LOGG), 7 (LOGMDOT)
# 3. Inicializar H2O
h2o.init()

# 4. Crear DataFrame combinado
df = pd.DataFrame(
    data=np.hstack([data, y_selected]),
    columns=[f"f{i}" for i in range(data.shape[1])] + ["TEFF", "LOGG", "LOGMDOT"]
)

# 5. Convertir a H2OFrame
h2o_df = h2o.H2OFrame(df)

Checking whether there is an H2O instance running at http://localhost:54321. connected.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,10 mins 15 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.7
H2O_cluster_version_age:,3 months and 26 days
H2O_cluster_name:,H2O_from_python_unknownUser_l4goex
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.170 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [35]:
# Inicializar H2O
h2o.init()

# Definir predictores y respuestas
predictors = h2o_df.columns[:-3]  # Todas excepto las últimas 3
responses = ["TEFF", "LOGG", "LOGMDOT"]

# Función para entrenar y evaluar modelos
def train_and_evaluate(target):
    print(f"\n=== ENTRENANDO MODELO PARA {target} ===")

    # Configuración de hiperparámetros
    hyper_params = {
        "hidden": [
            [32, 32],       # Arquitectura pequeña
            [64, 64, 64],   # Arquitectura media
            [128, 64, 32],  # Arquitectura profunda
        ],
        "activation": ["Rectifier", "Tanh"],
        "input_dropout_ratio": [0.1, 0.2],
        "l1": [1e-5, 1e-4],
        "epochs": [50, 100]
    }

    # Configuración de la búsqueda
    search_criteria = {
        "strategy": "RandomDiscrete",
        "max_models": 10,
        "seed": 42,
        "stopping_rounds": 5,
        "stopping_metric": "RMSE",
        "stopping_tolerance": 0.001
    }

    # Crear y entrenar el grid
    grid = H2OGridSearch(
        model=H2ODeepLearningEstimator(
            distribution="gaussian",
            nfolds=5,
            stopping_metric="RMSE",
            stopping_tolerance=0.01,
            stopping_rounds=5,
            seed=42
        ),
        grid_id=f"grid_{target}",
        hyper_params=hyper_params,
        search_criteria=search_criteria
    )

    grid.train(x=predictors, y=target, training_frame=h2o_df)

    # Obtener el mejor modelo
    grid_perf = grid.get_grid(sort_by="RMSE", decreasing=False)
    best_model = grid_perf.models[0]

    # Mostrar arquitectura
    print(f"\nMejor modelo para {target}:")
    print(f"Capas ocultas: {best_model.params['hidden']['actual']}")
    print(f"Función de activación: {best_model.params['activation']['actual']}")
    print(f"RMSE: {best_model.model_performance().rmse()}")

    # Guardar modelo
    model_path = h2o.save_model(best_model, path=f"mejor_modelo_{target}", force=True)
    print(f"Modelo guardado en: {model_path}")

    return best_model

# Entrenar modelos para cada variable
modelos = {}
for target in responses:
    modelos[target] = train_and_evaluate(target)

print("\n=== ENTRENAMIENTO COMPLETADO ===")

Checking whether there is an H2O instance running at http://localhost:54321. connected.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,11 mins 16 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.7
H2O_cluster_version_age:,3 months and 26 days
H2O_cluster_name:,H2O_from_python_unknownUser_l4goex
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.155 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2



=== ENTRENANDO MODELO PARA TEFF ===
deeplearning Grid Build progress: |██████████████████████████████████████████████| (done) 100%

Mejor modelo para TEFF:
Capas ocultas: [64, 64, 64]
Función de activación: Tanh
RMSE: 2052.53820719054
Modelo guardado en: /content/mejor_modelo_TEFF/grid_TEFF_model_5

=== ENTRENANDO MODELO PARA LOGG ===
deeplearning Grid Build progress: |██████████████████████████████████████████████| (done) 100%

Mejor modelo para LOGG:
Capas ocultas: [64, 64, 64]
Función de activación: Tanh
RMSE: 0.08698613531111066
Modelo guardado en: /content/mejor_modelo_LOGG/grid_LOGG_model_5

=== ENTRENANDO MODELO PARA LOGMDOT ===
deeplearning Grid Build progress: |██████████████████████████████████████████████| (done) 100%

Mejor modelo para LOGMDOT:
Capas ocultas: [128, 64, 32]
Función de activación: Rectifier
RMSE: 0.5883752635112396
Modelo guardado en: /content/mejor_modelo_LOGMDOT/grid_LOGMDOT_model_8

=== ENTRENAMIENTO COMPLETADO ===
