In [1]:
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor

import mlflow
from mlflow.models.signature import infer_signature

import os
from dotenv import load_dotenv
import pickle
import pandas as pd
import math
import pathlib

from sklearn.metrics import root_mean_squared_error
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import cross_val_score

import optuna
from optuna.samplers import TPESampler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os, mlflow
from dotenv import load_dotenv

load_dotenv(override=True)  # Carga las variables del archivo .env
EXPERIMENT_NAME = "/Users/roiflores.2213@gmail.com/nyc-taxi-experiments"

mlflow.set_tracking_uri("databricks")
experiment = mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

In [3]:
def read_dataframe(filename):

    df = pd.read_parquet(filename)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [4]:
df_train = read_dataframe("../data/green_tripdata_2025-01.parquet")
df_val = read_dataframe("../data/green_tripdata_2025-02.parquet")
df_test = read_dataframe("../data/green_tripdata_2025-03.parquet")

In [5]:
def preprocess(df, dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    categorical = ['PU_DO']
    numerical = ['trip_distance']
    train_dicts = df[categorical + numerical].to_dict(orient='records')
    return dv.transform(train_dicts)

In [6]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
categorical = ['PU_DO']
numerical = ['trip_distance']
dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

X_val = preprocess(df_val, dv)
X_test = preprocess(df_test, dv)

In [7]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values
y_test = df_test[target].values

In [8]:
X_val.shape[1], X_train.shape[1], X_val.shape[1]

(4159, 4159, 4159)

In [9]:
training_dataset = mlflow.data.from_numpy(X_train.data, targets=y_train, name="green_tripdata_2025-01")
validation_dataset = mlflow.data.from_numpy(X_val.data, targets=y_val, name="green_tripdata_2025-02")
testing_dataset = mlflow.data.from_numpy(X_test.data, targets=y_test, name="green_tripdata_2025_03")

In [10]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)
test = xgb.DMatrix(X_test, label=y_test)

In [11]:
train.num_col(), valid.num_col(), test.num_col()

(4159, 4159, 4159)

In [12]:
def objective(trial: optuna.trial.Trial):
    params = {
        "max_depth": trial.suggest_int("max_depth", 4, 100),
        "learning_rate": trial.suggest_float("learning_rate", math.exp(-3), 1.0, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha",   math.exp(-5), math.exp(-1), log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", math.exp(-6), math.exp(-1), log=True),
        "min_child_weight": trial.suggest_float("min_child_weight", math.exp(-1), math.exp(3), log=True),
        "objective": "reg:squarederror",  
        "seed": 42, 
    }
    
    with mlflow.start_run(nested=True):
        mlflow.set_tag("model_family", "xgboost")
        mlflow.log_params(params)
        
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=100,
            evals=[(valid, "validation")],
            early_stopping_rounds=10
        )
        
        y_pred = booster.predict(valid)
        rmse = root_mean_squared_error(y_val, y_pred)
        
        mlflow.log_metric("rmse", rmse)
        
        signature = infer_signature(X_val, y_pred)
        
        mlflow.xgboost.log_model(
            booster,
            name="model",
            input_example=X_val[:5],
            signature=signature
        )
        
    return rmse

In [13]:
import time 
def objective_rtr(trial: optuna.trial.Trial) -> float:
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 2000, step=100),
        "max_depth": trial.suggest_int("max_depth", 5, 50, log=True),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
        "max_features": trial.suggest_float("max_features", 0.1, 1.0)
    }
    
    with mlflow.start_run(nested=True):
        mlflow.set_tag("model_family", "rfr")
        mlflow.log_params(params)
        
        model_rfr = RandomForestRegressor(**params)
        model_rfr.fit(X_train, y_train)
        
        y_pred = model_rfr.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        
        mlflow.log_metric("validation_rmse", rmse)
        
        signature = infer_signature(X_val, y_pred)
        
        mlflow.sklearn.log_model(
            model_rfr,
            name="model",
            input_example=X_val[:5],
            signature=signature
        )
        
        return rmse


In [14]:
mlflow.xgboost.autolog(log_models=False)

# ------------------------------------------------------------
# Crear el estudio de Optuna
#    - Usamos TPE (Tree-structured Parzen Estimator) como sampler.
#    - direction="minimize" porque queremos minimizar el RMSE.
# ------------------------------------------------------------
sampler = TPESampler(seed=42)
study = optuna.create_study(direction="minimize", sampler=sampler)

# ------------------------------------------------------------
# Ejecutar la optimización (n_trials = número de intentos)
#    - Cada trial ejecuta la función objetivo con un set distinto de hiperparámetros.
#    - Abrimos un run "padre" para agrupar toda la búsqueda.
# ------------------------------------------------------------
with mlflow.start_run(run_name="XGBoost Hyperparameter Optimization (Optuna) HW", nested=True):
    study.optimize(objective, n_trials=3)

    # --------------------------------------------------------
    # Recuperar y registrar los mejores hiperparámetros
    # --------------------------------------------------------
    best_params = study.best_params
    # Asegurar tipos/campos fijos (por claridad y consistencia)
    best_params["max_depth"] = int(best_params["max_depth"])
    best_params["seed"] = 42
    best_params["objective"] = "reg:squarederror"

    mlflow.log_params(best_params)

    # Etiquetas del run "padre" (metadatos del experimento)
    mlflow.set_tags({
        "project": "NYC Taxi Time Prediction Project",
        "optimizer_engine": "optuna",
        "model_family": "xgboost",
        "feature_set_version": 1,
    })

    # --------------------------------------------------------
    # 7) Entrenar un modelo FINAL con los mejores hiperparámetros
    #    (normalmente se haría sobre train+val o con CV; aquí mantenemos el patrón original)
    # --------------------------------------------------------
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=100,
        evals=[(valid, "validation")],
        early_stopping_rounds=10,
    )

    # Evaluar y registrar la métrica final en validación
    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # --------------------------------------------------------
    # 8) Guardar artefactos adicionales (p. ej. el preprocesador)
    # --------------------------------------------------------
    pathlib.Path("preprocessor").mkdir(exist_ok=True)
    with open("preprocessor/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)

    mlflow.log_artifact("preprocessor/preprocessor.b", artifact_path="preprocessor")

    # La "signature" describe la estructura esperada de entrada y salida del modelo:
    # incluye los nombres, tipos y forma (shape) de las variables de entrada y el tipo de salida.
    # MLflow la usa para validar datos en inferencia y documentar el modelo en el Model Registry.
    # Si X_val es la matriz dispersa (scipy.sparse) salida de DictVectorizer:
    feature_names = dv.get_feature_names_out()
    input_example = pd.DataFrame(X_val[:5].toarray(), columns=feature_names)

    # Para que las longitudes coincidan, usa el mismo slice en y_pred
    signature = infer_signature(input_example, y_val[:5])

    # Guardar el modelo del trial como artefacto en MLflow.
    mlflow.xgboost.log_model(
        booster,
        name="model",
        input_example=input_example,
        signature=signature
    )


[I 2025-10-29 23:36:27,877] A new study created in memory with name: no-name-16b78f34-ba55-4452-8452-21243868e58f


[0]	validation-rmse:5.72427
[1]	validation-rmse:5.57860
[2]	validation-rmse:5.56409
[3]	validation-rmse:5.56982
[4]	validation-rmse:5.57347
[5]	validation-rmse:5.55585
[6]	validation-rmse:5.55736
[7]	validation-rmse:5.55253
[8]	validation-rmse:5.55232
[9]	validation-rmse:5.53322
[10]	validation-rmse:5.53156
[11]	validation-rmse:5.53006
[12]	validation-rmse:5.52808
[13]	validation-rmse:5.52782
[14]	validation-rmse:5.52451
[15]	validation-rmse:5.52324
[16]	validation-rmse:5.52269
[17]	validation-rmse:5.52229
[18]	validation-rmse:5.52366
[19]	validation-rmse:5.52900
[20]	validation-rmse:5.52988
[21]	validation-rmse:5.52969
[22]	validation-rmse:5.52872
[23]	validation-rmse:5.53195
[24]	validation-rmse:5.52894
[25]	validation-rmse:5.52702
[26]	validation-rmse:5.53066


  xgb_model.save_model(model_data_path)
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 395.50it/s]
  model.load_model(xgb_model_path)
2025/10/29 23:37:52 INFO mlflow.models.model: Found the following environment variables used during model inference: [DATABRICKS_HOST, DATABRICKS_TOKEN]. Please check if you need to set them when deploying the model. To disable this message, set environment variable `MLFLOW_RECORD_ENV_VARS_IN_MODEL_LOGGING` to `false`.
[I 2025-10-29 23:37:57,056] Trial 0 finished with value: 5.5334711429183985 and parameters: {'max_depth': 40, 'learning_rate': 0.8625543817410922, 'reg_alpha': 0.12593061066249622, 'reg_lambda': 0.049454235173237264, 'min_child_weight': 0.6866535292359801}. Best is trial 0 with value: 5.5334711429183985.


🏃 View run bustling-carp-201 at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501/runs/a56e2defd7b842aca5fc14caf74743df
🧪 View experiment at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501
[0]	validation-rmse:8.77707
[1]	validation-rmse:8.47452
[2]	validation-rmse:8.19669
[3]	validation-rmse:7.94182
[4]	validation-rmse:7.70901
[5]	validation-rmse:7.49664
[6]	validation-rmse:7.30245
[7]	validation-rmse:7.12626
[8]	validation-rmse:6.96581
[9]	validation-rmse:6.81994
[10]	validation-rmse:6.68775
[11]	validation-rmse:6.56832
[12]	validation-rmse:6.46039
[13]	validation-rmse:6.36272
[14]	validation-rmse:6.27449
[15]	validation-rmse:6.19394
[16]	validation-rmse:6.12203
[17]	validation-rmse:6.05669
[18]	validation-rmse:5.99838
[19]	validation-rmse:5.94487
[20]	validation-rmse:5.89709
[21]	validation-rmse:5.85472
[22]	validation-rmse:5.81565
[23]	validation-rmse:5.78117
[24]	validation-rmse:5.74956
[25]	validation-rmse:5.72125
[26]	

  xgb_model.save_model(model_data_path)
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 602.32it/s]
  model.load_model(xgb_model_path)
[I 2025-10-29 23:38:59,461] Trial 1 finished with value: 5.410463874732254 and parameters: {'max_depth': 19, 'learning_rate': 0.059264241587996896, 'reg_alpha': 0.21539205131792016, 'reg_lambda': 0.05006540936006931, 'min_child_weight': 6.248180561354165}. Best is trial 1 with value: 5.410463874732254.


🏃 View run fearless-midge-486 at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501/runs/123dfb09e6fe45d2832ca97b859ce7f1
🧪 View experiment at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501
[0]	validation-rmse:5.85130
[1]	validation-rmse:5.74043
[2]	validation-rmse:5.72146
[3]	validation-rmse:5.71927
[4]	validation-rmse:5.70906
[5]	validation-rmse:5.70183
[6]	validation-rmse:5.68997
[7]	validation-rmse:5.67941
[8]	validation-rmse:5.67714
[9]	validation-rmse:5.67594
[10]	validation-rmse:5.67850
[11]	validation-rmse:5.67582
[12]	validation-rmse:5.67678
[13]	validation-rmse:5.66184
[14]	validation-rmse:5.65662
[15]	validation-rmse:5.65596
[16]	validation-rmse:5.65571
[17]	validation-rmse:5.65327
[18]	validation-rmse:5.64906
[19]	validation-rmse:5.65130
[20]	validation-rmse:5.64476
[21]	validation-rmse:5.64460
[22]	validation-rmse:5.64547
[23]	validation-rmse:5.64525
[24]	validation-rmse:5.64959
[25]	validation-rmse:5.64695
[26]

  xgb_model.save_model(model_data_path)
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 409.98it/s] 
  model.load_model(xgb_model_path)
[I 2025-10-29 23:39:35,567] Trial 2 finished with value: 5.609300918544036 and parameters: {'max_depth': 5, 'learning_rate': 0.9136840519292247, 'reg_alpha': 0.18820387978911576, 'reg_lambda': 0.007166739666045858, 'min_child_weight': 0.7613210498541186}. Best is trial 1 with value: 5.410463874732254.


🏃 View run intelligent-hawk-221 at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501/runs/5af2a33edf384c4b83bb1e38d5f15e14
🧪 View experiment at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501
[0]	validation-rmse:8.77707
[1]	validation-rmse:8.47452
[2]	validation-rmse:8.19669
[3]	validation-rmse:7.94182
[4]	validation-rmse:7.70901
[5]	validation-rmse:7.49664
[6]	validation-rmse:7.30245
[7]	validation-rmse:7.12626
[8]	validation-rmse:6.96581
[9]	validation-rmse:6.81994
[10]	validation-rmse:6.68775
[11]	validation-rmse:6.56832
[12]	validation-rmse:6.46039
[13]	validation-rmse:6.36272
[14]	validation-rmse:6.27449
[15]	validation-rmse:6.19394
[16]	validation-rmse:6.12203
[17]	validation-rmse:6.05669
[18]	validation-rmse:5.99838
[19]	validation-rmse:5.94487
[20]	validation-rmse:5.89709
[21]	validation-rmse:5.85472
[22]	validation-rmse:5.81565
[23]	validation-rmse:5.78117
[24]	validation-rmse:5.74956
[25]	validation-rmse:5.72125
[2

  xgb_model.save_model(model_data_path)
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 359.33it/s] 
  model.load_model(xgb_model_path)


🏃 View run XGBoost Hyperparameter Optimization (Optuna) HW at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501/runs/f510c5512cad4c26a904b554ba7b9a93
🧪 View experiment at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501


In [15]:
mlflow.sklearn.autolog(log_models=False)

sampler = TPESampler(seed=42)
study = optuna.create_study(direction="minimize", sampler=sampler)

with mlflow.start_run(run_name="Random Forest Regressor/Optima", nested=True):
    study.optimize(objective_rtr, n_trials=3)
    
    best_params = study.best_params
    best_params["max_depth"] = int(best_params["max_depth"])
    best_params["random_state"] = 42
    
    mlflow.log_params(best_params)
    mlflow.set_tags({
        "project": "NYC Taxi Time Prediction Project",
        "optimizer_engine": "optuna",
        "model_family": "random_forest",
        "feature_set_version": 1})
    
    final_rfr = RandomForestRegressor(**best_params)
    final_rfr.fit(X_train, y_train)
    
    y_pred_rfr = final_rfr.predict(X_val)
    rmse_rfr = root_mean_squared_error(y_val, y_pred_rfr)
    mlflow.log_metric("rmse", rmse_rfr)
    
    pathlib.Path("preprocessor").mkdir(exist_ok=True)
    with open("preprocessor/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
        
    mlflow.log_artifact("preprocessor/preprocessor.b", artifact_path="preprocessor")
    
    feature_names = dv.get_feature_names_out()
    input_example = pd.DataFrame(X_val[:5].toarray(), columns=feature_names)
    
    signature = infer_signature(input_example, y_pred_rfr[:5])
    
    mlflow.sklearn.log_model(
        final_rfr,
        name="model",
        input_example=input_example,
        signature=signature
    )

[I 2025-10-29 23:40:42,499] A new study created in memory with name: no-name-832688ab-c757-41e4-abc1-627c797bf558
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 57.96it/s]  
2025/10/29 23:45:49 INFO mlflow.models.model: Found the following environment variables used during model inference: [DATABRICKS_HOST, DATABRICKS_TOKEN]. Please check if you need to set them when deploying the model. To disable this message, set environment variable `MLFLOW_RECORD_ENV_VARS_IN_MODEL_LOGGING` to `false`.


🏃 View run omniscient-mouse-349 at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501/runs/34ab455ba83a4021b329d95658ce67ad
🧪 View experiment at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501


[I 2025-10-29 23:47:35,814] Trial 0 finished with value: 5.542105226681701 and parameters: {'n_estimators': 800, 'max_depth': 45, 'min_samples_split': 15, 'min_samples_leaf': 12, 'max_features': 0.24041677639819287}. Best is trial 0 with value: 5.542105226681701.
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 351.87it/s]
2025/10/29 23:48:41 INFO mlflow.models.model: Found the following environment variables used during model inference: [DATABRICKS_HOST, DATABRICKS_TOKEN]. Please check if you need to set them when deploying the model. To disable this message, set environment variable `MLFLOW_RECORD_ENV_VARS_IN_MODEL_LOGGING` to `false`.
[I 2025-10-29 23:48:46,420] Trial 1 finished with value: 5.7194945005435365 and parameters: {'n_estimators': 400, 'max_depth': 5, 'min_samples_split': 18, 'min_samples_leaf': 13, 'max_features': 0.737265320016441}. Best is trial 0 with value: 5.542105226681701.


🏃 View run amusing-colt-378 at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501/runs/a0d6b93630e24b88955cdd9654e30688
🧪 View experiment at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 163.79it/s]
2025/10/29 23:50:07 INFO mlflow.models.model: Found the following environment variables used during model inference: [DATABRICKS_HOST, DATABRICKS_TOKEN]. Please check if you need to set them when deploying the model. To disable this message, set environment variable `MLFLOW_RECORD_ENV_VARS_IN_MODEL_LOGGING` to `false`.
[I 2025-10-29 23:50:37,606] Trial 2 finished with value: 5.417472457905124 and parameters: {'n_estimators': 100, 'max_depth': 47, 'min_samples_split': 17, 'min_samples_leaf': 5, 'max_features': 0.26364247048639056}. Best is trial 2 with value: 5.417472457905124.


🏃 View run rare-lamb-257 at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501/runs/787b11aec9dd46b18ce88fdc2c8095e6
🧪 View experiment at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 77.10it/s]  
2025/10/29 23:52:09 INFO mlflow.models.model: Found the following environment variables used during model inference: [DATABRICKS_HOST, DATABRICKS_TOKEN]. Please check if you need to set them when deploying the model. To disable this message, set environment variable `MLFLOW_RECORD_ENV_VARS_IN_MODEL_LOGGING` to `false`.


🏃 View run Random Forest Regressor/Optima at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501/runs/1ca2902103634337891559731f771bad
🧪 View experiment at: https://dbc-8e655ea0-6144.cloud.databricks.com/ml/experiments/2695805265605501


In [19]:
model_registry = "workspace.default.nyc-taxi-model"
runs = mlflow.search_runs(
    experiment_names=[EXPERIMENT_NAME],
    order_by=["metrics.rmse ASC"],
    output_format="list"
)

if len(runs) > 0:
    best_run = runs[0]
    second_best = runs[1]
    

In [22]:
result_champ = mlflow.register_model(
    model_uri=f"runs:/{best_run.info.run_id}/model",
    name=model_registry
)

result_chall = mlflow.register_model(
    model_uri=f"runs:/{second_best.info.run_id}/model",
    name=model_registry
)

Registered model 'workspace.default.nyc-taxi-model' already exists. Creating a new version of this model...
Downloading artifacts: 100%|██████████| 7/7 [00:02<00:00,  3.38it/s]
Uploading artifacts: 100%|██████████| 8/8 [00:02<00:00,  2.95it/s]
Created version '6' of model 'workspace.default.nyc-taxi-model'.
Registered model 'workspace.default.nyc-taxi-model' already exists. Creating a new version of this model...
Downloading artifacts: 100%|██████████| 7/7 [00:02<00:00,  3.33it/s]
Uploading artifacts: 100%|██████████| 8/8 [00:03<00:00,  2.08it/s]
Created version '7' of model 'workspace.default.nyc-taxi-model'.


In [None]:
from mlflow import MlflowClient

client = MlflowClient()
model_chall_version = result_chall.version
model_champ_version = result_champ.version
challenger_alias ="Challenger"
champ_alias ="Champion"


client.set_registered_model_alias(
    name=model_registry,
    alias=challenger_alias,
    version=model_chall_version
)

client.set_registered_model_alias(
    name=model_registry,
    alias=champ_alias,
    version= model_champ_version
)

In [24]:
import mlflow.pyfunc

champion_model = mlflow.xgboost.load_model(f"models:/{model_registry}@{champ_alias}")
challenger_model = mlflow.xgboost.load_model(f"models:/{model_registry}@{challenger_alias}")

Downloading artifacts: 100%|██████████| 8/8 [00:04<00:00,  1.92it/s]
  model.load_model(xgb_model_path)
Downloading artifacts: 100%|██████████| 8/8 [00:02<00:00,  4.00it/s]
  model.load_model(xgb_model_path)


In [25]:
y_chal_pred = challenger_model.predict(test)
y_champ_pred = champion_model.predict(test)

rmse_challenger = root_mean_squared_error(y_test, y_chal_pred)
rmse_champ = root_mean_squared_error(y_test, y_champ_pred)

In [26]:
if rmse_challenger > rmse_champ:
    client.set_registered_model_alias(
    name=model_registry,
    alias=champ_alias,
    version= model_chall_version
)
    
else:
    print("Champion stays on top")

Champion stays on top


El champion sigue siendo el champion ya que tuvo una mejor métrica de rmse con datos que no se habían visto previamente