In [13]:
import os
import mlflow
import mlflow.sklearn
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, root_mean_squared_error
import numpy as np
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from dagshub import get_repo_bucket_client


In [15]:
# Create the directory if it doesn't exist
!mkdir -p ../data

# Download files using curl
!curl -o ../data/green_tripdata_2024-01.parquet https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2024-01.parquet
!curl -o ../data/green_tripdata_2024-02.parquet https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2024-02.parquet

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1330k  100 1330k    0     0  5776k      0 --:--:-- --:--:-- --:--:-- 5784k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1253k  100 1253k    0     0  6264k      0 --:--:-- --:--:-- --:--:-- 6237k


In [16]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)
    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)
    df = df[(df.duration >= 1) & (df.duration <= 60)]
    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    return df

In [17]:
df_train = read_dataframe('../data/green_tripdata_2024-01.parquet')
df_val = read_dataframe('../data/green_tripdata_2024-02.parquet')
df_test = read_dataframe('../data/green_tripdata_2024-03.parquet')

categorical = ['PU_DO']
numerical = ['trip_distance']
target = 'duration'


In [18]:
dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)
y_train = df_train[target].values

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)
y_val = df_val[target].values

test_dicts = df_test[categorical + numerical].to_dict(orient='records')
X_test = dv.transform(test_dicts)
y_test = df_test[target].values

In [22]:
import os
import mlflow
import dagshub
from mlflow.tracking import MlflowClient

# Configurar DagsHub y MLflow
dagshub.init(repo_owner="JuanPab2009", repo_name="nys-taxi-time-prediction", mlflow=True)

# Obtener la URI de seguimiento de MLflow
mlflow_tracking_uri = mlflow.get_tracking_uri()
print(f"MLflow Tracking URI: {mlflow_tracking_uri}")

# Configurar MLflow para usar la URI de DagsHub
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Nombre del experimento
experiment_name = "nys-taxi-experiment"

# Crear o obtener el experimento
client = MlflowClient()
try:
    experiment = client.create_experiment(experiment_name)
except mlflow.exceptions.MlflowException:
    experiment = client.get_experiment_by_name(experiment_name)

if experiment:
    print(f"Experiment ID: {experiment.experiment_id}")
    print(f"Artifact Location: {experiment.artifact_location}")
    
    # Configurar el experimento activo
    mlflow.set_experiment(experiment_name)
else:
    print("Failed to create or retrieve the experiment.")

MLflow Tracking URI: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow
Experiment ID: 0
Artifact Location: mlflow-artifacts:/263051c210234e5a9f3b7186f93060c2


In [23]:
# Función para ejecutar experimentos
def objective(params, model_class, X_train, y_train, X_val, y_val):
    with mlflow.start_run(nested=True):
        model = model_class(**params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_params(params)
        mlflow.log_metric("rmse", rmse)
        return {'loss': rmse, 'status': STATUS_OK}

In [24]:
# Espacios de búsqueda para hiperparámetros
gb_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 500, 50)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'max_depth': scope.int(hp.quniform('max_depth', 3, 10, 1))
}

rf_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 500, 50)),
    'max_depth': scope.int(hp.quniform('max_depth', 3, 20, 1)),
    'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1))
}


In [32]:
# Experimentos
with mlflow.start_run(run_name="Parent Experiment"):
    # Gradient Boost
    gb_trials = Trials()
    best_gb = fmin(
        fn=lambda params: objective(params, GradientBoostingRegressor, X_train, y_train, X_val, y_val),
        space=gb_space,
        algo=tpe.suggest,
        max_evals=20,
        trials=gb_trials
    )
    
    # Random Forest
    rf_trials = Trials()
    best_rf = fmin(
        fn=lambda params: objective(params, RandomForestRegressor, X_train, y_train, X_val, y_val),
        space=rf_space,
        algo=tpe.suggest,
        max_evals=20,
        trials=rf_trials
    )

    # Seleccionar el mejor modelo
    gb_rmse = min(trial['result']['loss'] for trial in gb_trials.trials)
    rf_rmse = min(trial['result']['loss'] for trial in rf_trials.trials)

    if gb_rmse < rf_rmse:
        best_model = GradientBoostingRegressor(**best_gb)
        best_model_name = "GradientBoost"
    else:
        best_model = RandomForestRegressor(**best_rf)
        best_model_name = "RandomForest"

    # Entrenar el mejor modelo con todos los datos
    best_model.fit(X_train, y_train)

    # Registrar el mejor modelo
    mlflow.sklearn.log_model(best_model, "nyc-taxi-model")
    
    # Asignar el alias 'challenger'
    client = mlflow.tracking.MlflowClient()
    model_version = client.create_model_version("nyc-taxi-model", f"runs:/{mlflow.active_run().info.run_id}/nyc-taxi-model")
    client.set_registered_model_alias("nyc-taxi-model", "challenger", model_version.version)

  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]

2024/09/20 20:33:20 INFO mlflow.tracking._tracking_service.client: 🏃 View run clean-grub-687 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/75ebdc5382ac4143b0e88c3e7e46f749.

2024/09/20 20:33:20 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



  5%|▌         | 1/20 [00:14<04:33, 14.38s/trial, best loss: 5.352838042950961]

2024/09/20 20:33:26 INFO mlflow.tracking._tracking_service.client: 🏃 View run carefree-mare-111 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/a2d546f503804d6da1a5d8d2f7f08c0c.

2024/09/20 20:33:26 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 10%|█         | 2/20 [00:19<02:45,  9.22s/trial, best loss: 5.352838042950961]

2024/09/20 20:33:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run handsome-sow-988 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/068980cedabc4ce390467f74c24f805e.

2024/09/20 20:33:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 15%|█▌        | 3/20 [00:23<01:54,  6.72s/trial, best loss: 5.352838042950961]

2024/09/20 20:33:34 INFO mlflow.tracking._tracking_service.client: 🏃 View run clean-midge-922 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/0b25d4d187fb4704afe552149e4f4a43.

2024/09/20 20:33:34 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 20%|██        | 4/20 [00:28<01:33,  5.82s/trial, best loss: 5.352838042950961]

2024/09/20 20:33:45 INFO mlflow.tracking._tracking_service.client: 🏃 View run mercurial-eel-264 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/76a2923137464050b2d56d9e96f37429.

2024/09/20 20:33:45 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 25%|██▌       | 5/20 [00:38<01:53,  7.57s/trial, best loss: 5.351624742801585]

2024/09/20 20:33:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run orderly-snail-155 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/b794e359e9004fbdb8d621af1d2d5f3b.

2024/09/20 20:33:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 30%|███       | 6/20 [00:48<01:56,  8.31s/trial, best loss: 5.351624742801585]

2024/09/20 20:34:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run bright-snipe-221 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/117b284b655e4334bb8ae8ec4197d6c9.

2024/09/20 20:34:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 35%|███▌      | 7/20 [00:54<01:35,  7.37s/trial, best loss: 5.351624742801585]

2024/09/20 20:34:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run rogue-dog-986 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/5cc2c3698c1a40c5807afec9dd59b72e.

2024/09/20 20:34:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 40%|████      | 8/20 [01:01<01:28,  7.40s/trial, best loss: 5.351624742801585]

2024/09/20 20:34:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run useful-hen-398 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/c3b7f2c872e14167892ed4920843e958.

2024/09/20 20:34:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 45%|████▌     | 9/20 [01:04<01:06,  6.00s/trial, best loss: 5.351624742801585]

2024/09/20 20:34:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run whimsical-swan-748 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/8514d11606f24e158df63bd136142946.

2024/09/20 20:34:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 50%|█████     | 10/20 [01:09<00:56,  5.67s/trial, best loss: 5.351624742801585]

2024/09/20 20:34:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run upbeat-tern-324 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/c387bb27750a41f297136937169d486d.

2024/09/20 20:34:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 55%|█████▌    | 11/20 [01:14<00:50,  5.66s/trial, best loss: 5.343211886046483]

2024/09/20 20:34:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run shivering-ant-326 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/627797df7d59407797510d300bcb9353.

2024/09/20 20:34:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 60%|██████    | 12/20 [01:20<00:45,  5.65s/trial, best loss: 5.343211886046483]

2024/09/20 20:34:42 INFO mlflow.tracking._tracking_service.client: 🏃 View run righteous-skink-238 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/db3d45885fa4486b95b5f97b2fa789a9.

2024/09/20 20:34:42 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 65%|██████▌   | 13/20 [01:35<00:59,  8.51s/trial, best loss: 5.331316718839417]

2024/09/20 20:34:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run spiffy-bat-590 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/a5e1579ede95487ea1041cb266274167.

2024/09/20 20:34:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 70%|███████   | 14/20 [01:44<00:51,  8.59s/trial, best loss: 5.331316718839417]

2024/09/20 20:34:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run lyrical-fawn-940 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/aaad3313c0ca4b418bc9b856bf965ba9.

2024/09/20 20:34:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 75%|███████▌  | 15/20 [01:48<00:36,  7.36s/trial, best loss: 5.331316718839417]

2024/09/20 20:35:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run dapper-ram-840 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/4c76fee2c1ae434a9ff719b7dbd6cbf1.

2024/09/20 20:35:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 80%|████████  | 16/20 [01:55<00:28,  7.17s/trial, best loss: 5.331316718839417]

2024/09/20 20:35:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run adorable-worm-993 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/2b7c20d55c744585ac59ce3a82fc6a67.

2024/09/20 20:35:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 85%|████████▌ | 17/20 [02:03<00:22,  7.38s/trial, best loss: 5.331316718839417]

2024/09/20 20:35:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run painted-koi-155 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/87eec46a1ad94b8d8c9b6ea05fb0da45.

2024/09/20 20:35:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 90%|█████████ | 18/20 [02:17<00:18,  9.43s/trial, best loss: 5.331316718839417]

2024/09/20 20:35:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run abundant-panda-212 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/d88cd77e92b9426c9b0427e2696c79e6.

2024/09/20 20:35:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 95%|█████████▌| 19/20 [02:24<00:08,  8.58s/trial, best loss: 5.331316718839417]

2024/09/20 20:35:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run sassy-stork-544 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/9f6d80a9d0c547dab403aec265ae0d62.

2024/09/20 20:35:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



100%|██████████| 20/20 [02:29<00:00,  7.47s/trial, best loss: 5.331316718839417]
  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]

2024/09/20 20:35:47 INFO mlflow.tracking._tracking_service.client: 🏃 View run fearless-bass-369 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/41694da8707142a4bd7f428ed7a630c0.

2024/09/20 20:35:47 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



  5%|▌         | 1/20 [00:11<03:31, 11.11s/trial, best loss: 5.398530509652921]

2024/09/20 20:35:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run lyrical-loon-304 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/c304f1c86eda47caad4e205903818f8f.

2024/09/20 20:35:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 10%|█         | 2/20 [00:20<03:03, 10.19s/trial, best loss: 5.398530509652921]

2024/09/20 20:36:05 INFO mlflow.tracking._tracking_service.client: 🏃 View run polite-ape-161 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/e74409c05477421a9d3e3acb3166df9f.

2024/09/20 20:36:05 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 15%|█▌        | 3/20 [00:29<02:40,  9.42s/trial, best loss: 5.398530509652921]

2024/09/20 20:36:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run stylish-goat-769 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/d63bd961040249daaa15b5696a846495.

2024/09/20 20:36:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 20%|██        | 4/20 [00:31<01:45,  6.59s/trial, best loss: 5.398530509652921]

2024/09/20 20:36:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run rare-koi-53 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/0e5b30dcaace4f53866ba16bbc404aed.

2024/09/20 20:36:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 25%|██▌       | 5/20 [01:03<03:57, 15.85s/trial, best loss: 5.395186124400277]

2024/09/20 20:36:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run chill-mouse-748 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/85f50f03a052442db662e920da953bff.

2024/09/20 20:36:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 30%|███       | 6/20 [01:12<03:08, 13.47s/trial, best loss: 5.395186124400277]

2024/09/20 20:37:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run sedate-slug-913 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/641be806ee8b45808879a1867eff48d9.

2024/09/20 20:37:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 35%|███▌      | 7/20 [01:25<02:52, 13.23s/trial, best loss: 5.395186124400277]

2024/09/20 20:37:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run secretive-shrimp-252 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/716373c8569d4604b37b0bdba122c684.

2024/09/20 20:37:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 40%|████      | 8/20 [01:43<02:58, 14.90s/trial, best loss: 5.395186124400277]

2024/09/20 20:37:42 INFO mlflow.tracking._tracking_service.client: 🏃 View run adventurous-mink-310 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/97a0bf38fc0d4489b64a84b134128819.

2024/09/20 20:37:42 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 45%|████▌     | 9/20 [02:06<03:10, 17.29s/trial, best loss: 5.395186124400277]

2024/09/20 20:38:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run righteous-moth-809 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/e43d7b722e2b436fb1d5f85746423eab.

2024/09/20 20:38:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 50%|█████     | 10/20 [02:31<03:17, 19.70s/trial, best loss: 5.395186124400277]

2024/09/20 20:38:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run lyrical-vole-426 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/8e39f61ce1234a8f9d4dbf6bb249565e.

2024/09/20 20:38:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 55%|█████▌    | 11/20 [02:36<02:15, 15.08s/trial, best loss: 5.395186124400277]

2024/09/20 20:38:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run intrigued-robin-583 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/af88edc5629f4204b144c430d5e3e562.

2024/09/20 20:38:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 60%|██████    | 12/20 [02:59<02:21, 17.66s/trial, best loss: 5.395186124400277]

2024/09/20 20:38:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run tasteful-shrimp-601 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/52bc498c9b4b46eab26e3c910c693e81.

2024/09/20 20:38:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 65%|██████▌   | 13/20 [03:07<01:43, 14.73s/trial, best loss: 5.395186124400277]

2024/09/20 20:38:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run honorable-snail-978 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/d8aca56769284a81830451fbc244d127.

2024/09/20 20:38:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 70%|███████   | 14/20 [03:12<01:10, 11.67s/trial, best loss: 5.395186124400277]

2024/09/20 20:39:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run nosy-shoat-746 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/ac4fc2bfa6bb44b7840b0475bd5f2a6b.

2024/09/20 20:39:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 75%|███████▌  | 15/20 [03:33<01:13, 14.72s/trial, best loss: 5.395186124400277]

2024/09/20 20:39:47 INFO mlflow.tracking._tracking_service.client: 🏃 View run bald-bug-491 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/375f9a871be3443d8b3aea91bd4d10ac.

2024/09/20 20:39:47 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 80%|████████  | 16/20 [04:11<01:26, 21.51s/trial, best loss: 5.395186124400277]

2024/09/20 20:39:57 INFO mlflow.tracking._tracking_service.client: 🏃 View run intrigued-midge-190 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/aefc199594ef4739a567acca6dde76ab.

2024/09/20 20:39:57 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 85%|████████▌ | 17/20 [04:21<00:54, 18.15s/trial, best loss: 5.395186124400277]

2024/09/20 20:40:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run resilient-hare-712 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/474ff73114774dfdacd1869d485b0f33.

2024/09/20 20:40:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 90%|█████████ | 18/20 [04:39<00:36, 18.17s/trial, best loss: 5.393711741436198]

2024/09/20 20:40:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run welcoming-worm-947 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/7a40a6baf963419a9d1f46d611fce061.

2024/09/20 20:40:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



 95%|█████████▌| 19/20 [04:42<00:13, 13.66s/trial, best loss: 5.393711741436198]

2024/09/20 20:40:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run gentle-midge-156 at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/bfc82800e59d430f9a1289fff43c5c80.

2024/09/20 20:40:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.



100%|██████████| 20/20 [04:46<00:00, 14.30s/trial, best loss: 5.393711741436198]

2024/09/20 20:40:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run Parent Experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0/runs/82798e22e337477f84da8e56899809c8.
2024/09/20 20:40:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/JuanPab2009/nys-taxi-time-prediction.mlflow/#/experiments/0.





InvalidParameterError: The 'max_depth' parameter of GradientBoostingRegressor must be an int in the range [1, inf) or None. Got np.float64(8.0) instead.

In [34]:
import mlflow
from mlflow.exceptions import MlflowException
from sklearn.metrics import root_mean_squared_error

# Función para cargar un modelo de forma segura
def safe_load_model(model_name, stage):
    try:
        return mlflow.sklearn.load_model(f"models:/{model_name}@{stage}")
    except MlflowException as e:
        print(f"Error loading {stage} model: {e}")
        return None

# Evaluar modelos en el conjunto de prueba
champion_model = safe_load_model("nyc-taxi-model", "champion")
challenger_model = safe_load_model("nyc-taxi-model", "challenger")

if champion_model and challenger_model:
    champion_rmse = root_mean_squared_error(y_test, champion_model.predict(X_test))
    challenger_rmse = root_mean_squared_error(y_test, challenger_model.predict(X_test))

    print(f"Champion RMSE: {champion_rmse}")
    print(f"Challenger RMSE: {challenger_rmse}")

    # Decidir si promover el challenger a champion
    if challenger_rmse < champion_rmse * 0.95:
        print("El challenger supera significativamente al champion y debería ser promovido.")
        client.set_registered_model_alias("nyc-taxi-model", "champion", model_version.version)
    else:
        print("El challenger no supera significativamente al champion. Mantenemos el champion actual.")
else:
    print("No se pudieron cargar uno o ambos modelos. Verifica que existan en el Model Registry.")

# Si no hay un modelo champion, podríamos querer promover el challenger automáticamente
if not champion_model and challenger_model:
    print("No existe un modelo champion. Promoviendo el challenger a champion.")
    client.set_registered_model_alias("nyc-taxi-model", "champion", model_version.version)

# Asegúrate de que el challenger siempre se registre
if challenger_model:
    client.set_registered_model_alias("nyc-taxi-model", "challenger", model_version.version)
else:
    print("No se pudo registrar el modelo challenger. Verifica el proceso de entrenamiento y registro.")

Error loading champion model: INVALID_PARAMETER_VALUE: Response: {'error_code': 'INVALID_PARAMETER_VALUE'}
Error loading challenger model: INVALID_PARAMETER_VALUE: Response: {'error_code': 'INVALID_PARAMETER_VALUE'}
No se pudieron cargar uno o ambos modelos. Verifica que existan en el Model Registry.
No se pudo registrar el modelo challenger. Verifica el proceso de entrenamiento y registro.


In [None]:
# Decidir si promover el challenger a champion
if challenger_rmse < champion_rmse * 0.95:
    print("El challenger supera significativamente al champion y debería ser promovido.")
    client.set_registered_model_alias("nyc-taxi-model", "champion", model_version.version)
else:
    print("El challenger no supera significativamente al champion. Mantenemos el champion actual.")


In [None]:
# Subir datos a DagsHub
s3 = get_repo_bucket_client("JuanPab2009/nys-taxi-time-prediction")

s3.upload_file(
    Bucket="nys-taxi-time-prediction",
    Filename="../data/green_tripdata_2024-03.parquet",
    Key="test_data.parquet",
)