## NYC Taxi Data Experimnt Tracking

In [2]:
!python -V

Python 3.11.5


In [1]:
import subprocess
import time
from IPython.display import IFrame
from sklearn.model_selection import train_test_split, GridSearchCV
from tqdm import tqdm



In [2]:
import requests
import pickle
import numpy as np

import pandas as pd

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.svm import LinearSVR



In [5]:
train_raw_data = pd.read_parquet('data/green_tripdata_2024-03.parquet')
val_raw_data = pd.read_parquet('data/green_tripdata_2024-04.parquet')

In [6]:
train_raw_data.head()

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,extra,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge
0,2,2024-03-01 00:10:52,2024-03-01 00:26:12,N,1.0,129,226,1.0,1.72,12.8,1.0,0.5,3.06,0.0,,1.0,18.36,1.0,1.0,0.0
1,2,2024-03-01 00:22:21,2024-03-01 00:35:15,N,1.0,130,218,1.0,3.25,17.7,1.0,0.5,0.0,0.0,,1.0,20.2,2.0,1.0,0.0
2,2,2024-03-01 00:45:27,2024-03-01 01:04:32,N,1.0,255,107,2.0,4.58,23.3,1.0,0.5,3.5,0.0,,1.0,32.05,1.0,1.0,2.75
3,1,2024-03-01 00:02:00,2024-03-01 00:23:45,N,1.0,181,71,1.0,0.0,22.5,0.0,1.5,0.0,0.0,,1.0,24.0,1.0,1.0,0.0
4,2,2024-03-01 00:16:45,2024-03-01 00:23:25,N,1.0,95,135,1.0,1.15,8.6,1.0,0.5,1.0,0.0,,1.0,12.1,1.0,1.0,0.0


In [7]:
def process_dataframe(data):
    data.lpep_dropoff_datetime = pd.to_datetime(data.lpep_dropoff_datetime)
    data.lpep_pickup_datetime = pd.to_datetime(data.lpep_pickup_datetime)

    data['duration'] = data.lpep_dropoff_datetime - data.lpep_pickup_datetime
    data.duration = data.duration.apply(lambda td: td.total_seconds() / 60)
    data = data[(data.duration >= 1) & (data.duration <= 60)] # 
    
    data['PULocationID'].astype(str, copy=False)
    data['DOLocationID'].astype(str, copy=False)
    
    return data

In [8]:
num_features = ['trip_distance', 'extra', 'fare_amount']
cat_features = ['PULocationID', 'DOLocationID']

In [9]:
X_train = process_dataframe(train_raw_data)[num_features + cat_features]
X_val = process_dataframe(val_raw_data)[num_features + cat_features] 

y_train = process_dataframe(train_raw_data)['duration']
y_val = process_dataframe(val_raw_data)['duration'] 

In [10]:
X_val.isnull().sum()

trip_distance    0
extra            0
fare_amount      0
PULocationID     0
DOLocationID     0
dtype: int64

## Simple Experiment

In [11]:
lr = Ridge()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

mean_squared_error(y_val, y_pred, squared=False)



5.398267721662138

In [12]:
np.mean(y_pred)

14.131639061800543

## MLflow tracking

In [13]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [14]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("green_taxi")

<Experiment: artifact_location='file:///C:/Users/user/PycharmProjects/qwerty/mlruns/1', creation_time=1721082632454, experiment_id='1', last_update_time=1721082632454, lifecycle_stage='active', name='green_taxi', tags={}>

In [15]:
sqlite_db_path = "sqlite:///mlflow.db"


In [16]:
pwd

'C:\\Users\\user\\PycharmProjects\\qwerty'

In [19]:
with mlflow.start_run() as run:
    mlflow.set_tag("workspace", "inclass")
    mlflow.log_param("model_name", "Lasso")
    mlflow.log_param("train_data", "green_tripdata_2024-03.parquet")
    
    alpha = 0.1
    lr = Lasso(alpha=alpha)
    mlflow.log_param("alpha", alpha)
    
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_val)
    
    rmse = mean_squared_error(y_val, y_pred)
    print('rmse:', rmse)
    mlflow.log_metric("rmse", rmse)
    print(f"Run ID: {run.info.run_id}")

    mlflow.end_run()
print('Experiment recorded (chunk executed)')

rmse: 29.148545881396135
Run ID: 1573793f0aab498dbf4127637a5d2554
Experiment recorded (chunk executed)


In [20]:
import logging
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("green_taxi")
logging.basicConfig(level=logging.DEBUG)


In [17]:
import os

# List the contents of the 'mlruns' directory
mlruns_path = 'mlruns'
if os.path.exists(mlruns_path):
    contents = os.listdir(mlruns_path)
    print("Contents of 'mlruns':")
    for item in contents:
        print(item)
else:
    print(f"The directory '{mlruns_path}' does not exist.")


Contents of 'mlruns':
.trash
0
461779665582921331
models


In [18]:
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('mlflow.db')

# Create a cursor object
cur = conn.cursor()

# Execute a query to check the tables in the database
cur.execute("SELECT name FROM sqlite_master WHERE type='table';")

# Fetch and print the results
tables = cur.fetchall()
print("Tables in the database:", tables)
query = "SELECT * FROM experiments"
experiments_df = pd.read_sql_query(query, conn)

print('query res:', experiments_df)
# Close the connection
conn.close()


Tables in the database: [('experiments',), ('alembic_version',), ('experiment_tags',), ('tags',), ('registered_models',), ('runs',), ('registered_model_tags',), ('model_version_tags',), ('model_versions',), ('latest_metrics',), ('metrics',), ('registered_model_aliases',), ('datasets',), ('inputs',), ('input_tags',), ('params',), ('trace_info',), ('trace_tags',), ('trace_request_metadata',)]
query res:    experiment_id        name  \
0              0     Default   
1              1  green_taxi   

                                   artifact_location lifecycle_stage  \
0                                mlflow-artifacts:/0          active   
1  file:///C:/Users/user/PycharmProjects/qwerty/m...          active   

   creation_time  last_update_time  
0  1721069481320     1721069481320  
1  1721082632454     1721082632454  


In [None]:
### ------------- XGBOOST -----------


In [19]:
from xgboost import XGBRegressor


In [20]:
with mlflow.start_run() as run:
    mlflow.set_tag("workspace", "inclass")
    mlflow.log_param("model_name", "XGBoost")
    mlflow.log_param("train_data", "green_tripdata_2024-03.parquet")
    
    # Set parameters for XGBoost
    params = {
        "objective": "reg:squarederror",
        "learning_rate": 0.1,
        "max_depth": 6,
        "n_estimators": 100
    }
    
    # Log the parameters
    for param, value in params.items():
        mlflow.log_param(param, value)
    
    # Train the model
    model = XGBRegressor(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    
    # Calculate RMSE
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    print('rmse xgboost-1', rmse)
    mlflow.log_metric("rmse", rmse)
    
    # Log the model
    mlflow.xgboost.log_model(model, "model")
    
    # Print run_id for verification
    print(f"Run ID: {run.info.run_id}")

print('Experiment-2 recorded (chunk executed)')




rmse xgboost-1 3.0408032407468317
Run ID: d9bf5d20894141b2b786d45498825a54
Experiment-2 recorded (chunk executed)


In [21]:
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from xgboost import XGBRegressor, DMatrix, train as xgb_train



In [22]:
# Define the parameter grid for GridSearchCV
# Define the hyperparameter space
space = {
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'max_depth': hp.choice('max_depth', [4, 6, 8]),
    'gamma': hp.loguniform('gamma', -3, 0),  # loguniform parameter
    'num_boost_round': hp.choice('num_boost_round', [100, 200, 300]),
    'early_stopping_rounds': hp.choice('early_stopping_rounds', [10, 20, 30])
}

In [23]:
# Define the objective function
def objective(params):
    with mlflow.start_run() as run:
        mlflow.set_tag("workspace", "inclass")
        mlflow.log_param("model_name", "XGBoost")
        mlflow.log_param("train_data", "green_tripdata_2024-03.parquet")
        
        # Split the data into DMatrix format for XGBoost
        dtrain = DMatrix(X_train, label=y_train)
        dval = DMatrix(X_val, label=y_val)
        
        # Extract the parameters for XGBoost
        xgb_params = {key: params[key] for key in params if key not in ['num_boost_round', 'early_stopping_rounds']}
        
        # Train the model with early stopping
        evals = [(dval, 'eval'), (dtrain, 'train')]
        model = xgb_train(
            xgb_params, 
            dtrain, 
            num_boost_round=params['num_boost_round'], 
            evals=evals, 
            early_stopping_rounds=params['early_stopping_rounds'],
            verbose_eval=False
        )
        
        # Predict on the validation set using the best iteration
        best_iteration = model.best_iteration if model.best_iteration is not None else params['num_boost_round']
        y_pred = model.predict(dval, iteration_range=(0, best_iteration))
        
        # Calculate RMSE
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse", rmse)
        
        # Log the model
        mlflow.xgboost.log_model(model, "model")
        
        # Log the parameters
        for param, value in params.items():
            mlflow.log_param(param, value)
        
        # Print run_id for verification
        print(f"Run ID: {run.info.run_id}, RMSE: {rmse}")

        return {'loss': rmse, 'status': STATUS_OK}

# Perform hyperparameter optimization
trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50, trials=trials)

print("Best parameters:", best)

print('Hyperparameter optimization and experiment logging completed')

  0%|                                                                                                                   | 0/50 [00:00<?, ?trial/s, best loss=?]





Run ID: bce7ec135342427786be774b6357f0b3, RMSE: 3.01484313959433                                                                                               
  2%|█▊                                                                                         | 1/50 [00:02<02:12,  2.70s/trial, best loss: 3.01484313959433]







Run ID: d492a82470a54215a57f0a70e7add3a0, RMSE: 3.232038294231299                                                                                              
  4%|███▋                                                                                       | 2/50 [00:05<02:06,  2.63s/trial, best loss: 3.01484313959433]







Run ID: 4d76b078e41f40ed94b7bd07ab97490e, RMSE: 3.2265783685751677                                                                                             
  6%|█████▍                                                                                     | 3/50 [00:07<02:04,  2.66s/trial, best loss: 3.01484313959433]







Run ID: 7428431c18744c0b93a06bafdb9e747a, RMSE: 3.0793930522102175                                                                                             
  8%|███████▎                                                                                   | 4/50 [00:10<02:03,  2.69s/trial, best loss: 3.01484313959433]







Run ID: 56c9551ce3d247ccb598a6a2e1edd7f1, RMSE: 3.0073205162480443                                                                                             
 10%|████████▉                                                                                | 5/50 [00:13<02:03,  2.74s/trial, best loss: 3.0073205162480443]







Run ID: d2c5d41c65524388bdd18e8ad385317d, RMSE: 2.9754679365078545                                                                                             
 12%|██████████▋                                                                              | 6/50 [00:17<02:14,  3.05s/trial, best loss: 2.9754679365078545]







Run ID: 10e84b330c5647639679cdc44618b880, RMSE: 3.0732172771879105                                                                                             
 14%|████████████▍                                                                            | 7/50 [00:19<02:06,  2.93s/trial, best loss: 2.9754679365078545]







Run ID: cb61562fb36e4830a51250245e146c8a, RMSE: 3.4521389404031355                                                                                             
 16%|██████████████▏                                                                          | 8/50 [00:22<01:56,  2.78s/trial, best loss: 2.9754679365078545]







Run ID: 4c2f3226836540f891f331aaa4fb0ceb, RMSE: 3.0055876206014314                                                                                             
 18%|████████████████                                                                         | 9/50 [00:25<01:56,  2.84s/trial, best loss: 2.9754679365078545]







Run ID: 9fcff2efd25549a88e6b9face8601ab5, RMSE: 3.0341576731962827                                                                                             
 20%|█████████████████▌                                                                      | 10/50 [00:28<01:53,  2.83s/trial, best loss: 2.9754679365078545]







Run ID: 40b7e9df67cf4b1386b45d847eac721e, RMSE: 2.9972247554560947                                                                                             
 22%|███████████████████▎                                                                    | 11/50 [00:31<01:56,  2.99s/trial, best loss: 2.9754679365078545]







Run ID: dc3d5fd5dd4345d4966a0f3de1e4d717, RMSE: 2.990562616783642                                                                                              
 24%|█████████████████████                                                                   | 12/50 [00:34<01:51,  2.95s/trial, best loss: 2.9754679365078545]







Run ID: ba7fc5810a2d4215a8556ff885afca27, RMSE: 2.9863883077108575                                                                                             
 26%|██████████████████████▉                                                                 | 13/50 [00:37<01:57,  3.17s/trial, best loss: 2.9754679365078545]







Run ID: 8b12da859f544508859b36a95a2da4fb, RMSE: 3.0167669855053307                                                                                             
 28%|████████████████████████▋                                                               | 14/50 [00:41<01:53,  3.17s/trial, best loss: 2.9754679365078545]







Run ID: a7f639202b444420bcdb6e9d64386c70, RMSE: 3.0246422713579615                                                                                             
 30%|██████████████████████████▍                                                             | 15/50 [00:44<01:49,  3.14s/trial, best loss: 2.9754679365078545]







Run ID: cce49f52241f464aa5f21691ecf7f2c7, RMSE: 2.984166933997451                                                                                              
 32%|████████████████████████████▏                                                           | 16/50 [00:47<01:43,  3.05s/trial, best loss: 2.9754679365078545]







Run ID: 05ce86cfbbb74cd7aca664bff8f43a0d, RMSE: 3.021457201689418                                                                                              
 34%|█████████████████████████████▉                                                          | 17/50 [00:50<01:40,  3.05s/trial, best loss: 2.9754679365078545]







Run ID: 9a9dfd1303e742ff8850066d554f6da5, RMSE: 3.0309830190460967                                                                                             
 36%|███████████████████████████████▋                                                        | 18/50 [00:53<01:37,  3.05s/trial, best loss: 2.9754679365078545]







Run ID: 8ee39e8a61aa4ea4ba4604f8a6a5035e, RMSE: 3.003831654013616                                                                                              
 38%|█████████████████████████████████▍                                                      | 19/50 [00:55<01:29,  2.89s/trial, best loss: 2.9754679365078545]







Run ID: 41a46aceeb23499c909560870aa338dd, RMSE: 3.1181310715545876                                                                                             
 40%|███████████████████████████████████▏                                                    | 20/50 [00:58<01:25,  2.84s/trial, best loss: 2.9754679365078545]







Run ID: bc70c3a01bbb422ea9579c35fa505397, RMSE: 2.993232175659157                                                                                              
 42%|████████████████████████████████████▉                                                   | 21/50 [01:01<01:26,  3.00s/trial, best loss: 2.9754679365078545]







Run ID: d2b376d1662f4cbaaf6f4087e2b9d997, RMSE: 2.9954255324803643                                                                                             
 44%|██████████████████████████████████████▋                                                 | 22/50 [01:05<01:27,  3.11s/trial, best loss: 2.9754679365078545]







Run ID: d55153c98bde4ccba0c2584f18a9d572, RMSE: 3.0005930012323665                                                                                             
 46%|████████████████████████████████████████▍                                               | 23/50 [01:08<01:26,  3.19s/trial, best loss: 2.9754679365078545]









Run ID: aa6f1caef7da49b7961d7a75ee365e5e, RMSE: 3.0798336177734202                                                                                             
 48%|██████████████████████████████████████████▏                                             | 24/50 [01:12<01:28,  3.42s/trial, best loss: 2.9754679365078545]





Run ID: 0366fd88d0a649b8825017d10efe5cbd, RMSE: 3.2518824002075783                                                                                             
 50%|████████████████████████████████████████████                                            | 25/50 [01:15<01:25,  3.44s/trial, best loss: 2.9754679365078545]







Run ID: 8c7ecb3d18904d518f8cd7fb880adea4, RMSE: 3.00061507092604                                                                                               
 52%|█████████████████████████████████████████████▊                                          | 26/50 [01:19<01:26,  3.58s/trial, best loss: 2.9754679365078545]







Run ID: 5eaec91f24314c128eaeb392b8b669fb, RMSE: 3.0166445497488397                                                                                             
 54%|███████████████████████████████████████████████▌                                        | 27/50 [01:22<01:14,  3.26s/trial, best loss: 2.9754679365078545]







Run ID: 2146c1c672354f6187ae6f6c8de95410, RMSE: 3.010504499179992                                                                                              
 56%|█████████████████████████████████████████████████▎                                      | 28/50 [01:26<01:15,  3.41s/trial, best loss: 2.9754679365078545]







Run ID: be8a0f8e25034664b9158d14212219fe, RMSE: 3.0135704453544263                                                                                             
 58%|███████████████████████████████████████████████████                                     | 29/50 [01:28<01:05,  3.13s/trial, best loss: 2.9754679365078545]







Run ID: 6b7c1ec176864b3b90ef04788f9d35f0, RMSE: 2.992418651174891                                                                                              
 60%|████████████████████████████████████████████████████▊                                   | 30/50 [01:32<01:05,  3.27s/trial, best loss: 2.9754679365078545]







Run ID: c2e9a3f89fb54af7b62f28c56e8b20fa, RMSE: 3.7684438877237967                                                                                             
 62%|██████████████████████████████████████████████████████▌                                 | 31/50 [01:34<00:58,  3.07s/trial, best loss: 2.9754679365078545]







Run ID: e10970b332ac4fc1ba50d2dd98a182b7, RMSE: 3.0655164689141516                                                                                             
 64%|████████████████████████████████████████████████████████▎                               | 32/50 [01:37<00:54,  3.00s/trial, best loss: 2.9754679365078545]







Run ID: f9af7e70d1cf488cb1b01fcacd227156, RMSE: 2.9933067623014313                                                                                             
 66%|██████████████████████████████████████████████████████████                              | 33/50 [01:41<00:52,  3.11s/trial, best loss: 2.9754679365078545]







Run ID: 6116cde95a204b97afd2c68c159b6f7f, RMSE: 3.4395234143691633                                                                                             
 68%|███████████████████████████████████████████████████████████▊                            | 34/50 [01:44<00:49,  3.08s/trial, best loss: 2.9754679365078545]







Run ID: 428efe5421a94c7d8dc1c8624af709cc, RMSE: 2.987181962787642                                                                                              
 70%|█████████████████████████████████████████████████████████████▌                          | 35/50 [01:47<00:46,  3.13s/trial, best loss: 2.9754679365078545]







Run ID: c89dd5d7af274ad2b19add7b6aa68834, RMSE: 3.028147423425457                                                                                              
 72%|███████████████████████████████████████████████████████████████▎                        | 36/50 [01:50<00:42,  3.01s/trial, best loss: 2.9754679365078545]







Run ID: 28c3d692d6e0430ea18c19f709a911d6, RMSE: 3.25623083776254                                                                                               
 74%|█████████████████████████████████████████████████████████████████                       | 37/50 [01:52<00:38,  2.96s/trial, best loss: 2.9754679365078545]







Run ID: 0442684afe664b3c9e2f25a43f74ebab, RMSE: 2.9875230983872574                                                                                             
 76%|██████████████████████████████████████████████████████████████████▉                     | 38/50 [01:56<00:37,  3.16s/trial, best loss: 2.9754679365078545]







Run ID: df5325ca753b468d99ec4443def49044, RMSE: 2.9992501443896824                                                                                             
 78%|████████████████████████████████████████████████████████████████████▋                   | 39/50 [01:59<00:35,  3.19s/trial, best loss: 2.9754679365078545]







Run ID: 39371c97d2f844fb8334040fe1d4ccb9, RMSE: 3.277196016766832                                                                                              
 80%|██████████████████████████████████████████████████████████████████████▍                 | 40/50 [02:02<00:29,  2.99s/trial, best loss: 2.9754679365078545]







Run ID: 7697b74d3fe84847b5823895d57e7c5f, RMSE: 3.005297152585824                                                                                              
 82%|████████████████████████████████████████████████████████████████████████▏               | 41/50 [02:05<00:26,  2.99s/trial, best loss: 2.9754679365078545]







Run ID: 8e0cfe8db0bb44bbb52b410a5d3821ff, RMSE: 3.0065371971354335                                                                                             
 84%|█████████████████████████████████████████████████████████████████████████▉              | 42/50 [02:09<00:25,  3.24s/trial, best loss: 2.9754679365078545]







Run ID: c1674f4dc9f44ed58e58255f1bf2b49f, RMSE: 3.032297771060551                                                                                              
 86%|███████████████████████████████████████████████████████████████████████████▋            | 43/50 [02:12<00:22,  3.18s/trial, best loss: 2.9754679365078545]







Run ID: 2379f7698711481ab8c3fb4955394708, RMSE: 3.3811975978429474                                                                                             
 88%|█████████████████████████████████████████████████████████████████████████████▍          | 44/50 [02:15<00:19,  3.22s/trial, best loss: 2.9754679365078545]







Run ID: 6e8ae133769e4d11ae8c9e582fd80ab5, RMSE: 3.0575644569719236                                                                                             
 90%|███████████████████████████████████████████████████████████████████████████████▏        | 45/50 [02:18<00:16,  3.22s/trial, best loss: 2.9754679365078545]







Run ID: f5c5c5cc3fe64c00a3406b47423c1a1f, RMSE: 3.0167878076877908                                                                                             
 92%|████████████████████████████████████████████████████████████████████████████████▉       | 46/50 [02:21<00:12,  3.17s/trial, best loss: 2.9754679365078545]







Run ID: 101690934d1f4d17ab979cf9137ed7e9, RMSE: 3.0130361865028696                                                                                             
 94%|██████████████████████████████████████████████████████████████████████████████████▋     | 47/50 [02:24<00:09,  3.03s/trial, best loss: 2.9754679365078545]







Run ID: 376b588b186e489095698d5c6d2dfd2a, RMSE: 3.003389292754948                                                                                              
 96%|████████████████████████████████████████████████████████████████████████████████████▍   | 48/50 [02:27<00:06,  3.19s/trial, best loss: 2.9754679365078545]







Run ID: 98f8bc29d0a240e280d0f6c79ea8ba83, RMSE: 2.991871461768265                                                                                              
 98%|██████████████████████████████████████████████████████████████████████████████████████▏ | 49/50 [02:31<00:03,  3.23s/trial, best loss: 2.9754679365078545]







Run ID: d702000c1a994b9e8ea32fb2bc4c1320, RMSE: 3.2453762065109095                                                                                             
100%|████████████████████████████████████████████████████████████████████████████████████████| 50/50 [02:34<00:00,  3.10s/trial, best loss: 2.9754679365078545]
Best parameters: {'early_stopping_rounds': 1, 'gamma': 0.09075014030075113, 'learning_rate': 0.04447507493826804, 'max_depth': 2, 'num_boost_round': 2}
Hyperparameter optimization and experiment logging completed






### Load model

In [18]:
logged_model_id = 'runs:/d2c5d41c65524388bdd18e8ad385317d/model'
loaded_model = mlflow.pyfunc.load_model(logged_model_id)

In [19]:
type(loaded_model)

mlflow.pyfunc.PyFuncModel

In [21]:
from mlflow import MlflowClient


In [23]:
client = MlflowClient()
client.get_experiment_by_name('green_taxi')


<Experiment: artifact_location='file:///C:/Users/user/PycharmProjects/qwerty/mlruns/1', creation_time=1721082632454, experiment_id='1', last_update_time=1721082632454, lifecycle_stage='active', name='green_taxi', tags={}>

In [24]:
runs = client.search_runs(experiment_ids='1')

In [25]:
len(runs)

54

In [27]:
for run in runs:
    print(f"run_id = {run.info.run_id}, rmse = {run.data.metrics}")

run_id = d702000c1a994b9e8ea32fb2bc4c1320, rmse = {'rmse': 3.2453762065109095}
run_id = 98f8bc29d0a240e280d0f6c79ea8ba83, rmse = {'rmse': 2.991871461768265}
run_id = 376b588b186e489095698d5c6d2dfd2a, rmse = {'rmse': 3.003389292754948}
run_id = 101690934d1f4d17ab979cf9137ed7e9, rmse = {'rmse': 3.0130361865028696}
run_id = f5c5c5cc3fe64c00a3406b47423c1a1f, rmse = {'rmse': 3.0167878076877908}
run_id = 6e8ae133769e4d11ae8c9e582fd80ab5, rmse = {'rmse': 3.0575644569719236}
run_id = 2379f7698711481ab8c3fb4955394708, rmse = {'rmse': 3.3811975978429474}
run_id = c1674f4dc9f44ed58e58255f1bf2b49f, rmse = {'rmse': 3.032297771060551}
run_id = 8e0cfe8db0bb44bbb52b410a5d3821ff, rmse = {'rmse': 3.0065371971354335}
run_id = 7697b74d3fe84847b5823895d57e7c5f, rmse = {'rmse': 3.005297152585824}
run_id = 39371c97d2f844fb8334040fe1d4ccb9, rmse = {'rmse': 3.277196016766832}
run_id = df5325ca753b468d99ec4443def49044, rmse = {'rmse': 2.9992501443896824}
run_id = 0442684afe664b3c9e2f25a43f74ebab, rmse = {'rmse'

In [39]:
# Filter runs with rmse < thr (threshold)
thr = 2.99
filtered_runs = []
for run in runs:
    metrics = run.data.metrics
    if 'rmse' in metrics and metrics['rmse'] < thr:
        filtered_runs.append({
            'run_id': run.info.run_id,
            'experiment_id': run.info.experiment_id,
            'rmse': metrics['rmse'],
            'status': run.info.status,
            'start_time': run.info.start_time,
            'end_time': run.info.end_time,
            'ordered_by':["metrics.rmse ASC"],
        })
print(len(filtered_runs)) # 5 best options

# Convert the filtered runs to a DataFrame
filtered_runs_df = pd.DataFrame(filtered_runs)

5


In [40]:
filtered_runs_df.head()

Unnamed: 0,run_id,experiment_id,rmse,status,start_time,end_time,ordered_by
0,0442684afe664b3c9e2f25a43f74ebab,1,2.987523,FINISHED,1721085449236,1721085452838,[metrics.rmse ASC]
1,428efe5421a94c7d8dc1c8624af709cc,1,2.987182,FINISHED,1721085440426,1721085443651,[metrics.rmse ASC]
2,cce49f52241f464aa5f21691ecf7f2c7,1,2.984167,FINISHED,1721085380614,1721085383443,[metrics.rmse ASC]
3,ba7fc5810a2d4215a8556ff885afca27,1,2.986388,FINISHED,1721085370712,1721085374367,[metrics.rmse ASC]
4,d2c5d41c65524388bdd18e8ad385317d,1,2.975468,FINISHED,1721085349944,1721085353580,[metrics.rmse ASC]
