In [18]:
from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = "sqlite:///new.db"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [19]:
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression, Lasso 

from sklearn.metrics import root_mean_squared_error

In [20]:
import mlflow
mlflow.set_tracking_uri("sqlite:///new.db")
mlflow.set_experiment("nyc-taxi-experiment")

2024/06/09 19:53:20 INFO mlflow.tracking.fluent: Experiment with name 'nyc-taxi-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='/Users/kamilakolpashnikova/Dropbox/Mac/Documents/Jupyter_notebooks/mlops-2024/02-experiment-tracking/mlruns/1', creation_time=1717977200591, experiment_id='1', last_update_time=1717977200591, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [21]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)
    df['tpep_pickup_datetime'] = pd.to_datetime(df['tpep_pickup_datetime'])
    df['tpep_dropoff_datetime'] = pd.to_datetime(df['tpep_dropoff_datetime'])
    
    df['duration'] = df['tpep_dropoff_datetime'] - df['tpep_pickup_datetime']

    df['duration'] = df['duration'].dt.total_seconds() / 60
    
    df = df[(df.duration>=1) & (df.duration <= 60)]
    
    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df

In [22]:
df_train = read_dataframe('data/yellow_tripdata_2023-01.parquet')
df_val = read_dataframe('data/yellow_tripdata_2023-02.parquet')
df_val.head()

Unnamed: 0,VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID,store_and_fwd_flag,PULocationID,DOLocationID,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,Airport_fee,duration
0,1,2023-02-01 00:32:53,2023-02-01 00:34:34,2.0,0.3,1.0,N,142,163,2,4.4,3.5,0.5,0.0,0.0,1.0,9.4,2.5,0.0,1.683333
3,1,2023-02-01 00:29:33,2023-02-01 01:01:38,0.0,18.8,1.0,N,132,26,1,70.9,2.25,0.5,0.0,0.0,1.0,74.65,0.0,1.25,32.083333
4,2,2023-02-01 00:12:28,2023-02-01 00:25:46,1.0,3.22,1.0,N,161,145,1,17.0,1.0,0.5,3.3,0.0,1.0,25.3,2.5,0.0,13.3
5,1,2023-02-01 00:52:40,2023-02-01 01:07:18,1.0,5.1,1.0,N,148,236,1,21.9,3.5,0.5,5.35,0.0,1.0,32.25,2.5,0.0,14.633333
6,1,2023-02-01 00:12:39,2023-02-01 00:40:36,1.0,8.9,1.0,N,137,244,1,41.5,3.5,0.5,3.5,0.0,1.0,50.0,2.5,0.0,27.95


In [23]:
train_dict = df_train[['PULocationID', 'DOLocationID']].to_dict(orient='records')
val_dict = df_val[['PULocationID', 'DOLocationID']].to_dict(orient='records')

In [24]:
# Create the DictVectorizer
dv = DictVectorizer()

# Fit and transform the data
X_train = dv.fit_transform(train_dict)
X_val = dv.transform(val_dict)

In [25]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [26]:
alpha = 0.01

lr = Lasso(alpha)
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

root_mean_squared_error(y_val, y_pred)

8.138118733385994

In [31]:
with mlflow.start_run():
    mlflow.set_tag("developer","Qfl3x")
    
    mlflow.log_param("train-data-path", "data/green_tripdata_2021-01.parquet")
    mlflow.log_param("val-data-path", "data/green_tripdata_2021-02.parquet")
    
    alpha = 0.01
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)
    
    y_pred = lr.predict(X_val)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

In [32]:
import xgboost as xgb

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [12]:


train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=100, # was 1000 in the original code
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:linear',
    'seed': 42
}

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:7.85020                           
[1]	validation-rmse:7.33480                           
[2]	validation-rmse:6.71042                           
[3]	validation-rmse:6.58682                           
[4]	validation-rmse:6.50465                           
[5]	validation-rmse:6.41720                           
[6]	validation-rmse:6.15263                           
[7]	validation-rmse:6.08645                           
[8]	validation-rmse:6.03566                           
[9]	validation-rmse:5.97975                           
[10]	validation-rmse:5.90972                          
[11]	validation-rmse:5.78353                          
[12]	validation-rmse:5.76575                          
[13]	validation-rmse:5.72657                          
[14]	validation-rmse:5.65600                          
[15]	validation-rmse:5.56602                          
[16]	validation-rmse:5.55760                          
[17]	validation-rmse:5.51289                          
[18]	valid




[2]	validation-rmse:8.80715                                                    
[3]	validation-rmse:8.59567                                                    
[4]	validation-rmse:8.44180                                                    
[5]	validation-rmse:8.33175                                                    
[6]	validation-rmse:8.23966                                                    
[7]	validation-rmse:8.17887                                                    
[8]	validation-rmse:8.12912                                                    
[9]	validation-rmse:8.09004                                                    
[10]	validation-rmse:8.04343                                                   
[11]	validation-rmse:8.01296                                                   
[12]	validation-rmse:7.98865                                                   
[13]	validation-rmse:7.96442                                                   
[14]	validation-rmse:7.94137            




[0]	validation-rmse:7.37459                                                    
[1]	validation-rmse:6.52010                                                    
[2]	validation-rmse:6.23679                                                    
[3]	validation-rmse:6.05218                                                    
[4]	validation-rmse:5.83820                                                    
[5]	validation-rmse:5.74734                                                    
[6]	validation-rmse:5.70169                                                    
[7]	validation-rmse:5.51593                                                    
[8]	validation-rmse:5.43751                                                    
[9]	validation-rmse:5.40897                                                    
[10]	validation-rmse:5.36159                                                   
[11]	validation-rmse:5.35266                                                   
[12]	validation-rmse:5.34788            




[0]	validation-rmse:8.53577                                                    
[1]	validation-rmse:7.60926                                                    
[2]	validation-rmse:6.60798                                                    
[3]	validation-rmse:6.09269                                                    
[4]	validation-rmse:5.93395                                                    
[5]	validation-rmse:5.64831                                                    
[6]	validation-rmse:5.59158                                                    
[7]	validation-rmse:5.46264                                                    
[8]	validation-rmse:5.43903                                                    
[9]	validation-rmse:5.41832                                                    
[10]	validation-rmse:5.35085                                                   
[11]	validation-rmse:5.34244                                                   
[12]	validation-rmse:5.33514            




[1]	validation-rmse:8.00363                                                     
[2]	validation-rmse:7.78060                                                     
[3]	validation-rmse:7.63879                                                     
[4]	validation-rmse:7.46606                                                     
[5]	validation-rmse:7.36703                                                     
[6]	validation-rmse:7.29495                                                     
[7]	validation-rmse:7.22941                                                     
[8]	validation-rmse:7.06864                                                     
[9]	validation-rmse:7.04009                                                     
[10]	validation-rmse:6.99177                                                    
[11]	validation-rmse:6.85545                                                    
[12]	validation-rmse:6.82238                                                    
[13]	validation-rmse:6.80236




[1]	validation-rmse:9.06768                                                     
[2]	validation-rmse:8.74349                                                     
[3]	validation-rmse:8.50015                                                     
[4]	validation-rmse:8.30196                                                     
[5]	validation-rmse:8.13842                                                     
[6]	validation-rmse:8.01908                                                     
[7]	validation-rmse:7.93273                                                     
[8]	validation-rmse:7.86309                                                     
[9]	validation-rmse:7.80524                                                     
[10]	validation-rmse:7.75707                                                    
[11]	validation-rmse:7.66210                                                    
[12]	validation-rmse:7.62461                                                    
[13]	validation-rmse:7.59509




[1]	validation-rmse:7.67069                                                     
[2]	validation-rmse:7.31684                                                     
[3]	validation-rmse:7.13935                                                     
[4]	validation-rmse:7.00258                                                     
[5]	validation-rmse:6.89870                                                     
[6]	validation-rmse:6.83205                                                     
[7]	validation-rmse:6.74897                                                     
[8]	validation-rmse:6.43633                                                     
[9]	validation-rmse:6.28282                                                     
[10]	validation-rmse:6.25836                                                    
[11]	validation-rmse:6.22648                                                    
[12]	validation-rmse:6.12363                                                    
[13]	validation-rmse:6.09924




[1]	validation-rmse:9.25523                                                     
[2]	validation-rmse:8.93668                                                     
[3]	validation-rmse:8.67573                                                     
[4]	validation-rmse:8.40718                                                     
[5]	validation-rmse:8.21388                                                     
[6]	validation-rmse:8.06562                                                     
[7]	validation-rmse:7.88251                                                     
[8]	validation-rmse:7.77235                                                     
[9]	validation-rmse:7.68460                                                     
[10]	validation-rmse:7.60673                                                    
[11]	validation-rmse:7.54040                                                    
[12]	validation-rmse:7.46930                                                    
[13]	validation-rmse:7.42100




[0]	validation-rmse:9.53511                                                     
[1]	validation-rmse:9.08050                                                     
[2]	validation-rmse:8.69809                                                     
[3]	validation-rmse:8.29064                                                     
[4]	validation-rmse:8.00871                                                     
[5]	validation-rmse:7.79140                                                     
[6]	validation-rmse:7.41642                                                     
[7]	validation-rmse:7.10686                                                     
[8]	validation-rmse:6.85520                                                     
[9]	validation-rmse:6.74635                                                     
[10]	validation-rmse:6.65080                                                    
[11]	validation-rmse:6.44888                                                    
[12]	validation-rmse:6.38530




[0]	validation-rmse:8.68999                                                     
[1]	validation-rmse:7.66801                                                     
[2]	validation-rmse:7.27364                                                     
[3]	validation-rmse:7.03843                                                     
[4]	validation-rmse:6.82122                                                     
[5]	validation-rmse:6.72575                                                     
[6]	validation-rmse:6.28430                                                     
[7]	validation-rmse:6.22909                                                     
[8]	validation-rmse:6.12075                                                     
[9]	validation-rmse:6.09069                                                     
[10]	validation-rmse:6.06092                                                    
[11]	validation-rmse:6.03319                                                    
[12]	validation-rmse:5.88459




[0]	validation-rmse:7.09697                                                      
[1]	validation-rmse:5.94860                                                      
[2]	validation-rmse:5.75095                                                      
[3]	validation-rmse:5.63344                                                      
[4]	validation-rmse:5.47567                                                      
[5]	validation-rmse:5.43425                                                      
[6]	validation-rmse:5.28539                                                      
[7]	validation-rmse:5.27507                                                      
[8]	validation-rmse:5.26077                                                      
[9]	validation-rmse:5.24309                                                      
[10]	validation-rmse:5.23628                                                     
[11]	validation-rmse:5.22533                                                     
[12]	validation-




[0]	validation-rmse:8.77897                                                      
[1]	validation-rmse:7.58397                                                      
[2]	validation-rmse:7.07242                                                      
[3]	validation-rmse:6.40278                                                      
[4]	validation-rmse:6.01802                                                      
[5]	validation-rmse:5.89273                                                      
[6]	validation-rmse:5.67376                                                      
[7]	validation-rmse:5.61992                                                      
[8]	validation-rmse:5.58277                                                      
[9]	validation-rmse:5.46127                                                      
[10]	validation-rmse:5.44302                                                     
[11]	validation-rmse:5.37883                                                     
[12]	validation-




[0]	validation-rmse:8.28828                                                      
[1]	validation-rmse:6.86598                                                      
[2]	validation-rmse:6.49975                                                      
[3]	validation-rmse:6.31405                                                      
[4]	validation-rmse:5.85482                                                      
[5]	validation-rmse:5.79532                                                      
[6]	validation-rmse:5.74455                                                      
[7]	validation-rmse:5.59304                                                      
[8]	validation-rmse:5.57292                                                      
[9]	validation-rmse:5.55489                                                      
[10]	validation-rmse:5.50420                                                     
[11]	validation-rmse:5.48282                                                     
[12]	validation-




[0]	validation-rmse:9.30253                                                      
[1]	validation-rmse:8.70578                                                      
[2]	validation-rmse:8.19029                                                      
[3]	validation-rmse:7.85624                                                      
[4]	validation-rmse:7.31688                                                      
[5]	validation-rmse:6.90670                                                      
[6]	validation-rmse:6.75394                                                      
[7]	validation-rmse:6.63223                                                      
[8]	validation-rmse:6.35683                                                      
[9]	validation-rmse:6.28651                                                      
[10]	validation-rmse:6.23094                                                     
[11]	validation-rmse:6.18919                                                     
[12]	validation-




[0]	validation-rmse:9.49356                                                      
[1]	validation-rmse:9.00826                                                      
[2]	validation-rmse:8.61078                                                      
[3]	validation-rmse:8.15395                                                      
[4]	validation-rmse:7.86761                                                      
[5]	validation-rmse:7.49489                                                      
[6]	validation-rmse:7.30609                                                      
[7]	validation-rmse:6.96290                                                      
[8]	validation-rmse:6.67642                                                      
[9]	validation-rmse:6.43834                                                      
[10]	validation-rmse:6.24404                                                     
[11]	validation-rmse:6.11509                                                     
[12]	validation-




[0]	validation-rmse:9.61734                                                      
[1]	validation-rmse:9.22642                                                      
[2]	validation-rmse:8.83462                                                      
[3]	validation-rmse:8.53661                                                      
[4]	validation-rmse:8.21570                                                      
[5]	validation-rmse:7.99492                                                      
[6]	validation-rmse:7.66261                                                      
[7]	validation-rmse:7.40465                                                      
[8]	validation-rmse:7.25823                                                      
[9]	validation-rmse:7.01664                                                      
[10]	validation-rmse:6.91499                                                     
[11]	validation-rmse:6.71618                                                     
[12]	validation-




[0]	validation-rmse:9.73739                                                      
[1]	validation-rmse:9.43387                                                      
[2]	validation-rmse:9.16447                                                      
[3]	validation-rmse:8.90658                                                      
[4]	validation-rmse:8.68415                                                      
[5]	validation-rmse:8.42221                                                      
[6]	validation-rmse:8.23740                                                      
[7]	validation-rmse:8.07319                                                      
[8]	validation-rmse:7.87868                                                      
[9]	validation-rmse:7.75052                                                      
[10]	validation-rmse:7.51260                                                     
[11]	validation-rmse:7.29899                                                     
[12]	validation-




[0]	validation-rmse:6.98733                                                      
[1]	validation-rmse:6.06315                                                      
[2]	validation-rmse:5.81722                                                      
[3]	validation-rmse:5.34785                                                      
[4]	validation-rmse:5.32654                                                      
[5]	validation-rmse:5.30503                                                      
[6]	validation-rmse:5.26330                                                      
[7]	validation-rmse:5.25422                                                      
[8]	validation-rmse:5.22577                                                      
[9]	validation-rmse:5.22316                                                      
[10]	validation-rmse:5.22168                                                     
[11]	validation-rmse:5.21650                                                     
[12]	validation-




[0]	validation-rmse:9.74548                                                      
[1]	validation-rmse:9.44869                                                      
[2]	validation-rmse:9.18077                                                      
[3]	validation-rmse:8.94105                                                      
[4]	validation-rmse:8.66195                                                      
[5]	validation-rmse:8.40984                                                      
[6]	validation-rmse:8.22988                                                      
[7]	validation-rmse:7.99090                                                      
[8]	validation-rmse:7.84658                                                      
[9]	validation-rmse:7.62352                                                      
[10]	validation-rmse:7.42232                                                     
[11]	validation-rmse:7.23789                                                     
[12]	validation-




[0]	validation-rmse:7.07271                                                      
[1]	validation-rmse:5.96601                                                      
[2]	validation-rmse:5.82231                                                      
[3]	validation-rmse:5.63277                                                      
[4]	validation-rmse:5.51182                                                      
[5]	validation-rmse:5.46105                                                      
[6]	validation-rmse:5.37397                                                      
[7]	validation-rmse:5.35356                                                      
[8]	validation-rmse:5.34810                                                      
[9]	validation-rmse:5.34095                                                      
[10]	validation-rmse:5.33706                                                     
[11]	validation-rmse:5.31562                                                     
[12]	validation-




[0]	validation-rmse:8.45577                                                      
[1]	validation-rmse:7.58402                                                      
[2]	validation-rmse:6.70536                                                      
[3]	validation-rmse:6.42573                                                      
[4]	validation-rmse:6.08017                                                      
[5]	validation-rmse:5.98184                                                      
[6]	validation-rmse:5.64539                                                      
[7]	validation-rmse:5.61007                                                      
[8]	validation-rmse:5.44008                                                      
[9]	validation-rmse:5.42505                                                      
[10]	validation-rmse:5.35720                                                     
[11]	validation-rmse:5.34900                                                     
[12]	validation-




[0]	validation-rmse:7.96446                                                     
[1]	validation-rmse:7.10884                                                     
[2]	validation-rmse:6.04556                                                     
[3]	validation-rmse:5.84283                                                     
[4]	validation-rmse:5.71408                                                     
[5]	validation-rmse:5.43336                                                     
[6]	validation-rmse:5.40972                                                     
[7]	validation-rmse:5.38677                                                     
[8]	validation-rmse:5.29802                                                     
[9]	validation-rmse:5.29220                                                     
[10]	validation-rmse:5.28501                                                    
[11]	validation-rmse:5.25012                                                    
[12]	validation-rmse:5.24641




[0]	validation-rmse:8.84568                                                      
[1]	validation-rmse:8.05240                                                      
[2]	validation-rmse:7.52953                                                      
[3]	validation-rmse:6.77217                                                      
[4]	validation-rmse:6.26211                                                      
[5]	validation-rmse:5.93573                                                      
[6]	validation-rmse:5.82816                                                      
[7]	validation-rmse:5.63667                                                      
[8]	validation-rmse:5.58699                                                      
[9]	validation-rmse:5.47128                                                      
[10]	validation-rmse:5.44725                                                     
[11]	validation-rmse:5.38059                                                     
[12]	validation-




[0]	validation-rmse:8.98750                                                      
[1]	validation-rmse:8.25235                                                      
[2]	validation-rmse:7.73879                                                      
[3]	validation-rmse:7.03334                                                      
[4]	validation-rmse:6.52760                                                      
[5]	validation-rmse:6.25673                                                      
[6]	validation-rmse:6.11895                                                      
[7]	validation-rmse:5.85722                                                      
[8]	validation-rmse:5.78977                                                      
[9]	validation-rmse:5.62375                                                      
[10]	validation-rmse:5.53008                                                     
[11]	validation-rmse:5.50334                                                     
[12]	validation-




[0]	validation-rmse:9.06828                                                     
[1]	validation-rmse:8.13796                                                     
[2]	validation-rmse:7.63354                                                     
[3]	validation-rmse:6.95536                                                     
[4]	validation-rmse:6.60231                                                     
[5]	validation-rmse:6.42596                                                     
[6]	validation-rmse:6.09628                                                     
[7]	validation-rmse:6.01148                                                     
[8]	validation-rmse:5.80293                                                     
[9]	validation-rmse:5.75753                                                     
[10]	validation-rmse:5.62683                                                    
[11]	validation-rmse:5.59632                                                    
[12]	validation-rmse:5.57300




[0]	validation-rmse:8.01956                                                     
[1]	validation-rmse:7.14973                                                     
[2]	validation-rmse:6.12306                                                     
[3]	validation-rmse:5.92817                                                     
[4]	validation-rmse:5.54414                                                     
[5]	validation-rmse:5.49373                                                     
[6]	validation-rmse:5.35325                                                     
[7]	validation-rmse:5.33817                                                     
[8]	validation-rmse:5.32193                                                     
[9]	validation-rmse:5.26653                                                     
[10]	validation-rmse:5.26208                                                    
[11]	validation-rmse:5.25601                                                    
[12]	validation-rmse:5.23124




[1]	validation-rmse:7.16830                                                     
[2]	validation-rmse:6.89887                                                     
[3]	validation-rmse:6.77287                                                     
[4]	validation-rmse:6.58594                                                     
[5]	validation-rmse:6.45966                                                     
[6]	validation-rmse:6.00655                                                     
[7]	validation-rmse:5.96919                                                     
[8]	validation-rmse:5.92929                                                     
[9]	validation-rmse:5.85701                                                     
[10]	validation-rmse:5.81871                                                    
[11]	validation-rmse:5.67212                                                    
[12]	validation-rmse:5.65060                                                    
[13]	validation-rmse:5.60610




[0]	validation-rmse:8.31385                                                     
[1]	validation-rmse:6.79801                                                     
[2]	validation-rmse:6.38178                                                     
[3]	validation-rmse:5.84821                                                     
[4]	validation-rmse:5.75151                                                     
[5]	validation-rmse:5.58296                                                     
[6]	validation-rmse:5.54957                                                     
[7]	validation-rmse:5.52130                                                     
[8]	validation-rmse:5.44376                                                     
[9]	validation-rmse:5.43298                                                     
[10]	validation-rmse:5.41990                                                    
[11]	validation-rmse:5.40705                                                    
[12]	validation-rmse:5.34083




[0]	validation-rmse:9.42024                                                     
[1]	validation-rmse:8.90457                                                     
[2]	validation-rmse:8.49171                                                     
[3]	validation-rmse:7.94937                                                     
[4]	validation-rmse:7.59902                                                     
[5]	validation-rmse:7.38332                                                     
[6]	validation-rmse:7.06157                                                     
[7]	validation-rmse:6.93258                                                     
[8]	validation-rmse:6.83002                                                     
[9]	validation-rmse:6.59957                                                     
[10]	validation-rmse:6.53229                                                    
[11]	validation-rmse:6.47451                                                    
[12]	validation-rmse:6.31939




[0]	validation-rmse:8.70345                                                     
[1]	validation-rmse:7.86610                                                     
[2]	validation-rmse:6.91895                                                     
[3]	validation-rmse:6.46347                                                     
[4]	validation-rmse:6.27474                                                     
[5]	validation-rmse:5.87809                                                     
[6]	validation-rmse:5.79788                                                     
[7]	validation-rmse:5.58580                                                     
[8]	validation-rmse:5.55218                                                     
[9]	validation-rmse:5.43166                                                     
[10]	validation-rmse:5.41341                                                    
[11]	validation-rmse:5.40061                                                    
[12]	validation-rmse:5.33474




[0]	validation-rmse:9.00165                                                     
[1]	validation-rmse:8.31402                                                     
[2]	validation-rmse:7.73413                                                     
[3]	validation-rmse:7.45680                                                     
[4]	validation-rmse:7.27921                                                     
[5]	validation-rmse:7.16205                                                     
[6]	validation-rmse:7.04397                                                     
[7]	validation-rmse:6.97157                                                     
[8]	validation-rmse:6.65157                                                     
[9]	validation-rmse:6.42284                                                     
[10]	validation-rmse:6.39017                                                    
[11]	validation-rmse:6.36003                                                    
[12]	validation-rmse:6.33291




[0]	validation-rmse:9.05864                                                     
[1]	validation-rmse:8.33725                                                     
[2]	validation-rmse:7.83206                                                     
[3]	validation-rmse:7.10968                                                     
[4]	validation-rmse:6.58916                                                     
[5]	validation-rmse:6.22715                                                     
[6]	validation-rmse:6.05040                                                     
[7]	validation-rmse:5.95175                                                     
[8]	validation-rmse:5.88075                                                     
[9]	validation-rmse:5.70019                                                     
[10]	validation-rmse:5.66492                                                    
[11]	validation-rmse:5.54416                                                    
[12]	validation-rmse:5.52246




[0]	validation-rmse:9.67235                                                     
[1]	validation-rmse:9.32313                                                     
[2]	validation-rmse:8.98635                                                     
[3]	validation-rmse:8.71012                                                     
[4]	validation-rmse:8.39234                                                     
[5]	validation-rmse:8.17615                                                     
[6]	validation-rmse:7.98897                                                     
[7]	validation-rmse:7.68716                                                     
[8]	validation-rmse:7.42261                                                     
[9]	validation-rmse:7.19511                                                     
[10]	validation-rmse:7.04524                                                    
[11]	validation-rmse:6.94070                                                    
[12]	validation-rmse:6.75662




[0]	validation-rmse:9.34492                                                     
[1]	validation-rmse:8.77319                                                     
[2]	validation-rmse:8.30815                                                     
[3]	validation-rmse:7.95094                                                     
[4]	validation-rmse:7.44681                                                     
[5]	validation-rmse:7.12085                                                     
[6]	validation-rmse:6.76523                                                     
[7]	validation-rmse:6.61654                                                     
[8]	validation-rmse:6.33036                                                     
[9]	validation-rmse:6.10799                                                     
[10]	validation-rmse:5.97914                                                    
[11]	validation-rmse:5.92110                                                    
[12]	validation-rmse:5.77935




[0]	validation-rmse:8.84402                                                     
[1]	validation-rmse:7.78770                                                     
[2]	validation-rmse:7.28331                                                     
[3]	validation-rmse:6.69537                                                     
[4]	validation-rmse:6.47927                                                     
[5]	validation-rmse:6.04165                                                     
[6]	validation-rmse:5.86976                                                     
[7]	validation-rmse:5.80810                                                     
[8]	validation-rmse:5.75414                                                     
[9]	validation-rmse:5.70862                                                     
[10]	validation-rmse:5.68030                                                    
[11]	validation-rmse:5.51951                                                    
[12]	validation-rmse:5.50583




[0]	validation-rmse:7.60953                                                     
[1]	validation-rmse:6.42888                                                     
[2]	validation-rmse:6.25275                                                     
[3]	validation-rmse:6.10933                                                     
[4]	validation-rmse:5.84138                                                     
[5]	validation-rmse:5.79490                                                     
[6]	validation-rmse:5.64611                                                     
[7]	validation-rmse:5.62182                                                     
[8]	validation-rmse:5.40598                                                     
[9]	validation-rmse:5.39334                                                     
[10]	validation-rmse:5.37697                                                    
[11]	validation-rmse:5.34322                                                    
[12]	validation-rmse:5.33607




[2]	validation-rmse:8.83899                                                     
[3]	validation-rmse:8.62664                                                     
[4]	validation-rmse:8.47116                                                     
[5]	validation-rmse:8.35783                                                     
[6]	validation-rmse:8.26266                                                     
[7]	validation-rmse:8.19942                                                     
[8]	validation-rmse:8.14815                                                     
[9]	validation-rmse:8.10787                                                     
[10]	validation-rmse:8.07330                                                    
[11]	validation-rmse:8.03847                                                    
[12]	validation-rmse:8.01326                                                    
[13]	validation-rmse:7.97744                                                    
[14]	validation-rmse:7.95412




[1]	validation-rmse:9.03447                                                     
[2]	validation-rmse:8.65806                                                     
[3]	validation-rmse:8.27504                                                     
[4]	validation-rmse:8.04325                                                     
[5]	validation-rmse:7.83943                                                     
[6]	validation-rmse:7.64997                                                     
[7]	validation-rmse:7.53214                                                     
[8]	validation-rmse:7.44114                                                     
[9]	validation-rmse:7.33665                                                     
[10]	validation-rmse:7.17588                                                    
[11]	validation-rmse:7.12372                                                    
[12]	validation-rmse:7.08082                                                    
[13]	validation-rmse:7.02324




[1]	validation-rmse:9.68963                                                     
[2]	validation-rmse:9.52251                                                     
[3]	validation-rmse:9.37076                                                     
[4]	validation-rmse:9.22845                                                     
[5]	validation-rmse:9.10021                                                     
[6]	validation-rmse:8.98145                                                     
[7]	validation-rmse:8.86853                                                     
[8]	validation-rmse:8.76886                                                     
[9]	validation-rmse:8.67152                                                     
[10]	validation-rmse:8.58613                                                    
[11]	validation-rmse:8.50878                                                    
[12]	validation-rmse:8.43493                                                    
[13]	validation-rmse:8.36792




[0]	validation-rmse:8.76751                                                     
[1]	validation-rmse:8.01895                                                     
[2]	validation-rmse:7.15727                                                     
[3]	validation-rmse:6.90748                                                     
[4]	validation-rmse:6.75991                                                     
[5]	validation-rmse:6.37296                                                     
[6]	validation-rmse:6.30004                                                     
[7]	validation-rmse:6.24562                                                     
[8]	validation-rmse:6.20747                                                     
[9]	validation-rmse:5.95048                                                     
[10]	validation-rmse:5.92176                                                    
[11]	validation-rmse:5.90107                                                    
[12]	validation-rmse:5.83409




[0]	validation-rmse:8.45283                                                     
[1]	validation-rmse:7.11488                                                     
[2]	validation-rmse:6.66612                                                     
[3]	validation-rmse:6.03429                                                     
[4]	validation-rmse:5.91442                                                     
[5]	validation-rmse:5.65599                                                     
[6]	validation-rmse:5.61074                                                     
[7]	validation-rmse:5.48644                                                     
[8]	validation-rmse:5.46672                                                     
[9]	validation-rmse:5.44916                                                     
[10]	validation-rmse:5.43405                                                    
[11]	validation-rmse:5.39086                                                    
[12]	validation-rmse:5.37934




[0]	validation-rmse:7.86845                                                     
[1]	validation-rmse:6.26953                                                     
[2]	validation-rmse:5.95105                                                     
[3]	validation-rmse:5.52462                                                     
[4]	validation-rmse:5.46640                                                     
[5]	validation-rmse:5.34912                                                     
[6]	validation-rmse:5.32983                                                     
[7]	validation-rmse:5.31170                                                     
[8]	validation-rmse:5.30192                                                     
[9]	validation-rmse:5.25713                                                     
[10]	validation-rmse:5.25301                                                    
[11]	validation-rmse:5.23975                                                    
[12]	validation-rmse:5.23783




[0]	validation-rmse:9.40635                                                     
[1]	validation-rmse:8.87481                                                     
[2]	validation-rmse:8.43776                                                     
[3]	validation-rmse:7.93924                                                     
[4]	validation-rmse:7.65146                                                     
[5]	validation-rmse:7.22843                                                     
[6]	validation-rmse:6.88135                                                     
[7]	validation-rmse:6.72477                                                     
[8]	validation-rmse:6.44565                                                     
[9]	validation-rmse:6.21842                                                     
[10]	validation-rmse:6.03769                                                    
[11]	validation-rmse:5.89843                                                    
[12]	validation-rmse:5.81750




[1]	validation-rmse:8.69362                                                     
[2]	validation-rmse:8.26818                                                     
[3]	validation-rmse:8.00749                                                     
[4]	validation-rmse:7.68951                                                     
[5]	validation-rmse:7.55444                                                     
[6]	validation-rmse:7.33218                                                     
[7]	validation-rmse:7.25417                                                     
[8]	validation-rmse:7.18653                                                     
[9]	validation-rmse:7.13996                                                     
[10]	validation-rmse:7.07000                                                    
[11]	validation-rmse:7.03740                                                    
[12]	validation-rmse:7.00573                                                    
[13]	validation-rmse:6.95795




[0]	validation-rmse:9.17714                                                     
[1]	validation-rmse:8.38043                                                     
[2]	validation-rmse:7.88213                                                     
[3]	validation-rmse:7.22992                                                     
[4]	validation-rmse:6.95466                                                     
[5]	validation-rmse:6.52279                                                     
[6]	validation-rmse:6.23010                                                     
[7]	validation-rmse:6.12124                                                     
[8]	validation-rmse:6.03586                                                     
[9]	validation-rmse:5.97398                                                     
[10]	validation-rmse:5.79375                                                    
[11]	validation-rmse:5.75966                                                    
[12]	validation-rmse:5.63747




[0]	validation-rmse:9.57298                                                     
[1]	validation-rmse:9.12603                                                     
[2]	validation-rmse:8.75632                                                     
[3]	validation-rmse:8.44538                                                     
[4]	validation-rmse:8.03730                                                     
[5]	validation-rmse:7.69420                                                     
[6]	validation-rmse:7.49974                                                     
[7]	validation-rmse:7.18381                                                     
[8]	validation-rmse:6.91370                                                     
[9]	validation-rmse:6.68644                                                     
[10]	validation-rmse:6.58395                                                    
[11]	validation-rmse:6.39432                                                    
[12]	validation-rmse:6.24451




[2]	validation-rmse:7.94759                                                     
[3]	validation-rmse:7.87055                                                     
[4]	validation-rmse:7.79237                                                     
[5]	validation-rmse:7.71387                                                     
[6]	validation-rmse:7.67154                                                     
[7]	validation-rmse:7.57726                                                     
[8]	validation-rmse:7.52757                                                     
[9]	validation-rmse:7.48877                                                     
[10]	validation-rmse:7.41199                                                    
[11]	validation-rmse:7.37312                                                    
[12]	validation-rmse:7.34157                                                    
[13]	validation-rmse:7.27687                                                    
[14]	validation-rmse:7.26269




[0]	validation-rmse:8.86477                                                     
[1]	validation-rmse:8.11219                                                     
[2]	validation-rmse:7.26710                                                     
[3]	validation-rmse:6.98899                                                     
[4]	validation-rmse:6.81580                                                     
[5]	validation-rmse:6.40913                                                     
[6]	validation-rmse:6.33237                                                     
[7]	validation-rmse:6.27599                                                     
[8]	validation-rmse:6.21028                                                     
[9]	validation-rmse:6.17376                                                     
[10]	validation-rmse:5.93359                                                    
[11]	validation-rmse:5.91376                                                    
[12]	validation-rmse:5.88888




[0]	validation-rmse:9.67791                                                     
[1]	validation-rmse:9.33418                                                     
[2]	validation-rmse:9.03176                                                     
[3]	validation-rmse:8.72879                                                     
[4]	validation-rmse:8.49788                                                     
[5]	validation-rmse:8.22660                                                     
[6]	validation-rmse:8.03923                                                     
[7]	validation-rmse:7.75564                                                     
[8]	validation-rmse:7.50845                                                     
[9]	validation-rmse:7.38129                                                     
[10]	validation-rmse:7.17316                                                    
[11]	validation-rmse:7.07565                                                    
[12]	validation-rmse:6.90754




[0]	validation-rmse:7.75988                                                     
[1]	validation-rmse:6.11451                                                     
[2]	validation-rmse:5.83648                                                     
[3]	validation-rmse:5.46542                                                     
[4]	validation-rmse:5.42017                                                     
[5]	validation-rmse:5.32868                                                     
[6]	validation-rmse:5.31280                                                     
[7]	validation-rmse:5.29884                                                     
[8]	validation-rmse:5.26016                                                     
[9]	validation-rmse:5.25629                                                     
[10]	validation-rmse:5.24054                                                    
[11]	validation-rmse:5.23755                                                    
[12]	validation-rmse:5.23193

In [33]:
params = {
    'learning_rate': 0.19408972548566633,
    'max_depth': 100,
    'min_child_weight': 1.8824847254189034,
    'objective': 'reg:linear',
    'reg_alpha': 0.13078016502323883,
    'reg_lambda': 0.0001070736826826826,
    'seed': 42
}

mlflow.xgboost.autolog()


booster = xgb.train(
    params=params,
    dtrain=train,
    num_boost_round=100, # was 1000 in the original code
    evals=[(valid, 'validation')],
    early_stopping_rounds=50
)

2024/06/09 19:56:48 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '1e0774bee771491b8f0065a11743b93a', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


[0]	validation-rmse:8.98747
[1]	validation-rmse:8.25230
[2]	validation-rmse:7.73872
[3]	validation-rmse:7.03330
[4]	validation-rmse:6.52753
[5]	validation-rmse:6.25668
[6]	validation-rmse:6.11893
[7]	validation-rmse:5.85720
[8]	validation-rmse:5.78975
[9]	validation-rmse:5.62370
[10]	validation-rmse:5.53003
[11]	validation-rmse:5.50327
[12]	validation-rmse:5.43364
[13]	validation-rmse:5.41354
[14]	validation-rmse:5.40036
[15]	validation-rmse:5.38914
[16]	validation-rmse:5.33734
[17]	validation-rmse:5.32942
[18]	validation-rmse:5.29641
[19]	validation-rmse:5.29214
[20]	validation-rmse:5.28834
[21]	validation-rmse:5.28227
[22]	validation-rmse:5.25889
[23]	validation-rmse:5.25669
[24]	validation-rmse:5.25386
[25]	validation-rmse:5.23956
[26]	validation-rmse:5.23769
[27]	validation-rmse:5.23330
[28]	validation-rmse:5.23002
[29]	validation-rmse:5.22853
[30]	validation-rmse:5.22743
[31]	validation-rmse:5.21863
[32]	validation-rmse:5.21784
[33]	validation-rmse:5.21728
[34]	validation-rmse:5.2



In [34]:
mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")



<mlflow.models.model.ModelInfo at 0x2c9c88e80>

In [35]:
mlflow.xgboost.autolog(disable=True)

In [38]:
import pickle

with mlflow.start_run():
    mlflow.set_tag("model", "xgboost")

    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    best_params = {
        'learning_rate': 0.19408972548566633,
        'max_depth': 100,
        'min_child_weight': 1.8824847254189034,
        'objective': 'reg:linear',
        'reg_alpha': 0.13078016502323883,
        'reg_lambda': 0.0001070736826826826,
        'seed': 42
    }

    mlflow.log_params(best_params)

    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=100, # was 1000 in the original code
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")





[0]	validation-rmse:8.98747
[1]	validation-rmse:8.25230
[2]	validation-rmse:7.73872
[3]	validation-rmse:7.03330
[4]	validation-rmse:6.52753
[5]	validation-rmse:6.25668
[6]	validation-rmse:6.11893
[7]	validation-rmse:5.85720
[8]	validation-rmse:5.78975
[9]	validation-rmse:5.62370
[10]	validation-rmse:5.53003
[11]	validation-rmse:5.50327
[12]	validation-rmse:5.43364
[13]	validation-rmse:5.41354
[14]	validation-rmse:5.40036
[15]	validation-rmse:5.38914
[16]	validation-rmse:5.33734
[17]	validation-rmse:5.32942
[18]	validation-rmse:5.29641
[19]	validation-rmse:5.29214
[20]	validation-rmse:5.28834
[21]	validation-rmse:5.28227
[22]	validation-rmse:5.25889
[23]	validation-rmse:5.25669
[24]	validation-rmse:5.25386
[25]	validation-rmse:5.23956
[26]	validation-rmse:5.23769
[27]	validation-rmse:5.23330
[28]	validation-rmse:5.23002
[29]	validation-rmse:5.22853
[30]	validation-rmse:5.22743
[31]	validation-rmse:5.21863
[32]	validation-rmse:5.21784
[33]	validation-rmse:5.21728
[34]	validation-rmse:5.2



In [39]:
with mlflow.end_run():
    pass

AttributeError: __enter__

In [40]:
logged_model = "runs:/f30801391f284e0aaccfd60cba9a7aec/models_mlflow"

loaded_model = mlflow.pyfunc.load_model(logged_model)



In [41]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: f30801391f284e0aaccfd60cba9a7aec

In [42]:
xgboost_model = mlflow.xgboost.load_model(logged_model)



In [43]:
xgboost_model

<xgboost.core.Booster at 0x2ca914370>

In [46]:
y_pred = xgboost_model.predict(valid)

In [47]:
y_pred

array([ 6.3495045, 44.144955 , 15.8901   , ..., 15.219723 , 11.963885 ,
       13.507035 ], dtype=float32)

In [48]:
## creating new experiment

client.create_experiment("some-new-experiments-2")

'2'

In [53]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids="1", 
    filter_string="",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=2,
    order_by=["metrics.rmse ASC"]
    )


In [54]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: f30801391f284e0aaccfd60cba9a7aec, rmse: 5.1910
run id: 25efc624f1ac45b68366653dd21dfbe4, rmse: 8.1381


In [55]:
run_id = "25efc624f1ac45b68366653dd21dfbe4"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri, "nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
Created version '3' of model 'nyc-taxi-regressor'.


<ModelVersion: aliases=[], creation_timestamp=1717979370024, current_stage='None', description=None, last_updated_timestamp=1717979370024, name='nyc-taxi-regressor', run_id='25efc624f1ac45b68366653dd21dfbe4', run_link=None, source='/Users/kamilakolpashnikova/Dropbox/Mac/Documents/Jupyter_notebooks/mlops-2024/02-experiment-tracking/mlruns/1/25efc624f1ac45b68366653dd21dfbe4/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [58]:
client.get_registered_model("nyc-taxi-regressor")

<RegisteredModel: aliases={}, creation_timestamp=1717978511344, description='', last_updated_timestamp=1717979370024, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1717979370024, current_stage='None', description=None, last_updated_timestamp=1717979370024, name='nyc-taxi-regressor', run_id='25efc624f1ac45b68366653dd21dfbe4', run_link=None, source='/Users/kamilakolpashnikova/Dropbox/Mac/Documents/Jupyter_notebooks/mlops-2024/02-experiment-tracking/mlruns/1/25efc624f1ac45b68366653dd21dfbe4/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>], name='nyc-taxi-regressor', tags={}>

In [62]:
latest_versions = client.get_latest_versions("nyc-taxi-regressor")

  latest_versions = client.get_latest_versions("nyc-taxi-regressor")


In [63]:
for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 3, stage: None


In [65]:
client.transition_model_version_stage(
    name="nyc-taxi-regressor",
    version=1,
    stage="Staging",
    archive_existing_versions=False
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1717978511370, current_stage='Staging', description='', last_updated_timestamp=1717979813694, name='nyc-taxi-regressor', run_id='f30801391f284e0aaccfd60cba9a7aec', run_link='', source='/Users/kamilakolpashnikova/Dropbox/Mac/Documents/Jupyter_notebooks/mlops-2024/02-experiment-tracking/mlruns/1/f30801391f284e0aaccfd60cba9a7aec/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [66]:
from datetime import datetime

date = datetime.today().date().isoformat()

model_version = 1

client.update_model_version(
    name="nyc-taxi-regressor",
    version=model_version,
    description=f"Model version {model_version} trained on NYC Taxi dataset on {date}"
)

<ModelVersion: aliases=[], creation_timestamp=1717978511370, current_stage='Staging', description='Model version 1 trained on NYC Taxi dataset on 2024-06-09', last_updated_timestamp=1717980052577, name='nyc-taxi-regressor', run_id='f30801391f284e0aaccfd60cba9a7aec', run_link='', source='/Users/kamilakolpashnikova/Dropbox/Mac/Documents/Jupyter_notebooks/mlops-2024/02-experiment-tracking/mlruns/1/f30801391f284e0aaccfd60cba9a7aec/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=1>

def preprocess(df, dv):
    

In [67]:
def preprocess(df, dv):
    train_dict = df[['PULocationID', 'DOLocationID']].to_dict(orient='records')
    return dv.transform(train_dict)

def test_model(model_uri, X_test, y_test):
    model = mlflow.pyfunc.load_model(model_uri)
    y_pred = model.predict(X_test)
    return {"rmse": root_mean_squared_error(y_test, y_pred)}

Q1

Q2 4

Q3 min_samples_split 2

Q4 default-artifact-root

Q5 5.335

Q6 test_rmse
5.567408012462019