# Duration Prediction

In [39]:
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import GradientBoostingRegressor, ExtraTreesRegressor, RandomForestRegressor
from sklearn.svm import LinearSVR
from sklearn.metrics import root_mean_squared_error
import pickle
import mlflow
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
import xgboost as xgb

In [2]:
pd.options.mode.copy_on_write = True

## Preprocessing

In [3]:
def preprocessing(df):
    # compute target variables: trip duration in minutes
    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    # keep only trips of less than an hour
    df = df.loc[(df.duration >= 1) & (df.duration <= 60), :]

    # extract trip date components
    df['trip_month'] = pd.Categorical(df.lpep_pickup_datetime.dt.month)
    df['trip_dom'] = pd.Categorical(df.lpep_pickup_datetime.dt.day)
    df['trip_hour'] = pd.Categorical(df.lpep_pickup_datetime.dt.hour)

    # concatenate pickup and dropoff locations
    df['PU_DO'] = df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)

    return df

In [4]:
def one_hot_encoding(df, numerical, categorical, dv=None):
    df[categorical] = df[categorical].astype(str)
    df_dicts = df[categorical + numerical].to_dict(orient='records')

    if not dv:
        dv = DictVectorizer()
        dv.fit(df_dicts)

    X_train = dv.transform(df_dicts)

    return X_train, dv

In [5]:
def train_model(model, X_train, y_train):
    model.fit(X_train, y_train)

In [6]:
# read im the raw data
df_train = pd.read_parquet('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet')
df_val = pd.read_parquet('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet')

In [7]:
# data cleaning
df_train = preprocessing(df_train)
df_val = preprocessing(df_val)

In [8]:
# variable selection
categorical = ['PU_DO']
numerical = ['trip_distance']

In [9]:
# categorical feature encoding
X_train, dv = one_hot_encoding(df_train, numerical, categorical)
X_val, _ = one_hot_encoding(df_val, numerical, categorical, dv=dv)

In [10]:
target = 'duration'
y_train = df_train[target]
y_val = df_val[target]

## Experiment Tracking

In [11]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location='/Users/bastienwinant/Desktop/projects/mlops-zoomcamp/02-experiment-tracking/mlruns/1', creation_time=1748510026560, experiment_id='1', last_update_time=1748510026560, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [12]:
mlflow.autolog(disable=True)

In [13]:
with open('models/preprocessor.b', 'wb') as f_out:
    pickle.dump(dv, f_out)

### Manual Logging
#### Lasso

In [14]:
alpha=.01

In [15]:
lasso = Lasso(alpha=alpha)
train_model(lasso, X_train, y_train)
y_pred = lasso.predict(X_val)

In [16]:
rmse = root_mean_squared_error(y_val, y_pred)

In [17]:
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # training params
    mlflow.log_param("training-data", 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet')
    mlflow.log_param("testing-data", 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet')
    mlflow.log_param("alpha", alpha)

    mlflow.log_metric("rmse", rmse)

    mlflow.log_artifact(local_path="models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(lasso, artifact_path="models_mlflow")



#### Linear Regression

In [18]:
lr = LinearRegression()
train_model(lr, X_train, y_train)
y_pred = lr.predict(X_val)

In [19]:
rmse = root_mean_squared_error(y_val, y_pred)

In [20]:
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # training params
    mlflow.log_param("training-data", 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet')
    mlflow.log_param("testing-data", 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet')
    mlflow.log_param("alpha", alpha)

    # validation error
    mlflow.log_metric("rmse", rmse)

    # model artifacts
    mlflow.log_artifact(local_path="models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(lr, artifact_path="models_mlflow")



#### Ridge

In [21]:
alpha = .01

In [22]:
rr = Ridge(alpha=alpha, random_state=42)
train_model(rr, X_train, y_train)
y_pred = rr.predict(X_val)

In [23]:
rmse = root_mean_squared_error(y_val, y_pred)

In [24]:
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # training params
    mlflow.log_param("training-data", 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet')
    mlflow.log_param("testing-data", 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet')
    mlflow.log_param("alpha", alpha)

    mlflow.log_metric("rmse", rmse)

    mlflow.log_artifact(local_path="models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(rr, artifact_path="models_mlflow")



#### XGBoost

In [25]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [26]:
def objective(params):
    booster = xgb.train(
        params=params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, "validation")],
        early_stopping_rounds=50,
        verbose_eval=20
    )

    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)

    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        mlflow.log_metric("rmse", rmse)

        mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")
        mlflow.log_artifact(local_path="models/preprocessor.b", artifact_path="preprocessor")

    return {'loss': rmse, 'status': STATUS_OK}

In [27]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:squarederror',
    'seed': 42
}

In [28]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

[0]	validation-rmse:10.49261                          
[20]	validation-rmse:6.50368                          
[40]	validation-rmse:6.46562                          
[60]	validation-rmse:6.44681                          
[80]	validation-rmse:6.43502                          
[100]	validation-rmse:6.42643                         
[120]	validation-rmse:6.42102                         
[140]	validation-rmse:6.41245                         
[160]	validation-rmse:6.40979                         
[180]	validation-rmse:6.40599                         
[200]	validation-rmse:6.40281                         
[220]	validation-rmse:6.40001                         
[240]	validation-rmse:6.39799                         
[260]	validation-rmse:6.39691                         
[280]	validation-rmse:6.39501                         
[300]	validation-rmse:6.39277                         
[320]	validation-rmse:6.39096                         
[340]	validation-rmse:6.39035                         
[360]	vali





[0]	validation-rmse:11.72116                                                   
[20]	validation-rmse:7.39178                                                   
[40]	validation-rmse:6.79755                                                   
[60]	validation-rmse:6.69036                                                   
[80]	validation-rmse:6.65887                                                   
[100]	validation-rmse:6.64652                                                  
[120]	validation-rmse:6.63938                                                  
[140]	validation-rmse:6.63545                                                  
[160]	validation-rmse:6.63295                                                  
[180]	validation-rmse:6.63094                                                  
[200]	validation-rmse:6.62931                                                  
[220]	validation-rmse:6.62796                                                  
[240]	validation-rmse:6.62675           





[0]	validation-rmse:6.93007                                                      
[20]	validation-rmse:6.68598                                                     
[40]	validation-rmse:6.64623                                                     
[60]	validation-rmse:6.62107                                                     
[80]	validation-rmse:6.61311                                                     
[100]	validation-rmse:6.60815                                                    
[120]	validation-rmse:6.61697                                                    
[140]	validation-rmse:6.60816                                                    
[160]	validation-rmse:6.60691                                                    
[180]	validation-rmse:6.60539                                                    
[200]	validation-rmse:6.60960                                                    
[220]	validation-rmse:6.61899                                                    
[233]	validation





[0]	validation-rmse:11.61876                                                     
[20]	validation-rmse:7.14073                                                   
[40]	validation-rmse:6.73263                                                   
[60]	validation-rmse:6.66506                                                   
[80]	validation-rmse:6.64771                                                   
[100]	validation-rmse:6.63903                                                  
[120]	validation-rmse:6.63479                                                  
[140]	validation-rmse:6.63222                                                  
[160]	validation-rmse:6.62993                                                  
[180]	validation-rmse:6.62805                                                  
[200]	validation-rmse:6.62654                                                  
[220]	validation-rmse:6.62510                                                  
[240]	validation-rmse:6.62367         





[0]	validation-rmse:10.01794                                                     
[20]	validation-rmse:6.50259                                                     
[40]	validation-rmse:6.47868                                                     
[60]	validation-rmse:6.47065                                                     
[80]	validation-rmse:6.46611                                                     
[100]	validation-rmse:6.46359                                                    
[120]	validation-rmse:6.45999                                                    
[140]	validation-rmse:6.45682                                                    
[160]	validation-rmse:6.45510                                                    
[180]	validation-rmse:6.45428                                                    
[200]	validation-rmse:6.45367                                                    
[220]	validation-rmse:6.45265                                                    
[240]	validation





[0]	validation-rmse:9.61404                                                      
[20]	validation-rmse:6.56318                                                   
[40]	validation-rmse:6.54645                                                   
[60]	validation-rmse:6.53706                                                   
[80]	validation-rmse:6.53363                                                   
[100]	validation-rmse:6.53089                                                  
[120]	validation-rmse:6.52752                                                  
[140]	validation-rmse:6.52795                                                  
[160]	validation-rmse:6.52662                                                  
[180]	validation-rmse:6.52541                                                  
[200]	validation-rmse:6.52554                                                  
[220]	validation-rmse:6.52520                                                  
[240]	validation-rmse:6.52494         





[0]	validation-rmse:8.46261                                                    
[20]	validation-rmse:6.52366                                                   
[40]	validation-rmse:6.48747                                                   
[60]	validation-rmse:6.47504                                                   
[80]	validation-rmse:6.46834                                                   
[100]	validation-rmse:6.45976                                                  
[120]	validation-rmse:6.46703                                                  
[140]	validation-rmse:6.47090                                                  
[150]	validation-rmse:6.47570                                                  
 12%|█▏        | 6/50 [07:20<46:56, 64.00s/trial, best loss: 6.379184085162657]





[0]	validation-rmse:8.19446                                                    
[20]	validation-rmse:6.54662                                                   
[40]	validation-rmse:6.53467                                                   
[60]	validation-rmse:6.52577                                                   
[80]	validation-rmse:6.52354                                                   
[100]	validation-rmse:6.51906                                                  
[120]	validation-rmse:6.51691                                                  
[140]	validation-rmse:6.51559                                                  
[160]	validation-rmse:6.51429                                                  
[180]	validation-rmse:6.51398                                                  
[200]	validation-rmse:6.51407                                                  
[220]	validation-rmse:6.51365                                                  
[240]	validation-rmse:6.51316           





[0]	validation-rmse:8.24632                                                    
[20]	validation-rmse:6.52931                                                   
[40]	validation-rmse:6.50229                                                   
[60]	validation-rmse:6.49821                                                   
[80]	validation-rmse:6.50243                                                   
[100]	validation-rmse:6.51259                                                  
 16%|█▌        | 8/50 [08:24<33:08, 47.35s/trial, best loss: 6.379184085162657]





[0]	validation-rmse:10.61381                                                   
[20]	validation-rmse:6.69248                                                   
[40]	validation-rmse:6.66303                                                   
[60]	validation-rmse:6.64538                                                   
[80]	validation-rmse:6.63499                                                   
[100]	validation-rmse:6.62499                                                  
[120]	validation-rmse:6.61732                                                  
[140]	validation-rmse:6.61033                                                  
[160]	validation-rmse:6.60586                                                  
[180]	validation-rmse:6.60041                                                  
[200]	validation-rmse:6.59653                                                  
[220]	validation-rmse:6.59067                                                  
[240]	validation-rmse:6.58877           





[0]	validation-rmse:11.48163                                                    
[20]	validation-rmse:6.84327                                                    
[40]	validation-rmse:6.55263                                                    
[60]	validation-rmse:6.50800                                                    
[80]	validation-rmse:6.49029                                                    
[100]	validation-rmse:6.47998                                                   
[120]	validation-rmse:6.46958                                                   
[140]	validation-rmse:6.46207                                                   
[160]	validation-rmse:6.45448                                                   
[180]	validation-rmse:6.44930                                                   
[200]	validation-rmse:6.44422                                                   
[220]	validation-rmse:6.43877                                                   
[240]	validation-rmse:6.4345





[0]	validation-rmse:7.74538                                                     
[20]	validation-rmse:6.46702                                                    
[40]	validation-rmse:6.43306                                                    
[60]	validation-rmse:6.42030                                                    
[80]	validation-rmse:6.41116                                                    
[100]	validation-rmse:6.40846                                                   
[120]	validation-rmse:6.40622                                                   
[140]	validation-rmse:6.40474                                                   
[160]	validation-rmse:6.40615                                                   
[180]	validation-rmse:6.40691                                                   
[190]	validation-rmse:6.40524                                                   
 22%|██▏       | 11/50 [11:37<44:57, 69.16s/trial, best loss: 6.379184085162657]





[0]	validation-rmse:9.44839                                                     
[20]	validation-rmse:6.49982                                                    
[40]	validation-rmse:6.45601                                                    
[60]	validation-rmse:6.42134                                                    
[80]	validation-rmse:6.39742                                                    
[100]	validation-rmse:6.38089                                                   
[120]	validation-rmse:6.36708                                                   
[140]	validation-rmse:6.35691                                                   
[160]	validation-rmse:6.34774                                                   
[180]	validation-rmse:6.34199                                                   
[200]	validation-rmse:6.33714                                                   
[220]	validation-rmse:6.32942                                                   
[240]	validation-rmse:6.3243





[0]	validation-rmse:8.82902                                                     
[20]	validation-rmse:6.65865                                                    
[40]	validation-rmse:6.61613                                                    
[60]	validation-rmse:6.58706                                                    
[80]	validation-rmse:6.56625                                                    
[100]	validation-rmse:6.54708                                                   
[120]	validation-rmse:6.53708                                                   
[140]	validation-rmse:6.52567                                                   
[160]	validation-rmse:6.51577                                                   
[180]	validation-rmse:6.50757                                                   
[200]	validation-rmse:6.49816                                                   
[220]	validation-rmse:6.49210                                                   
[240]	validation-rmse:6.4875





[0]	validation-rmse:7.61789                                                     
[20]	validation-rmse:6.53277                                                    
[40]	validation-rmse:6.51807                                                    
[60]	validation-rmse:6.51318                                                    
[80]	validation-rmse:6.51042                                                    
[100]	validation-rmse:6.50870                                                   
[120]	validation-rmse:6.51033                                                   
[140]	validation-rmse:6.51249                                                   
[157]	validation-rmse:6.51350                                                   
 28%|██▊       | 14/50 [13:24<27:41, 46.15s/trial, best loss: 6.310255867225004]





[0]	validation-rmse:6.87343                                                     
[20]	validation-rmse:6.55787                                                    
[40]	validation-rmse:6.52470                                                    
[60]	validation-rmse:6.51147                                                    
[80]	validation-rmse:6.52369                                                    
[100]	validation-rmse:6.53824                                                   
[113]	validation-rmse:6.55860                                                   
 30%|███       | 15/50 [13:52<23:58, 41.10s/trial, best loss: 6.310255867225004]





[0]	validation-rmse:9.99117                                                     
[20]	validation-rmse:6.53248                                                    
[40]	validation-rmse:6.49826                                                    
[60]	validation-rmse:6.47065                                                    
[80]	validation-rmse:6.45245                                                    
[100]	validation-rmse:6.44086                                                   
[120]	validation-rmse:6.42985                                                   
[140]	validation-rmse:6.41997                                                   
[160]	validation-rmse:6.41303                                                   
[180]	validation-rmse:6.40621                                                   
[200]	validation-rmse:6.40146                                                   
[220]	validation-rmse:6.39780                                                   
[240]	validation-rmse:6.3947





[0]	validation-rmse:11.66759                                                    
[20]	validation-rmse:7.10689                                                    
[40]	validation-rmse:6.56374                                                    
[60]	validation-rmse:6.46698                                                    
[80]	validation-rmse:6.43385                                                    
[100]	validation-rmse:6.42046                                                   
[120]	validation-rmse:6.41169                                                   
[140]	validation-rmse:6.40611                                                   
[160]	validation-rmse:6.40113                                                   
[180]	validation-rmse:6.39717                                                   
[200]	validation-rmse:6.39426                                                   
[220]	validation-rmse:6.39106                                                   
[240]	validation-rmse:6.3881





[0]	validation-rmse:10.23328                                                    
[20]	validation-rmse:6.59371                                                    
[40]	validation-rmse:6.57023                                                    
[60]	validation-rmse:6.56107                                                    
[80]	validation-rmse:6.55603                                                    
[100]	validation-rmse:6.55138                                                   
[120]	validation-rmse:6.54899                                                   
[140]	validation-rmse:6.54710                                                   
[160]	validation-rmse:6.54563                                                   
[180]	validation-rmse:6.54513                                                   
[200]	validation-rmse:6.54411                                                   
[220]	validation-rmse:6.54392                                                   
[240]	validation-rmse:6.5428





[0]	validation-rmse:9.43653                                                     
[20]	validation-rmse:6.64805                                                    
[40]	validation-rmse:6.63542                                                    
[60]	validation-rmse:6.62884                                                    
[80]	validation-rmse:6.62374                                                    
[100]	validation-rmse:6.61973                                                   
[120]	validation-rmse:6.61628                                                   
[140]	validation-rmse:6.61416                                                   
[160]	validation-rmse:6.61204                                                   
[180]	validation-rmse:6.61121                                                   
[200]	validation-rmse:6.60971                                                   
[220]	validation-rmse:6.60852                                                   
[240]	validation-rmse:6.6079





[0]	validation-rmse:11.12914                                                    
[20]	validation-rmse:6.63744                                                    
[40]	validation-rmse:6.55072                                                    
[60]	validation-rmse:6.52794                                                    
[80]	validation-rmse:6.50796                                                    
[100]	validation-rmse:6.49281                                                   
[120]	validation-rmse:6.47965                                                   
[140]	validation-rmse:6.46701                                                   
[160]	validation-rmse:6.45655                                                   
[180]	validation-rmse:6.44739                                                   
[200]	validation-rmse:6.43835                                                   
[220]	validation-rmse:6.43026                                                   
[240]	validation-rmse:6.4238





[0]	validation-rmse:11.09043                                                    
[20]	validation-rmse:6.64125                                                    
[40]	validation-rmse:6.56947                                                    
[60]	validation-rmse:6.54567                                                    
[80]	validation-rmse:6.52627                                                    
[100]	validation-rmse:6.50849                                                   
[120]	validation-rmse:6.49723                                                   
[140]	validation-rmse:6.48482                                                   
[160]	validation-rmse:6.47414                                                   
[180]	validation-rmse:6.46336                                                   
[200]	validation-rmse:6.45475                                                   
[220]	validation-rmse:6.44755                                                   
[240]	validation-rmse:6.4399





[0]	validation-rmse:11.09057                                                    
[20]	validation-rmse:6.60711                                                    
[40]	validation-rmse:6.52656                                                    
[60]	validation-rmse:6.50202                                                    
[80]	validation-rmse:6.48354                                                    
[100]	validation-rmse:6.46565                                                   
[120]	validation-rmse:6.45279                                                   
[140]	validation-rmse:6.44149                                                   
[160]	validation-rmse:6.42924                                                   
[180]	validation-rmse:6.42103                                                   
[200]	validation-rmse:6.41383                                                   
[220]	validation-rmse:6.40660                                                   
[240]	validation-rmse:6.3989





[0]	validation-rmse:10.95773                                                    
[20]	validation-rmse:6.57019                                                   
[40]	validation-rmse:6.51194                                                   
[60]	validation-rmse:6.48434                                                   
[80]	validation-rmse:6.46660                                                   
[100]	validation-rmse:6.44855                                                  
[120]	validation-rmse:6.43739                                                  
[140]	validation-rmse:6.42558                                                  
[160]	validation-rmse:6.41480                                                  
[180]	validation-rmse:6.40646                                                  
[200]	validation-rmse:6.39926                                                  
[220]	validation-rmse:6.39111                                                  
[240]	validation-rmse:6.38396          





[0]	validation-rmse:10.96537                                                    
[20]	validation-rmse:6.51629                                                    
[40]	validation-rmse:6.44505                                                    
[60]	validation-rmse:6.42085                                                    
[80]	validation-rmse:6.40391                                                    
[100]	validation-rmse:6.39166                                                   
[120]	validation-rmse:6.38241                                                   
[140]	validation-rmse:6.37354                                                   
[160]	validation-rmse:6.36718                                                   
[180]	validation-rmse:6.36088                                                   
[200]	validation-rmse:6.35657                                                   
[220]	validation-rmse:6.35136                                                   
[240]	validation-rmse:6.3481





[0]	validation-rmse:11.38465                                                    
[20]	validation-rmse:6.83760                                                    
[40]	validation-rmse:6.66606                                                    
[60]	validation-rmse:6.64324                                                    
[80]	validation-rmse:6.62970                                                    
[100]	validation-rmse:6.61718                                                   
[120]	validation-rmse:6.60836                                                   
[140]	validation-rmse:6.59977                                                   
[160]	validation-rmse:6.59148                                                   
[180]	validation-rmse:6.58494                                                   
[200]	validation-rmse:6.57788                                                   
[220]	validation-rmse:6.57126                                                   
[240]	validation-rmse:6.5642





[0]	validation-rmse:11.79688                                                    
[20]	validation-rmse:7.59202                                                    
[40]	validation-rmse:6.75784                                                    
[60]	validation-rmse:6.57850                                                    
[80]	validation-rmse:6.52382                                                    
[100]	validation-rmse:6.50303                                                   
[120]	validation-rmse:6.49222                                                   
[140]	validation-rmse:6.48239                                                   
[160]	validation-rmse:6.47436                                                   
[180]	validation-rmse:6.46726                                                   
[200]	validation-rmse:6.46025                                                   
[220]	validation-rmse:6.45408                                                   
[240]	validation-rmse:6.4486





[0]	validation-rmse:11.31496                                                    
[20]	validation-rmse:6.63179                                                    
[40]	validation-rmse:6.46161                                                    
[60]	validation-rmse:6.43748                                                    
[80]	validation-rmse:6.41898                                                    
[100]	validation-rmse:6.40643                                                   
[120]	validation-rmse:6.39347                                                   
[140]	validation-rmse:6.38335                                                   
[160]	validation-rmse:6.37526                                                   
[180]	validation-rmse:6.36758                                                   
[200]	validation-rmse:6.36130                                                   
[220]	validation-rmse:6.35662                                                   
[240]	validation-rmse:6.3510





[0]	validation-rmse:10.72627                                                     
[20]	validation-rmse:6.49606                                                     
[40]	validation-rmse:6.44649                                                     
[60]	validation-rmse:6.42107                                                     
[80]	validation-rmse:6.40515                                                     
[100]	validation-rmse:6.39273                                                    
[120]	validation-rmse:6.38158                                                    
[140]	validation-rmse:6.37236                                                    
[160]	validation-rmse:6.36600                                                    
[180]	validation-rmse:6.36041                                                    
[200]	validation-rmse:6.35538                                                    
[220]	validation-rmse:6.34952                                                    
[240]	validation





[0]	validation-rmse:10.81331                                                     
[20]	validation-rmse:6.60735                                                    
[40]	validation-rmse:6.56204                                                    
[60]	validation-rmse:6.53931                                                    
[80]	validation-rmse:6.52383                                                    
[100]	validation-rmse:6.51190                                                   
[120]	validation-rmse:6.49846                                                   
[140]	validation-rmse:6.48578                                                   
[160]	validation-rmse:6.47688                                                   
[180]	validation-rmse:6.46992                                                   
[200]	validation-rmse:6.46393                                                   
[220]	validation-rmse:6.45701                                                   
[240]	validation-rmse:6.450





[0]	validation-rmse:11.22822                                                     
[20]	validation-rmse:6.60651                                                     
[40]	validation-rmse:6.47696                                                     
[60]	validation-rmse:6.45326                                                     
[80]	validation-rmse:6.43515                                                     
[100]	validation-rmse:6.42017                                                    
[120]	validation-rmse:6.40766                                                    
[140]	validation-rmse:6.39751                                                    
[160]	validation-rmse:6.38867                                                    
[180]	validation-rmse:6.38054                                                    
[200]	validation-rmse:6.37436                                                    
[220]	validation-rmse:6.36740                                                    
[240]	validation





[0]	validation-rmse:11.80450                                                     
[20]	validation-rmse:7.66008                                                     
[40]	validation-rmse:6.81816                                                     
[60]	validation-rmse:6.63191                                                     
[80]	validation-rmse:6.57816                                                     
[100]	validation-rmse:6.55820                                                    
[120]	validation-rmse:6.54698                                                    
[140]	validation-rmse:6.53751                                                    
[160]	validation-rmse:6.53003                                                    
[180]	validation-rmse:6.52406                                                    
[200]	validation-rmse:6.51748                                                    
[220]	validation-rmse:6.51256                                                    
[240]	validation





[0]	validation-rmse:11.56366                                                     
[20]	validation-rmse:6.88975                                                     
[40]	validation-rmse:6.50700                                                     
[60]	validation-rmse:6.44879                                                     
[80]	validation-rmse:6.43281                                                     
[100]	validation-rmse:6.41883                                                    
[120]	validation-rmse:6.40809                                                    
[140]	validation-rmse:6.39864                                                    
[160]	validation-rmse:6.39049                                                    
[180]	validation-rmse:6.38326                                                    
[200]	validation-rmse:6.37698                                                    
[220]	validation-rmse:6.37192                                                    
[240]	validation





[0]	validation-rmse:11.25707                                                     
[20]	validation-rmse:6.57171                                                     
[40]	validation-rmse:6.42442                                                     
[60]	validation-rmse:6.40293                                                     
[80]	validation-rmse:6.38632                                                     
[100]	validation-rmse:6.37454                                                    
[120]	validation-rmse:6.36505                                                    
[140]	validation-rmse:6.35740                                                    
[160]	validation-rmse:6.35147                                                    
[180]	validation-rmse:6.34632                                                    
[200]	validation-rmse:6.34158                                                    
[220]	validation-rmse:6.33640                                                    
[240]	validation





[0]	validation-rmse:10.35822                                                     
[20]	validation-rmse:6.57204                                                     
[40]	validation-rmse:6.53247                                                     
[60]	validation-rmse:6.50436                                                     
[80]	validation-rmse:6.48597                                                     
[100]	validation-rmse:6.46978                                                    
[120]	validation-rmse:6.45695                                                    
[140]	validation-rmse:6.44506                                                    
[160]	validation-rmse:6.43610                                                    
[180]	validation-rmse:6.42837                                                    
[200]	validation-rmse:6.42100                                                    
[220]	validation-rmse:6.41491                                                    
[240]	validation





[0]	validation-rmse:11.51832                                                     
[20]	validation-rmse:7.05087                                                     
[40]	validation-rmse:6.80174                                                     
[60]	validation-rmse:6.77286                                                     
[80]	validation-rmse:6.75974                                                     
[100]	validation-rmse:6.74964                                                    
[120]	validation-rmse:6.74110                                                    
[140]	validation-rmse:6.73339                                                    
[160]	validation-rmse:6.72623                                                    
[180]	validation-rmse:6.71994                                                    
[200]	validation-rmse:6.71455                                                    
[220]	validation-rmse:6.70731                                                    
[240]	validation





[0]	validation-rmse:11.74634                                                     
[20]	validation-rmse:7.39321                                                     
[40]	validation-rmse:6.68141                                                     
[60]	validation-rmse:6.53917                                                     
[80]	validation-rmse:6.49903                                                     
[100]	validation-rmse:6.48753                                                    
[120]	validation-rmse:6.47637                                                    
[140]	validation-rmse:6.46675                                                    
[160]	validation-rmse:6.45883                                                    
[180]	validation-rmse:6.45115                                                    
[200]	validation-rmse:6.44380                                                    
[220]	validation-rmse:6.43726                                                    
[240]	validation





[0]	validation-rmse:10.58942                                                     
[20]	validation-rmse:6.46449                                                     
[40]	validation-rmse:6.42008                                                     
[60]	validation-rmse:6.39775                                                     
[80]	validation-rmse:6.38261                                                     
[100]	validation-rmse:6.37163                                                    
[120]	validation-rmse:6.36307                                                    
[140]	validation-rmse:6.35659                                                    
[160]	validation-rmse:6.35116                                                    
[180]	validation-rmse:6.34624                                                    
[200]	validation-rmse:6.34425                                                    
[220]	validation-rmse:6.34012                                                    
[240]	validation





[0]	validation-rmse:10.92887                                                     
[20]	validation-rmse:6.58883                                                    
[40]	validation-rmse:6.52731                                                    
[60]	validation-rmse:6.50330                                                    
[80]	validation-rmse:6.48675                                                    
[100]	validation-rmse:6.47191                                                   
[120]	validation-rmse:6.45952                                                   
[140]	validation-rmse:6.45158                                                   
[160]	validation-rmse:6.44359                                                   
[180]	validation-rmse:6.43705                                                   
[200]	validation-rmse:6.43018                                                   
[220]	validation-rmse:6.42754                                                   
[240]	validation-rmse:6.422





[0]	validation-rmse:9.81553                                                     
[20]	validation-rmse:6.40125                                                    
[40]	validation-rmse:6.36347                                                    
[60]	validation-rmse:6.34872                                                    
[80]	validation-rmse:6.33779                                                    
[100]	validation-rmse:6.33229                                                   
[120]	validation-rmse:6.33021                                                   
[140]	validation-rmse:6.33282                                                   
[160]	validation-rmse:6.33399                                                   
[175]	validation-rmse:6.33563                                                   
 78%|███████▊  | 39/50 [54:22<17:26, 95.10s/trial, best loss: 6.304227754735907]





[0]	validation-rmse:11.67576                                                    
[20]	validation-rmse:7.23664                                                    
[40]	validation-rmse:6.72322                                                    
[60]	validation-rmse:6.64099                                                    
[80]	validation-rmse:6.62062                                                    
[100]	validation-rmse:6.60995                                                   
[120]	validation-rmse:6.60040                                                   
[140]	validation-rmse:6.59122                                                   
[160]	validation-rmse:6.58368                                                   
[180]	validation-rmse:6.57730                                                   
[200]	validation-rmse:6.57122                                                   
[220]	validation-rmse:6.56547                                                   
[240]	validation-rmse:6.5604





[0]	validation-rmse:11.23200                                                    
[20]	validation-rmse:6.62064                                                    
[40]	validation-rmse:6.48925                                                    
[60]	validation-rmse:6.46574                                                    
[80]	validation-rmse:6.44715                                                    
[100]	validation-rmse:6.43396                                                   
[120]	validation-rmse:6.42154                                                   
[140]	validation-rmse:6.41214                                                   
[160]	validation-rmse:6.40326                                                   
[180]	validation-rmse:6.39643                                                   
[200]	validation-rmse:6.38967                                                   
[220]	validation-rmse:6.38335                                                   
[240]	validation-rmse:6.3782





[0]	validation-rmse:8.88912                                                     
[20]	validation-rmse:6.49934                                                    
[40]	validation-rmse:6.45167                                                    
[60]	validation-rmse:6.42628                                                    
[80]	validation-rmse:6.41298                                                    
[100]	validation-rmse:6.39853                                                   
[120]	validation-rmse:6.38762                                                   
[140]	validation-rmse:6.38316                                                   
[160]	validation-rmse:6.37785                                                   
[180]	validation-rmse:6.37436                                                   
[200]	validation-rmse:6.37327                                                   
[220]	validation-rmse:6.36993                                                   
[240]	validation-rmse:6.3679





[0]	validation-rmse:11.41209                                                    
[20]	validation-rmse:6.72741                                                    
[40]	validation-rmse:6.50198                                                    
[60]	validation-rmse:6.45964                                                    
[80]	validation-rmse:6.44548                                                    
[100]	validation-rmse:6.43733                                                   
[120]	validation-rmse:6.43088                                                   
[140]	validation-rmse:6.42588                                                   
[160]	validation-rmse:6.42183                                                   
[180]	validation-rmse:6.41835                                                   
[200]	validation-rmse:6.41463                                                   
[220]	validation-rmse:6.41151                                                   
[240]	validation-rmse:6.4081





[0]	validation-rmse:11.57967                                                      
[20]	validation-rmse:7.01278                                                      
[40]	validation-rmse:6.63254                                                      
[60]	validation-rmse:6.57757                                                      
[80]	validation-rmse:6.56076                                                      
[100]	validation-rmse:6.54756                                                     
[120]	validation-rmse:6.53937                                                     
[140]	validation-rmse:6.53001                                                     
[160]	validation-rmse:6.52286                                                     
[180]	validation-rmse:6.51571                                                     
[200]	validation-rmse:6.50972                                                     
[220]	validation-rmse:6.50461                                                     
[240





[0]	validation-rmse:10.44760                                                       
[20]	validation-rmse:6.46907                                                       
[40]	validation-rmse:6.42915                                                       
[60]	validation-rmse:6.40472                                                       
[80]	validation-rmse:6.38314                                                       
[100]	validation-rmse:6.37129                                                      
[120]	validation-rmse:6.35949                                                      
[140]	validation-rmse:6.34930                                                      
[160]	validation-rmse:6.34283                                                      
[180]	validation-rmse:6.33547                                                      
[200]	validation-rmse:6.32794                                                      
[220]	validation-rmse:6.32157                                               





[0]	validation-rmse:11.45045                                                       
[20]	validation-rmse:6.76601                                                      
[40]	validation-rmse:6.50621                                                      
[60]	validation-rmse:6.47533                                                      
[80]	validation-rmse:6.45713                                                      
[100]	validation-rmse:6.44281                                                     
[120]	validation-rmse:6.43014                                                     
[140]	validation-rmse:6.41997                                                     
[160]	validation-rmse:6.41128                                                     
[180]	validation-rmse:6.40289                                                     
[200]	validation-rmse:6.39591                                                     
[220]	validation-rmse:6.38974                                                     
[24





[0]	validation-rmse:11.22574                                                       
[20]	validation-rmse:6.79542                                                       
[40]	validation-rmse:6.70243                                                       
[60]	validation-rmse:6.68202                                                       
[80]	validation-rmse:6.66937                                                       
[100]	validation-rmse:6.65696                                                      
[120]	validation-rmse:6.64817                                                      
[140]	validation-rmse:6.64104                                                      
[160]	validation-rmse:6.63276                                                      
[180]	validation-rmse:6.62458                                                      
[200]	validation-rmse:6.61643                                                      
[220]	validation-rmse:6.61062                                               





[0]	validation-rmse:9.23344                                                        
[20]	validation-rmse:6.52904                                                      
[40]	validation-rmse:6.50524                                                      
[60]	validation-rmse:6.49564                                                      
[80]	validation-rmse:6.48992                                                      
[100]	validation-rmse:6.48622                                                     
[120]	validation-rmse:6.48309                                                     
[140]	validation-rmse:6.48085                                                     
[160]	validation-rmse:6.47893                                                     
[180]	validation-rmse:6.47807                                                     
[200]	validation-rmse:6.47653                                                     
[220]	validation-rmse:6.47520                                                     
[24





[0]	validation-rmse:9.73061                                                       
[20]	validation-rmse:6.62864                                                      
[40]	validation-rmse:6.58666                                                      
[60]	validation-rmse:6.56282                                                      
[80]	validation-rmse:6.54243                                                      
[100]	validation-rmse:6.52837                                                     
[120]	validation-rmse:6.51471                                                     
[140]	validation-rmse:6.50343                                                     
[160]	validation-rmse:6.49450                                                     
[180]	validation-rmse:6.48225                                                     
[200]	validation-rmse:6.47204                                                     
[220]	validation-rmse:6.46308                                                     
[240





100%|██████████| 50/50 [1:08:03<00:00, 81.68s/trial, best loss: 6.304227754735907]


### Autologging

In [40]:
mlflow.xgboost.autolog()

In [30]:
params = {
    'learning_rate': 0.19030171678228142,
    'max_depth': 29,
    'min_child_weight': 1.000763936993607,
    'objective': 'reg:squarederror',
    'reg_alpha': 0.20924104314941339,
    'reg_lambda': 0.0025628242268120804,
    'seed': 42
}

In [31]:
booster = xgb.train(
    params=params,
    dtrain=train,
    num_boost_round=1000,
    evals=[(valid, "validation")],
    early_stopping_rounds=50,
    verbose_eval=20
)

y_pred = booster.predict(valid)
rmse = root_mean_squared_error(y_val, y_pred)

2025/05/30 16:09:02 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'ca3713d82e7a4f44a2df7de61d6c7418', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


[0]	validation-rmse:10.71397
[20]	validation-rmse:6.53625
[40]	validation-rmse:6.49724
[60]	validation-rmse:6.47088
[80]	validation-rmse:6.44681
[100]	validation-rmse:6.43238
[120]	validation-rmse:6.41863
[140]	validation-rmse:6.40743
[160]	validation-rmse:6.39713
[180]	validation-rmse:6.38836
[200]	validation-rmse:6.37959
[220]	validation-rmse:6.37267
[240]	validation-rmse:6.36569
[260]	validation-rmse:6.36134
[280]	validation-rmse:6.35735
[300]	validation-rmse:6.35217
[320]	validation-rmse:6.34867
[340]	validation-rmse:6.34376
[360]	validation-rmse:6.33996
[380]	validation-rmse:6.33685
[400]	validation-rmse:6.33479
[420]	validation-rmse:6.33094
[440]	validation-rmse:6.32919
[460]	validation-rmse:6.32751
[480]	validation-rmse:6.32493
[500]	validation-rmse:6.32294
[520]	validation-rmse:6.31874
[540]	validation-rmse:6.31647
[560]	validation-rmse:6.31506
[580]	validation-rmse:6.31557
[600]	validation-rmse:6.31512
[620]	validation-rmse:6.31329
[640]	validation-rmse:6.31306
[660]	validatio



#### Linear SVR

In [42]:
svr = LinearSVR(max_iter=1000000)
train_model(svr, X_train, y_train)
y_pred = svr.predict(X_val)
rmse = root_mean_squared_error(y_val, y_pred)



#### Random Forest

In [43]:
rf = RandomForestRegressor()
train_model(rf, X_train, y_train)
y_pred = rf.predict(X_val)
rmse = root_mean_squared_error(y_val, y_pred)

#### Extra Trees

In [None]:
extra = ExtraTreesRegressor()
train_model(extra, X_train, y_train)
y_pred = extra.predict(X_val)
rmse = root_mean_squared_error(y_val, y_pred)

#### Gradient Boosting

In [None]:
gb = GradientBoostingRegressor()
train_model(gb, X_train, y_train)
y_pred = gb.predict(X_val)
rmse = root_mean_squared_error(y_val, y_pred)

### Model Management

In [32]:
mlflow.xgboost.autolog(disable=True)

In [33]:
with mlflow.start_run():
    mlflow.log_params(params)

    mlflow.log_metric('rmse', rmse)

    mlflow.log_artifact(local_path="models/preprocessor.b", artifact_path="preprocessor")
    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")



### Model Retrieval
#### Python

In [34]:
logged_model = 'runs:/65425f7ef71644588856d6163ad035a0/models_mlflow'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

In [35]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: 65425f7ef71644588856d6163ad035a0

#### XGBoost

In [36]:
xgboost_model = mlflow.xgboost.load_model(logged_model)

In [37]:
xgboost_model

<xgboost.core.Booster at 0x13a08b370>

In [38]:
xgboost_model.predict(valid)

array([14.496855 ,  7.109053 , 15.07815  , ..., 13.511075 ,  6.2299643,
        8.019186 ], dtype=float32)

### Model Registry