In [1]:
!python -V

Python 3.12.3


In [2]:
import pandas as pd

In [3]:
import pickle

In [4]:
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error

In [7]:
import mlflow


# Keep run metadata in the SQLite file `mlflow.db` (relative to the working
# directory) and file every run below under a single named experiment.
mlflow.set_tracking_uri(uri="sqlite:///mlflow.db")
mlflow.set_experiment(experiment_name="nyc-taxi-experiment")

<Experiment: artifact_location='/home/kamire/projects/mlops/experiment-tracking/mlruns/1', creation_time=1716473170369, experiment_id='1', last_update_time=1716473170369, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [28]:
# Show the installed MLflow version so the run environment is recorded in the notebook output.
mlflow.__version__

'2.13.0'

In [9]:
def read_dataframe(filename):
    """Load a trip-record parquet file and prepare it for duration modelling.

    Steps:
      1. Parse the pickup / drop-off timestamps.
      2. Add a ``duration`` column holding the trip length in minutes.
      3. Keep only trips lasting between 1 and 60 minutes (inclusive).
      4. Cast the pickup / drop-off location IDs to strings so they can be
         used as categorical features.

    Parameters
    ----------
    filename : str or path-like
        Location of the parquet file, passed straight to ``pd.read_parquet``.

    Returns
    -------
    pandas.DataFrame
        An independent (copied) frame containing the filtered trips, with the
        extra ``duration`` column and string-typed location IDs.
    """
    df = pd.read_parquet(filename)

    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorised timedelta -> minutes conversion (no per-row Python lambda).
    trip_length = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_length.dt.total_seconds() / 60

    # Take an explicit copy of the filtered rows: the assignments below (and
    # any column a caller adds later) must write to a real frame, not to a
    # view/slice of the unfiltered one, which triggers pandas'
    # chained-assignment warning.
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [10]:
# The 2023-01 file is the training set; the 2023-02 file is held out for validation.
df_train, df_val = (
    read_dataframe(path)
    for path in (
        './homework/data/green_tripdata_2023-01.parquet',
        './homework/data/green_tripdata_2023-02.parquet',
    )
)

In [11]:
# Number of trips left in each split after read_dataframe's 1-60 minute duration filter.
len(df_train), len(df_val)

(65946, 62574)

In [12]:
# Combine pickup and drop-off zone into one "<pickup>_<dropoff>" route feature.
df_train['PU_DO'] = df_train['PULocationID'].str.cat(df_train['DOLocationID'], sep='_')
df_val['PU_DO'] = df_val['PULocationID'].str.cat(df_val['DOLocationID'], sep='_')

In [13]:
# Features: the combined pickup/drop-off route (instead of the two separate
# location IDs) plus the trip distance.
categorical = ['PU_DO']
numerical = ['trip_distance']

# One {column: value} record per trip, as expected by DictVectorizer.
train_dicts = df_train[categorical + numerical].to_dict(orient='records')
val_dicts = df_val[categorical + numerical].to_dict(orient='records')

# Fit the vectorizer on the training records only; the validation records are
# encoded with that same fitted mapping.
dv = DictVectorizer()
X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

In [14]:
# Regression target: trip duration in minutes, as plain arrays for both splits.
target = 'duration'
y_train, y_val = (frame[target].values for frame in (df_train, df_val))

In [15]:
# Baseline model: plain linear regression on the vectorised features.
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

# Validation RMSE. The root is taken explicitly because the `squared=False`
# argument of mean_squared_error is deprecated in scikit-learn 1.4 and
# removed in 1.6.
mean_squared_error(y_val, y_pred) ** 0.5



6.037710512990993

In [17]:
import os

# Persist the fitted vectorizer together with the model so both can be
# restored as a pair. Create the output directory first: open() does not
# create missing parent directories, so the cell would otherwise fail on a
# fresh checkout.
os.makedirs('models', exist_ok=True)
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [19]:
# Track one Lasso experiment in MLflow: tags, data paths, hyperparameter,
# validation RMSE and a model artifact.
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    mlflow.log_param("train-data-path", "./homework/data/green_tripdata_2023-01.parquet")
    mlflow.log_param("valid-data-path", "./homework/data/green_tripdata_2023-02.parquet")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    # Root taken explicitly: `squared=False` is deprecated in scikit-learn 1.4
    # and removed in 1.6.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)

    # NOTE(review): models/lin_reg.bin was pickled by an earlier cell from the
    # LinearRegression fit, so the artifact attached to this run is NOT the
    # Lasso model whose alpha/rmse are logged above. Confirm whether that is
    # intended or whether the Lasso model should be pickled and logged here.
    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models_pickle")



In [20]:
import xgboost as xgb

In [21]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [22]:
# Wrap the vectorised features and their targets in XGBoost DMatrix objects.
train = xgb.DMatrix(data=X_train, label=y_train)
valid = xgb.DMatrix(data=X_val, label=y_val)

In [23]:
def objective(params):
    """Train one XGBoost candidate and report its validation RMSE.

    Intended as the ``fn`` callback for ``hyperopt.fmin``. Each call opens its
    own MLflow run, logs the sampled parameters, trains on the notebook-level
    ``train`` DMatrix with early stopping monitored on ``valid``, and logs the
    resulting RMSE.

    Parameters
    ----------
    params : dict
        XGBoost training parameters for this trial; passed unchanged to
        ``xgb.train`` and to ``mlflow.log_params``.

    Returns
    -------
    dict
        ``{'loss': <validation RMSE>, 'status': STATUS_OK}``.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            # Stop once the validation metric has not improved for 50 rounds.
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        # Root taken explicitly: `squared=False` is deprecated in
        # scikit-learn 1.4 and removed in 1.6.
        rmse = mean_squared_error(y_val, y_pred) ** 0.5
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [24]:
# Hyperparameter search space for the XGBoost objective.
# hp.loguniform(label, low, high) samples exp(uniform(low, high)), so the
# bounds below are exponents, not raw values.
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),  # integer in [4, 100]
    'learning_rate': hp.loguniform('learning_rate', -3, 0),       # exp(-3) .. exp(0)
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),               # exp(-5) .. exp(-1)
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),             # exp(-6) .. exp(-1)
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),  # exp(-1) .. exp(3)
    # 'reg:linear' is a deprecated alias; 'reg:squarederror' is the current
    # name for the same squared-error regression objective.
    'objective': 'reg:squarederror',
    'seed': 42
}

# Run 50 TPE-guided trials; every trial is logged to MLflow by `objective`.
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:8.21353                           
[1]	validation-rmse:7.37656                           
[2]	validation-rmse:6.76278                           
[3]	validation-rmse:6.31595                           
[4]	validation-rmse:5.99484                           
[5]	validation-rmse:5.76414                           
[6]	validation-rmse:5.60398                           
[7]	validation-rmse:5.48988                           
[8]	validation-rmse:5.40879                           
[9]	validation-rmse:5.34975                           
[10]	validation-rmse:5.31121                          
[11]	validation-rmse:5.27874                          
[12]	validation-rmse:5.25586                          
[13]	validation-rmse:5.23513                          
[14]	validation-rmse:5.22219                          
[15]	validation-rmse:5.21184                          
[16]	validation-rmse:5.20498                          
[17]	validation-rmse:5.19977                          
[18]	valid





[8]	validation-rmse:5.59975                                                    
[9]	validation-rmse:5.57487                                                    
[10]	validation-rmse:5.56112                                                   
[11]	validation-rmse:5.54994                                                   
[12]	validation-rmse:5.54241                                                   
[13]	validation-rmse:5.53697                                                   
[14]	validation-rmse:5.53486                                                   
[15]	validation-rmse:5.52884                                                   
[16]	validation-rmse:5.52501                                                   
[17]	validation-rmse:5.52281                                                   
[18]	validation-rmse:5.51965                                                   
[19]	validation-rmse:5.51499                                                   
[20]	validation-rmse:5.50943            





[0]	validation-rmse:8.82393                                                    
[1]	validation-rmse:8.38335                                                    
[2]	validation-rmse:7.99006                                                    
[3]	validation-rmse:7.64877                                                    
[4]	validation-rmse:7.34007                                                    
[5]	validation-rmse:7.07373                                                    
[6]	validation-rmse:6.83568                                                    
[7]	validation-rmse:6.63185                                                    
[8]	validation-rmse:6.45494                                                    
[9]	validation-rmse:6.29165                                                    
[10]	validation-rmse:6.16080                                                   
[11]	validation-rmse:6.04192                                                   
[12]	validation-rmse:5.94417            





[4]	validation-rmse:5.47717                                                    
[5]	validation-rmse:5.46431                                                    
[6]	validation-rmse:5.46122                                                    
[7]	validation-rmse:5.46092                                                    
[8]	validation-rmse:5.45085                                                    
[9]	validation-rmse:5.44814                                                    
[10]	validation-rmse:5.44731                                                   
[11]	validation-rmse:5.44298                                                   
[12]	validation-rmse:5.43898                                                   
[13]	validation-rmse:5.43594                                                   
[14]	validation-rmse:5.43396                                                   
[15]	validation-rmse:5.42876                                                   
[16]	validation-rmse:5.42531            





[0]	validation-rmse:8.14769                                                    
[1]	validation-rmse:7.28633                                                    
[2]	validation-rmse:6.67763                                                    
[3]	validation-rmse:6.24261                                                    
[4]	validation-rmse:5.95184                                                    
[5]	validation-rmse:5.72795                                                    
[6]	validation-rmse:5.59823                                                    
[7]	validation-rmse:5.48658                                                    
[8]	validation-rmse:5.42070                                                    
[9]	validation-rmse:5.37283                                                    
[10]	validation-rmse:5.33294                                                   
[11]	validation-rmse:5.31711                                                   
[12]	validation-rmse:5.30537            





[0]	validation-rmse:8.06188                                                    
[1]	validation-rmse:7.16041                                                    
[2]	validation-rmse:6.53118                                                    
[3]	validation-rmse:6.10488                                                    
[4]	validation-rmse:5.82001                                                    
[5]	validation-rmse:5.63087                                                    
[6]	validation-rmse:5.50524                                                    
[7]	validation-rmse:5.41911                                                    
[8]	validation-rmse:5.36036                                                    
[9]	validation-rmse:5.32178                                                    
[10]	validation-rmse:5.29327                                                   
[11]	validation-rmse:5.27333                                                   
[12]	validation-rmse:5.25760            





[0]	validation-rmse:8.88853                                                    
[1]	validation-rmse:8.49681                                                    
[2]	validation-rmse:8.14193                                                    
[3]	validation-rmse:7.82150                                                    
[4]	validation-rmse:7.53101                                                    
[5]	validation-rmse:7.27111                                                    
[6]	validation-rmse:7.04000                                                    
[7]	validation-rmse:6.83025                                                    
[8]	validation-rmse:6.64588                                                    
[9]	validation-rmse:6.47984                                                    
[10]	validation-rmse:6.33311                                                   
[11]	validation-rmse:6.20109                                                   
[12]	validation-rmse:6.08241            





[0]	validation-rmse:6.18660                                                    
[1]	validation-rmse:5.44499                                                    
[2]	validation-rmse:5.27990                                                    
[3]	validation-rmse:5.23354                                                    
[4]	validation-rmse:5.21273                                                    
[5]	validation-rmse:5.20151                                                    
[6]	validation-rmse:5.19947                                                    
[7]	validation-rmse:5.19317                                                    
[8]	validation-rmse:5.17803                                                    
[9]	validation-rmse:5.17709                                                    
[10]	validation-rmse:5.17430                                                   
[11]	validation-rmse:5.17252                                                   
[12]	validation-rmse:5.17400            





[0]	validation-rmse:6.45112                                                    
[1]	validation-rmse:5.65950                                                    
[2]	validation-rmse:5.45041                                                    
[3]	validation-rmse:5.37875                                                    
[4]	validation-rmse:5.34335                                                    
[5]	validation-rmse:5.32658                                                    
[6]	validation-rmse:5.31484                                                    
[7]	validation-rmse:5.30101                                                    
[8]	validation-rmse:5.29852                                                    
[9]	validation-rmse:5.29541                                                    
[10]	validation-rmse:5.29324                                                   
[11]	validation-rmse:5.28152                                                   
[12]	validation-rmse:5.28056            





[1]	validation-rmse:8.63161                                                    
[2]	validation-rmse:8.32788                                                    
[3]	validation-rmse:8.04943                                                    
[4]	validation-rmse:7.79452                                                    
[5]	validation-rmse:7.56235                                                    
[6]	validation-rmse:7.34967                                                    
[7]	validation-rmse:7.15646                                                    
[8]	validation-rmse:6.97961                                                    
[9]	validation-rmse:6.81948                                                    
[10]	validation-rmse:6.67390                                                   
[11]	validation-rmse:6.54171                                                   
[12]	validation-rmse:6.42263                                                   
[13]	validation-rmse:6.31493            





[0]	validation-rmse:8.31277                                                     
[1]	validation-rmse:7.53005                                                     
[2]	validation-rmse:6.93249                                                     
[3]	validation-rmse:6.48399                                                     
[4]	validation-rmse:6.15185                                                     
[5]	validation-rmse:5.90812                                                     
[6]	validation-rmse:5.73180                                                     
[7]	validation-rmse:5.60245                                                     
[8]	validation-rmse:5.50814                                                     
[9]	validation-rmse:5.43666                                                     
[10]	validation-rmse:5.38552                                                    
[11]	validation-rmse:5.34697                                                    
[12]	validation-rmse:5.32007





[0]	validation-rmse:8.38218                                                     
[1]	validation-rmse:7.63379                                                     
[2]	validation-rmse:7.04532                                                     
[3]	validation-rmse:6.58907                                                     
[4]	validation-rmse:6.23989                                                     
[5]	validation-rmse:5.97348                                                     
[6]	validation-rmse:5.77203                                                     
[7]	validation-rmse:5.62181                                                     
[8]	validation-rmse:5.51028                                                     
[9]	validation-rmse:5.42782                                                     
[10]	validation-rmse:5.36601                                                    
[11]	validation-rmse:5.32054                                                    
[12]	validation-rmse:5.28502





[0]	validation-rmse:8.91177                                                     
[1]	validation-rmse:8.53821                                                     
[2]	validation-rmse:8.19850                                                     
[3]	validation-rmse:7.88950                                                     
[4]	validation-rmse:7.60968                                                     
[5]	validation-rmse:7.35709                                                     
[6]	validation-rmse:7.12925                                                     
[7]	validation-rmse:6.92426                                                     
[8]	validation-rmse:6.74010                                                     
[9]	validation-rmse:6.57509                                                     
[10]	validation-rmse:6.42779                                                    
[11]	validation-rmse:6.29624                                                    
[12]	validation-rmse:6.17951





[0]	validation-rmse:6.86233                                                     
[1]	validation-rmse:5.87686                                                     
[2]	validation-rmse:5.52691                                                     
[3]	validation-rmse:5.41003                                                     
[4]	validation-rmse:5.33328                                                     
[5]	validation-rmse:5.31270                                                     
[6]	validation-rmse:5.29691                                                     
[7]	validation-rmse:5.28932                                                     
[8]	validation-rmse:5.28692                                                     
[9]	validation-rmse:5.28623                                                     
[10]	validation-rmse:5.27724                                                    
[11]	validation-rmse:5.27345                                                    
[12]	validation-rmse:5.27056





[0]	validation-rmse:5.54301                                                     
[1]	validation-rmse:5.47594                                                     
[2]	validation-rmse:5.46822                                                     
[3]	validation-rmse:5.46034                                                     
[4]	validation-rmse:5.46249                                                     
[5]	validation-rmse:5.45040                                                     
[6]	validation-rmse:5.45181                                                     
[7]	validation-rmse:5.45309                                                     
[8]	validation-rmse:5.44964                                                     
[9]	validation-rmse:5.44680                                                     
[10]	validation-rmse:5.45201                                                    
[11]	validation-rmse:5.46097                                                    
[12]	validation-rmse:5.46647





[0]	validation-rmse:8.95809                                                     
[1]	validation-rmse:8.62317                                                     
[2]	validation-rmse:8.31347                                                     
[3]	validation-rmse:8.02865                                                     
[4]	validation-rmse:7.76699                                                     
[5]	validation-rmse:7.52690                                                     
[6]	validation-rmse:7.30822                                                     
[7]	validation-rmse:7.10788                                                     
[8]	validation-rmse:6.92367                                                     
[9]	validation-rmse:6.75759                                                     
[10]	validation-rmse:6.60515                                                    
[11]	validation-rmse:6.46582                                                    
[12]	validation-rmse:6.34228





[0]	validation-rmse:8.90394                                                     
[1]	validation-rmse:8.52591                                                     
[2]	validation-rmse:8.18175                                                     
[3]	validation-rmse:7.87295                                                     
[4]	validation-rmse:7.59305                                                     
[5]	validation-rmse:7.34183                                                     
[6]	validation-rmse:7.11617                                                     
[7]	validation-rmse:6.91481                                                     
[8]	validation-rmse:6.73349                                                     
[9]	validation-rmse:6.57250                                                     
[10]	validation-rmse:6.42701                                                    
[11]	validation-rmse:6.29737                                                    
[12]	validation-rmse:6.18446





[0]	validation-rmse:5.84696
[1]	validation-rmse:5.49581                                                     
[2]	validation-rmse:5.42114                                                     
[3]	validation-rmse:5.38864                                                     
[4]	validation-rmse:5.37585                                                     
[5]	validation-rmse:5.36441                                                     
[6]	validation-rmse:5.36148                                                     
[7]	validation-rmse:5.35423                                                     
[8]	validation-rmse:5.35194                                                     
[9]	validation-rmse:5.34681                                                     
[10]	validation-rmse:5.34198                                                    
[11]	validation-rmse:5.33597                                                    
[12]	validation-rmse:5.33071                                                    






[0]	validation-rmse:8.51272                                                     
[1]	validation-rmse:7.84572                                                     
[2]	validation-rmse:7.30100                                                     
[3]	validation-rmse:6.85940                                                     
[4]	validation-rmse:6.50848                                                     
[5]	validation-rmse:6.22716                                                     
[6]	validation-rmse:6.00282                                                     
[7]	validation-rmse:5.83002                                                     
[8]	validation-rmse:5.69074                                                     
[9]	validation-rmse:5.58015                                                     
[10]	validation-rmse:5.50153                                                    
[11]	validation-rmse:5.43534                                                    
[12]	validation-rmse:5.38136





[0]	validation-rmse:7.73934                                                     
[1]	validation-rmse:6.73355                                                     
[2]	validation-rmse:6.12126                                                     
[3]	validation-rmse:5.76090                                                     
[4]	validation-rmse:5.55158                                                     
[5]	validation-rmse:5.42852                                                     
[6]	validation-rmse:5.35174                                                     
[7]	validation-rmse:5.30630                                                     
[8]	validation-rmse:5.27663                                                     
[9]	validation-rmse:5.25548                                                     
[10]	validation-rmse:5.24101                                                    
[11]	validation-rmse:5.22777                                                    
[12]	validation-rmse:5.22095





[0]	validation-rmse:8.78258                                                     
[1]	validation-rmse:8.30885                                                     
[2]	validation-rmse:7.89302                                                     
[3]	validation-rmse:7.53019                                                     
[4]	validation-rmse:7.21467                                                     
[5]	validation-rmse:6.94023                                                     
[6]	validation-rmse:6.70594                                                     
[7]	validation-rmse:6.50081                                                     
[8]	validation-rmse:6.32621                                                     
[9]	validation-rmse:6.17258                                                     
[10]	validation-rmse:6.04295                                                    
[11]	validation-rmse:5.93104                                                    
[12]	validation-rmse:5.83776





[0]	validation-rmse:8.71373                                                     
[1]	validation-rmse:8.18974                                                     
[2]	validation-rmse:7.74053                                                     
[3]	validation-rmse:7.35701                                                     
[4]	validation-rmse:7.03068                                                     
[5]	validation-rmse:6.75216                                                     
[6]	validation-rmse:6.51579                                                     
[7]	validation-rmse:6.31250                                                     
[8]	validation-rmse:6.14932                                                     
[9]	validation-rmse:6.00782                                                     
[10]	validation-rmse:5.89478                                                    
[11]	validation-rmse:5.79113                                                    
[12]	validation-rmse:5.71226





[1]	validation-rmse:8.18441                                                     
[2]	validation-rmse:7.73420                                                     
[3]	validation-rmse:7.35197                                                     
[4]	validation-rmse:7.02669                                                     
[5]	validation-rmse:6.75197                                                     
[6]	validation-rmse:6.52296                                                     
[7]	validation-rmse:6.32580                                                     
[8]	validation-rmse:6.16175                                                     
[9]	validation-rmse:6.02469                                                     
[10]	validation-rmse:5.91302                                                    
[11]	validation-rmse:5.81729                                                    
[12]	validation-rmse:5.73878                                                    
[13]	validation-rmse:5.67461





[0]	validation-rmse:9.01017                                                     
[1]	validation-rmse:8.71937                                                     
[2]	validation-rmse:8.44888                                                     
[3]	validation-rmse:8.19663                                                     
[4]	validation-rmse:7.96223                                                     
[5]	validation-rmse:7.74510                                                     
[6]	validation-rmse:7.54349                                                     
[7]	validation-rmse:7.35707                                                     
[8]	validation-rmse:7.18451                                                     
[9]	validation-rmse:7.02481                                                     
[10]	validation-rmse:6.87658                                                    
[11]	validation-rmse:6.74077                                                    
[12]	validation-rmse:6.61482





[0]	validation-rmse:8.71377                                                     
[1]	validation-rmse:8.18715                                                     
[2]	validation-rmse:7.73224                                                     
[3]	validation-rmse:7.34189                                                     
[4]	validation-rmse:7.00917                                                     
[5]	validation-rmse:6.72395                                                     
[6]	validation-rmse:6.48231                                                     
[7]	validation-rmse:6.27806                                                     
[8]	validation-rmse:6.10700                                                     
[9]	validation-rmse:5.96227                                                     
[10]	validation-rmse:5.83967                                                    
[11]	validation-rmse:5.73832                                                    
[12]	validation-rmse:5.65384





[0]	validation-rmse:8.57400                                                    
[1]	validation-rmse:7.95097                                                    
[2]	validation-rmse:7.43352                                                    
[3]	validation-rmse:7.00592                                                    
[4]	validation-rmse:6.66022                                                    
[5]	validation-rmse:6.37540                                                    
[6]	validation-rmse:6.14703                                                    
[7]	validation-rmse:5.96291                                                    
[8]	validation-rmse:5.81581                                                    
[9]	validation-rmse:5.69591                                                    
[10]	validation-rmse:5.60224                                                   
[11]	validation-rmse:5.52738                                                   
[12]	validation-rmse:5.46746            





[0]	validation-rmse:9.00327                                                    
[1]	validation-rmse:8.70658                                                    
[2]	validation-rmse:8.43012                                                    
[3]	validation-rmse:8.17357                                                    
[4]	validation-rmse:7.93512                                                    
[5]	validation-rmse:7.71355                                                    
[6]	validation-rmse:7.50883                                                    
[7]	validation-rmse:7.31959                                                    
[8]	validation-rmse:7.14467                                                    
[9]	validation-rmse:6.98342                                                    
[10]	validation-rmse:6.83528                                                   
[11]	validation-rmse:6.69811                                                   
[12]	validation-rmse:6.57241            





[0]	validation-rmse:7.51049                                                     
[1]	validation-rmse:6.44804                                                     
[2]	validation-rmse:5.86105                                                     
[3]	validation-rmse:5.54187                                                     
[4]	validation-rmse:5.37534                                                     
[5]	validation-rmse:5.29024                                                     
[6]	validation-rmse:5.24127                                                     
[7]	validation-rmse:5.21403                                                     
[8]	validation-rmse:5.20058                                                     
[9]	validation-rmse:5.18755                                                     
[10]	validation-rmse:5.18163                                                    
[11]	validation-rmse:5.17699                                                    
[12]	validation-rmse:5.17539





[0]	validation-rmse:9.00948                                                     
[1]	validation-rmse:8.71829                                                     
[2]	validation-rmse:8.44755                                                     
[3]	validation-rmse:8.19618                                                     
[4]	validation-rmse:7.96311                                                     
[5]	validation-rmse:7.74706                                                     
[6]	validation-rmse:7.54711                                                     
[7]	validation-rmse:7.36241                                                     
[8]	validation-rmse:7.19185                                                     
[9]	validation-rmse:7.03468                                                     
[10]	validation-rmse:6.89014                                                    
[11]	validation-rmse:6.75689                                                    
[12]	validation-rmse:6.63463





[0]	validation-rmse:8.56923                                                     
[1]	validation-rmse:7.94063                                                     
[2]	validation-rmse:7.42216                                                     
[3]	validation-rmse:6.99451                                                     
[4]	validation-rmse:6.64563                                                     
[5]	validation-rmse:6.36306                                                     
[6]	validation-rmse:6.13469                                                     
[7]	validation-rmse:5.95115                                                     
[8]	validation-rmse:5.80537                                                     
[9]	validation-rmse:5.68734                                                     
[10]	validation-rmse:5.59476                                                    
[11]	validation-rmse:5.51880                                                    
[12]	validation-rmse:5.45996





[0]	validation-rmse:8.80904                                                     
[1]	validation-rmse:8.35609                                                     
[2]	validation-rmse:7.95737                                                     
[3]	validation-rmse:7.60762                                                     
[4]	validation-rmse:7.30251                                                     
[5]	validation-rmse:7.03617                                                     
[6]	validation-rmse:6.80518                                                     
[7]	validation-rmse:6.60254                                                     
[8]	validation-rmse:6.42741                                                     
[9]	validation-rmse:6.27813                                                     
[10]	validation-rmse:6.14750                                                    
[11]	validation-rmse:6.03596                                                    
[12]	validation-rmse:5.93951





[0]	validation-rmse:7.31956                                                     
[1]	validation-rmse:6.28580                                                     
[2]	validation-rmse:5.78382                                                     
[3]	validation-rmse:5.54644                                                     
[4]	validation-rmse:5.42995                                                     
[5]	validation-rmse:5.36853                                                     
[6]	validation-rmse:5.33411                                                     
[7]	validation-rmse:5.31580                                                     
[8]	validation-rmse:5.30379                                                     
[9]	validation-rmse:5.29264                                                     
[10]	validation-rmse:5.28541                                                    
[11]	validation-rmse:5.27916                                                    
[12]	validation-rmse:5.26674





[5]	validation-rmse:6.75546                                                     
[6]	validation-rmse:6.53626                                                     
[7]	validation-rmse:6.35719                                                     
[8]	validation-rmse:6.20833                                                     
[9]	validation-rmse:6.08315                                                     
[10]	validation-rmse:5.98104                                                    
[11]	validation-rmse:5.90009                                                    
[12]	validation-rmse:5.83087                                                    
[13]	validation-rmse:5.77377                                                    
[14]	validation-rmse:5.72591                                                    
[15]	validation-rmse:5.68954                                                    
[16]	validation-rmse:5.65714                                                    
[17]	validation-rmse:5.62940





[0]	validation-rmse:8.83451                                                     
[1]	validation-rmse:8.39829                                                     
[2]	validation-rmse:8.00991                                                     
[3]	validation-rmse:7.66497                                                     
[4]	validation-rmse:7.35987                                                     
[5]	validation-rmse:7.08949                                                     
[6]	validation-rmse:6.85162                                                     
[7]	validation-rmse:6.64276                                                     
[8]	validation-rmse:6.45983                                                     
[9]	validation-rmse:6.29963                                                     
[10]	validation-rmse:6.15989                                                    
[11]	validation-rmse:6.03766                                                    
[12]	validation-rmse:5.93139





[0]	validation-rmse:8.85645                                                      
[1]	validation-rmse:8.44037                                                      
[2]	validation-rmse:8.06947                                                      
[3]	validation-rmse:7.73975                                                      
[4]	validation-rmse:7.44774                                                      
[5]	validation-rmse:7.18973                                                      
[6]	validation-rmse:6.96253                                                      
[7]	validation-rmse:6.76294                                                      
[8]	validation-rmse:6.58776                                                      
[9]	validation-rmse:6.43437                                                      
[10]	validation-rmse:6.30074                                                     
[11]	validation-rmse:6.18433                                                     
[12]	validation-





[0]	validation-rmse:8.14848                                                      
[1]	validation-rmse:7.28429                                                      
[2]	validation-rmse:6.66126                                                      
[3]	validation-rmse:6.22217                                                      
[4]	validation-rmse:5.91541                                                      
[5]	validation-rmse:5.70351                                                      
[6]	validation-rmse:5.55935                                                      
[7]	validation-rmse:5.45819                                                      
[8]	validation-rmse:5.38574                                                      
[9]	validation-rmse:5.33668                                                      
[10]	validation-rmse:5.30193                                                     
[11]	validation-rmse:5.27610                                                     
[12]	validation-





[0]	validation-rmse:7.95282                                                      
[1]	validation-rmse:7.02344                                                      
[2]	validation-rmse:6.41317                                                      
[3]	validation-rmse:6.01814                                                      
[4]	validation-rmse:5.76619                                                      
[5]	validation-rmse:5.61090                                                      
[6]	validation-rmse:5.51062                                                      
[7]	validation-rmse:5.44439                                                      
[8]	validation-rmse:5.40089                                                      
[9]	validation-rmse:5.37210                                                      
[10]	validation-rmse:5.35340                                                     
[11]	validation-rmse:5.33871                                                     
[12]	validation-





[2]	validation-rmse:8.46768                                                      
[3]	validation-rmse:8.22205                                                      
[4]	validation-rmse:7.99446                                                      
[5]	validation-rmse:7.78336                                                      
[6]	validation-rmse:7.58836                                                      
[7]	validation-rmse:7.40763                                                      
[8]	validation-rmse:7.24138                                                      
[9]	validation-rmse:7.08783                                                      
[10]	validation-rmse:6.94676                                                     
[11]	validation-rmse:6.81631                                                     
[12]	validation-rmse:6.69683                                                     
[13]	validation-rmse:6.58671                                                     
[14]	validation-





[4]	validation-rmse:6.47230                                                      
[5]	validation-rmse:6.22914                                                      
[6]	validation-rmse:6.04291                                                      
[7]	validation-rmse:5.89924                                                      
[8]	validation-rmse:5.79176                                                      
[9]	validation-rmse:5.71048                                                      
[10]	validation-rmse:5.64778                                                     
[11]	validation-rmse:5.59950                                                     
[12]	validation-rmse:5.56445                                                     
[13]	validation-rmse:5.53405                                                     
[14]	validation-rmse:5.51131                                                     
[15]	validation-rmse:5.49185                                                     
[16]	validation-





[0]	validation-rmse:8.85734                                                      
[1]	validation-rmse:8.44052                                                      
[2]	validation-rmse:8.06724                                                      
[3]	validation-rmse:7.73432                                                      
[4]	validation-rmse:7.43790                                                      
[5]	validation-rmse:7.17489                                                      
[6]	validation-rmse:6.94200                                                      
[7]	validation-rmse:6.73685                                                      
[8]	validation-rmse:6.55602                                                      
[9]	validation-rmse:6.39698                                                      
[10]	validation-rmse:6.25759                                                     
[11]	validation-rmse:6.13577                                                     
[12]	validation-





[1]	validation-rmse:8.58077                                                      
[2]	validation-rmse:8.25827                                                      
[3]	validation-rmse:7.96484                                                      
[4]	validation-rmse:7.69857                                                      
[5]	validation-rmse:7.45726                                                      
[6]	validation-rmse:7.23923                                                      
[7]	validation-rmse:7.04246                                                      
[8]	validation-rmse:6.86505                                                      
[9]	validation-rmse:6.70555                                                      
[10]	validation-rmse:6.56251                                                     
[11]	validation-rmse:6.43424                                                     
[12]	validation-rmse:6.31874                                                     
[13]	validation-





[0]	validation-rmse:9.00727                                                      
[1]	validation-rmse:8.71433                                                      
[2]	validation-rmse:8.44050                                                      
[3]	validation-rmse:8.18744                                                      
[4]	validation-rmse:7.95037                                                      
[5]	validation-rmse:7.73201                                                      
[6]	validation-rmse:7.52808                                                      
[7]	validation-rmse:7.33999                                                      
[8]	validation-rmse:7.16618                                                      
[9]	validation-rmse:7.00381                                                      
[10]	validation-rmse:6.85621                                                     
[11]	validation-rmse:6.71890                                                     
[12]	validation-





[0]	validation-rmse:8.95798                                                      
[1]	validation-rmse:8.62210                                                      
[2]	validation-rmse:8.31275                                                      
[3]	validation-rmse:8.02813                                                      
[4]	validation-rmse:7.76712                                                      
[5]	validation-rmse:7.52766                                                      
[6]	validation-rmse:7.30862                                                      
[7]	validation-rmse:7.10839                                                      
[8]	validation-rmse:6.92601                                                      
[9]	validation-rmse:6.75935                                                      
[10]	validation-rmse:6.60760                                                     
[11]	validation-rmse:6.47003                                                     
[12]	validation-





[0]	validation-rmse:8.42553                                                      
[1]	validation-rmse:7.71197                                                      
[2]	validation-rmse:7.14929                                                      
[3]	validation-rmse:6.71197                                                      
[4]	validation-rmse:6.37534                                                      
[5]	validation-rmse:6.11755                                                      
[6]	validation-rmse:5.92037                                                      
[7]	validation-rmse:5.77285                                                      
[8]	validation-rmse:5.65943                                                      
[9]	validation-rmse:5.57511                                                      
[10]	validation-rmse:5.50950                                                     
[11]	validation-rmse:5.46014                                                     
[12]	validation-





[0]	validation-rmse:7.14844                                                      
[1]	validation-rmse:6.06634                                                      
[2]	validation-rmse:5.57250                                                      
[3]	validation-rmse:5.35722                                                      
[4]	validation-rmse:5.26129                                                      
[5]	validation-rmse:5.21517                                                      
[6]	validation-rmse:5.19797                                                      
[7]	validation-rmse:5.18706                                                      
[8]	validation-rmse:5.18009                                                      
[9]	validation-rmse:5.17571                                                      
[10]	validation-rmse:5.17306                                                     
[11]	validation-rmse:5.17199                                                     
[12]	validation-





[0]	validation-rmse:8.07008                                                      
[1]	validation-rmse:7.18631                                                      
[2]	validation-rmse:6.57706                                                      
[3]	validation-rmse:6.16609                                                      
[4]	validation-rmse:5.89306                                                      
[5]	validation-rmse:5.71338                                                      
[6]	validation-rmse:5.59365                                                      
[7]	validation-rmse:5.51267                                                      
[8]	validation-rmse:5.46065                                                      
[9]	validation-rmse:5.42309                                                      
[10]	validation-rmse:5.39880                                                     
[11]	validation-rmse:5.37582                                                     
[12]	validation-





[0]	validation-rmse:8.26912                                                      
[1]	validation-rmse:7.47752                                                      
[2]	validation-rmse:6.89226                                                      
[3]	validation-rmse:6.46593                                                      
[4]	validation-rmse:6.16024                                                      
[5]	validation-rmse:5.94360                                                      
[6]	validation-rmse:5.79136                                                      
[7]	validation-rmse:5.68243                                                      
[8]	validation-rmse:5.60516                                                      
[9]	validation-rmse:5.54960                                                      
[10]	validation-rmse:5.51026                                                     
[11]	validation-rmse:5.47708                                                     
[12]	validation-





[0]	validation-rmse:8.79025                                                      
[1]	validation-rmse:8.32005                                                      
[2]	validation-rmse:7.90659                                                      
[3]	validation-rmse:7.54323                                                      
[4]	validation-rmse:7.22641                                                      
[5]	validation-rmse:6.94981                                                      
[6]	validation-rmse:6.71051                                                      
[7]	validation-rmse:6.50303                                                      
[8]	validation-rmse:6.32363                                                      
[9]	validation-rmse:6.16987                                                      
[10]	validation-rmse:6.03753                                                     
[11]	validation-rmse:5.92373                                                     
[12]	validation-





[0]	validation-rmse:8.94070                                                      
[1]	validation-rmse:8.59026                                                      
[2]	validation-rmse:8.26890                                                      
[3]	validation-rmse:7.97458                                                      
[4]	validation-rmse:7.70599                                                      
[5]	validation-rmse:7.46084                                                      
[6]	validation-rmse:7.23761                                                      
[7]	validation-rmse:7.03425                                                      
[8]	validation-rmse:6.85003                                                      
[9]	validation-rmse:6.68326                                                      
[10]	validation-rmse:6.53274                                                     
[11]	validation-rmse:6.39703                                                     
[12]	validation-





[2]	validation-rmse:5.42912                                                      
[3]	validation-rmse:5.40672                                                      
[4]	validation-rmse:5.39400                                                      
[5]	validation-rmse:5.38169                                                      
[6]	validation-rmse:5.37351                                                      
[7]	validation-rmse:5.36494                                                      
[8]	validation-rmse:5.36050                                                      
[9]	validation-rmse:5.35389                                                      
[10]	validation-rmse:5.34758                                                     
[11]	validation-rmse:5.33851                                                     
[12]	validation-rmse:5.33402                                                     
[13]	validation-rmse:5.32917                                                     
[14]	validation-




In [25]:
# Switch off MLflow's XGBoost autologging: the next cell logs its params,
# metric, preprocessor artifact and model explicitly.
mlflow.xgboost.autolog(disable=True)

In [26]:
# Train one XGBoost model with fixed hyperparameters inside a single MLflow run,
# logging the params, validation RMSE, fitted DictVectorizer and the booster.
with mlflow.start_run():

    # XGBoost's native API consumes DMatrix objects rather than raw matrices.
    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    best_params = {
        'learning_rate': 0.09585355369315604,
        'max_depth': 30,
        'min_child_weight': 1.060597050922164,
        # 'reg:linear' is a deprecated alias; 'reg:squarederror' is the same
        # squared-error loss under its current name.
        'objective': 'reg:squarederror',
        'reg_alpha': 0.018060244040060163,
        'reg_lambda': 0.011658731377413597,
        'seed': 42
    }

    mlflow.log_params(best_params)

    # Up to 1000 boosting rounds, stopping once the validation metric has not
    # improved for 50 consecutive rounds.
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    # Take the root explicitly: `squared=False` was deprecated in
    # scikit-learn 1.4 and removed in 1.6.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)

    # Persist the fitted DictVectorizer alongside the model so the same feature
    # encoding can be reapplied at inference time.
    with open("models/preprocessor.b", "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")



[0]	validation-rmse:8.73788
[1]	validation-rmse:8.22960
[2]	validation-rmse:7.78914
[3]	validation-rmse:7.40823
[4]	validation-rmse:7.08398
[5]	validation-rmse:6.80130
[6]	validation-rmse:6.56559
[7]	validation-rmse:6.35942
[8]	validation-rmse:6.18716
[9]	validation-rmse:6.04364
[10]	validation-rmse:5.91994
[11]	validation-rmse:5.81441
[12]	validation-rmse:5.72701
[13]	validation-rmse:5.65236
[14]	validation-rmse:5.58821
[15]	validation-rmse:5.53629
[16]	validation-rmse:5.49451
[17]	validation-rmse:5.45443
[18]	validation-rmse:5.42147
[19]	validation-rmse:5.39347
[20]	validation-rmse:5.37267
[21]	validation-rmse:5.35128
[22]	validation-rmse:5.33257
[23]	validation-rmse:5.31780
[24]	validation-rmse:5.30449
[25]	validation-rmse:5.29542
[26]	validation-rmse:5.28365
[27]	validation-rmse:5.27505
[28]	validation-rmse:5.26725
[29]	validation-rmse:5.26004
[30]	validation-rmse:5.25388
[31]	validation-rmse:5.24812
[32]	validation-rmse:5.24406
[33]	validation-rmse:5.24068
[34]	validation-rmse:5.2



In [27]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

# Let MLflow capture estimator parameters and the fitted model automatically
# for every scikit-learn `fit` call below.
mlflow.sklearn.autolog()

# Fit each regressor with default hyperparameters in its own MLflow run so the
# runs can be compared side by side on validation RMSE.
for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        # Record the files the notebook actually loads at the top, so the
        # tracked run reflects the real data lineage.
        mlflow.log_param("train-data-path", "./homework/data/green_tripdata_2023-01.parquet")
        mlflow.log_param("valid-data-path", "./homework/data/green_tripdata_2023-02.parquet")
        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")

        # Fixed seed (same value as the XGBoost run) keeps results reproducible
        # across Restart & Run All; all four estimators accept `random_state`.
        mlmodel = model_class(random_state=42)
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        # Take the root explicitly: `squared=False` was deprecated in
        # scikit-learn 1.4 and removed in 1.6.
        rmse = mean_squared_error(y_val, y_pred) ** 0.5
        mlflow.log_metric("rmse", rmse)
        



