In [1]:
!python -V

Python 3.9.19


In [1]:
import pandas as pd

In [2]:
import pickle

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import root_mean_squared_error, mean_squared_error

In [5]:
!mlflow --version

mlflow, version 2.13.0


In [6]:
import mlflow

!mlflow --version

# Point MLflow at a SQLite-backed tracking store (the `mlflow.db` file).
mlflow.set_tracking_uri("sqlite:///mlflow.db")
# Select the experiment that the runs started below are recorded under.
# The call is the cell's last expression, so its return value is displayed.
mlflow.set_experiment("nyc-taxi-experiment")

mlflow, version 2.13.0


<Experiment: artifact_location=('file:///c:/Users/ramsi/OneDrive/Documents/Github '
 'Projects/mlops-zoomcamp/02-experiment-tracking/mlruns/1'), creation_time=1716844413883, experiment_id='1', last_update_time=1716844413883, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [7]:
def read_dataframe(filename):
    """Load a green-taxi trip file and prepare it for duration modelling.

    Parameters
    ----------
    filename : str or path-like
        Path to the trip data. The extension selects the reader:
        ``.csv`` or ``.parquet``.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the rows whose trip lasted between 1 and 60
        minutes (both inclusive), with an added ``duration`` column in
        minutes and ``PULocationID`` / ``DOLocationID`` cast to ``str``.

    Raises
    ------
    ValueError
        If the file extension is neither ``.csv`` nor ``.parquet``.
    """
    path = str(filename)

    if path.endswith(".csv"):
        df = pd.read_csv(path)

        # CSV carries timestamps as text, so they must be parsed explicitly.
        df["lpep_dropoff_datetime"] = pd.to_datetime(df["lpep_dropoff_datetime"])
        df["lpep_pickup_datetime"] = pd.to_datetime(df["lpep_pickup_datetime"])
    elif path.endswith(".parquet"):
        # NOTE(review): assumes the parquet file already stores both timestamp
        # columns with a datetime dtype — no conversion is applied here.
        df = pd.read_parquet(path)
    else:
        # Without this branch `df` would be unbound and the function would
        # fail later with an unhelpful UnboundLocalError.
        raise ValueError(
            f"Unsupported file type for {path!r}: expected '.csv' or '.parquet'"
        )

    # Vectorized timedelta -> minutes (no per-row Python lambda).
    trip_time = df["lpep_dropoff_datetime"] - df["lpep_pickup_datetime"]
    df["duration"] = trip_time.dt.total_seconds() / 60

    # Keep trips of 1..60 minutes; copy so that the column assignment below
    # (and any done by callers) writes to an independent frame rather than a
    # slice of the original, which would trigger SettingWithCopyWarning.
    df = df[(df["duration"] >= 1) & (df["duration"] <= 60)].copy()

    # Location IDs are categories, not quantities: store them as strings.
    categorical = ["PULocationID", "DOLocationID"]
    df[categorical] = df[categorical].astype(str)

    return df

In [8]:
# Load the training file (2021-01) and the validation file (2021-02)
# through the same preprocessing function.
df_train, df_val = map(
    read_dataframe,
    (
        "data/green_tripdata_2021-01.parquet",
        "data/green_tripdata_2021-02.parquet",
    ),
)

In [9]:
# Row counts of the training and validation frames.
tuple(len(frame) for frame in (df_train, df_val))

(73908, 61921)

In [10]:
# Combine pickup and drop-off location IDs into a single "PU_DO" key,
# applied identically to the training and validation frames.
for trips in (df_train, df_val):
    trips["PU_DO"] = trips["PULocationID"] + "_" + trips["DOLocationID"]

In [11]:
# Model inputs: the combined pickup/drop-off key plus the trip distance.
categorical = ["PU_DO"]
numerical = ["trip_distance"]

# One dict per row, as expected by DictVectorizer.
train_dicts = df_train[categorical + numerical].to_dict(orient="records")
val_dicts = df_val[categorical + numerical].to_dict(orient="records")

# The vectorizer is fitted on the training rows only; the validation rows are
# transformed with that already-fitted vectorizer.
dv = DictVectorizer()
X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

In [14]:
# Regression target: the `duration` column, extracted as plain arrays.
target = "duration"
y_train, y_val = (frame[target].values for frame in (df_train, df_val))

In [24]:
# Baseline: plain linear regression on the vectorized features.
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

# Validation RMSE. `root_mean_squared_error` (already imported and used by the
# later cells) replaces `mean_squared_error(..., squared=False)`, whose
# `squared` flag is deprecated in recent scikit-learn releases.
root_mean_squared_error(y_val, y_pred)



7.758715205396466

In [25]:
# Persist the vectorizer together with the fitted model as a single pickled
# (dv, lr) tuple, so both can be restored from one file.
# NOTE(review): assumes the "models/" directory already exists — open() does
# not create missing directories; confirm before a fresh run.
with open("models/lin_reg.bin", "wb") as f_out:
    pickle.dump((dv, lr), f_out)

In [31]:
# Lasso regression with alpha=0.5, scored on the validation set.
lr = Lasso(alpha=0.5).fit(X_train, y_train)

y_pred = lr.predict(X_val)

root_mean_squared_error(y_val, y_pred)

12.212582619743838

with mlflow.start_run():
    alpha = 0.1

    # Run metadata: who ran it, which data files were used, and the
    # regularisation strength of the model fitted below.
    mlflow.set_tag("developer", "ramsi")
    mlflow.log_params(
        {
            "train-data-path": "data/green_tripdata_2021-01.parquet",
            "valid-data-path": "data/green_tripdata_2021-02.parquet",
            "alpha": alpha,
        }
    )

    # Fit on the training matrix, score on the validation matrix.
    lr = Lasso(alpha).fit(X_train, y_train)
    y_pred = lr.predict(X_val)

    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # NOTE(review): this attaches the existing "models/lin_reg.bin" file, which
    # the earlier cell wrote from the LinearRegression fit — not the Lasso
    # model trained in this run. Confirm that this is intended.
    mlflow.log_artifact(
        local_path="models/lin_reg.bin", artifact_path="models_pickle"
    )


In [12]:
import xgboost as xgb

In [35]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [15]:
# Wrap the feature matrices and their targets in XGBoost DMatrix objects,
# which are what `xgb.train` consumes.
train, valid = (
    xgb.DMatrix(X_train, label=y_train),
    xgb.DMatrix(X_val, label=y_val),
)

In [16]:
def objective(params):
    """Train one XGBoost model for `params` and report its validation RMSE.

    Each call is recorded as its own MLflow run: the run is tagged with
    model="xgboost", `params` are logged as run parameters and the resulting
    RMSE as the "rmse" metric.

    Parameters
    ----------
    params : dict
        Booster parameters, forwarded unchanged to `xgb.train`.

    Returns
    -------
    dict
        {"loss": <validation RMSE>, "status": STATUS_OK}, the result format
        handed back to Hyperopt's `fmin`.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)

        # Up to 1000 boosting rounds, stopping early once the validation
        # score has not improved for 50 consecutive rounds.
        watchlist = [(valid, "validation")]
        model = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=watchlist,
            early_stopping_rounds=50,
        )

        validation_rmse = root_mean_squared_error(y_val, model.predict(valid))
        mlflow.log_metric("rmse", validation_rmse)

    return dict(loss=validation_rmse, status=STATUS_OK)

In [41]:
# Hyperopt search space for the XGBoost booster parameters.
# `hp.loguniform(label, low, high)` samples exp(uniform(low, high)), so the
# bounds below are exponents, not the parameter values themselves.
search_space = {
    # Integer tree depth drawn from 4..100 in steps of 1.
    "max_depth": scope.int(hp.quniform("max_depth", 4, 100, 1)),
    "learning_rate": hp.loguniform("learning_rate", -3, 0),  # exp(-3)..exp(0)
    "reg_alpha": hp.loguniform("reg_alpha", -5, -1),  # exp(-5)..exp(-1)
    "reg_lambda": hp.loguniform("reg_lambda", -6, -1),  # exp(-6)..exp(-1)
    "min_child_weight": hp.loguniform("min_child_weight", -1, 3),  # exp(-1)..exp(3)
    # "reg:linear" is the deprecated alias of the squared-error objective;
    # use its current name to avoid the deprecation warning.
    "objective": "reg:squarederror",
    "seed": 42,
}

# Run 50 TPE-guided trials; every trial calls `objective`, which logs its own
# MLflow run. `fmin` returns the best parameter assignment it found.
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials(),
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:11.77803                          
[1]	validation-rmse:11.37241                          
[2]	validation-rmse:10.99503                          
[3]	validation-rmse:10.64400                          
[4]	validation-rmse:10.31805                          
[5]	validation-rmse:10.01553                          
[6]	validation-rmse:9.73475                           
[7]	validation-rmse:9.47540                           
[8]	validation-rmse:9.23575                           
[9]	validation-rmse:9.01420                           
[10]	validation-rmse:8.80977                          
[11]	validation-rmse:8.62117                          
[12]	validation-rmse:8.44767                          
[13]	validation-rmse:8.28808                          
[14]	validation-rmse:8.14092                          
[15]	validation-rmse:8.00568                          
[16]	validation-rmse:7.88118                          
[17]	validation-rmse:7.76754                          
[18]	valid




[0]	validation-rmse:10.63398                                                     
[1]	validation-rmse:9.46973                                                      
[2]	validation-rmse:8.62516                                                      
[3]	validation-rmse:8.02336                                                      
[4]	validation-rmse:7.59979                                                      
[5]	validation-rmse:7.30728                                                      
[6]	validation-rmse:7.09685                                                      
[7]	validation-rmse:6.95138                                                      
[8]	validation-rmse:6.84828                                                      
[9]	validation-rmse:6.77554                                                      
[10]	validation-rmse:6.71964                                                     
[11]	validation-rmse:6.67919                                                     
[12]	validation-




[0]	validation-rmse:10.96317                                                     
[1]	validation-rmse:9.97446                                                      
[2]	validation-rmse:9.19850                                                      
[3]	validation-rmse:8.59694                                                      
[4]	validation-rmse:8.13535                                                      
[5]	validation-rmse:7.78311                                                      
[6]	validation-rmse:7.51639                                                      
[7]	validation-rmse:7.31424                                                      
[8]	validation-rmse:7.16108                                                      
[9]	validation-rmse:7.04380                                                      
[10]	validation-rmse:6.95475                                                     
[11]	validation-rmse:6.88644                                                     
[12]	validation-




[0]	validation-rmse:9.90714                                                      
[1]	validation-rmse:8.52289                                                      
[2]	validation-rmse:7.72666                                                      
[3]	validation-rmse:7.28062                                                      
[4]	validation-rmse:7.03075                                                      
[5]	validation-rmse:6.88945                                                      
[6]	validation-rmse:6.80147                                                      
[7]	validation-rmse:6.74682                                                      
[8]	validation-rmse:6.71069                                                      
[9]	validation-rmse:6.68567                                                      
[10]	validation-rmse:6.66931                                                     
[11]	validation-rmse:6.65645                                                     
[12]	validation-




[0]	validation-rmse:9.05395                                                      
[1]	validation-rmse:7.61211                                                      
[2]	validation-rmse:6.99701                                                      
[3]	validation-rmse:6.73544                                                      
[4]	validation-rmse:6.61171                                                      
[5]	validation-rmse:6.55214                                                      
[6]	validation-rmse:6.52332                                                      
[7]	validation-rmse:6.49988                                                      
[8]	validation-rmse:6.48845                                                      
[9]	validation-rmse:6.47746                                                      
[10]	validation-rmse:6.47075                                                     
[11]	validation-rmse:6.46288                                                     
[12]	validation-




[0]	validation-rmse:11.06195                                                     
[1]	validation-rmse:10.12964                                                     
[2]	validation-rmse:9.37863                                                      
[3]	validation-rmse:8.78137                                                      
[4]	validation-rmse:8.31011                                                      
[5]	validation-rmse:7.93879                                                      
[6]	validation-rmse:7.64586                                                      
[7]	validation-rmse:7.41970                                                      
[8]	validation-rmse:7.24203                                                      
[9]	validation-rmse:7.10678                                                      
[10]	validation-rmse:6.99862                                                     
[11]	validation-rmse:6.91317                                                     
[12]	validation-




[0]	validation-rmse:11.31368                                                     
[1]	validation-rmse:10.54699                                                     
[2]	validation-rmse:9.89684                                                      
[3]	validation-rmse:9.34791                                                      
[4]	validation-rmse:8.88778                                                      
[5]	validation-rmse:8.50323                                                      
[6]	validation-rmse:8.18326                                                      
[7]	validation-rmse:7.91868                                                      
[8]	validation-rmse:7.69976                                                      
[9]	validation-rmse:7.51911                                                      
[10]	validation-rmse:7.37006                                                     
[11]	validation-rmse:7.24759                                                     
[12]	validation-




[0]	validation-rmse:10.63688                                                     
[1]	validation-rmse:9.46281                                                      
[2]	validation-rmse:8.60371                                                      
[3]	validation-rmse:7.98201                                                      
[4]	validation-rmse:7.54460                                                      
[5]	validation-rmse:7.23379                                                      
[6]	validation-rmse:7.01586                                                      
[7]	validation-rmse:6.86200                                                      
[8]	validation-rmse:6.75318                                                      
[9]	validation-rmse:6.67076                                                      
[10]	validation-rmse:6.61213                                                     
[11]	validation-rmse:6.56823                                                     
[12]	validation-




[0]	validation-rmse:11.78669                                                      
[1]	validation-rmse:11.39000                                                      
[2]	validation-rmse:11.02129                                                      
[3]	validation-rmse:10.67903                                                      
[4]	validation-rmse:10.36193                                                      
[5]	validation-rmse:10.06809                                                      
[6]	validation-rmse:9.79612                                                       
[7]	validation-rmse:9.54486                                                       
[8]	validation-rmse:9.31289                                                       
[9]	validation-rmse:9.09897                                                       
[10]	validation-rmse:8.90197                                                      
[11]	validation-rmse:8.72058                                                      
[12]




[2]	validation-rmse:6.74340                                                       
[3]	validation-rmse:6.73766                                                       
[4]	validation-rmse:6.73222                                                       
[5]	validation-rmse:6.72430                                                       
[6]	validation-rmse:6.71834                                                       
[7]	validation-rmse:6.71090                                                       
[8]	validation-rmse:6.70288                                                       
[9]	validation-rmse:6.69800                                                       
[10]	validation-rmse:6.69238                                                      
[11]	validation-rmse:6.69104                                                      
[12]	validation-rmse:6.68673                                                      
[13]	validation-rmse:6.67988                                                      
[14]




[0]	validation-rmse:9.33993                                                     
[1]	validation-rmse:7.90234                                                     
[2]	validation-rmse:7.22443                                                     
[3]	validation-rmse:6.91061                                                     
[4]	validation-rmse:6.76367                                                     
[5]	validation-rmse:6.68053                                                     
[6]	validation-rmse:6.63567                                                     
[7]	validation-rmse:6.61051                                                     
[8]	validation-rmse:6.59377                                                     
[9]	validation-rmse:6.58330                                                     
[10]	validation-rmse:6.57924                                                    
[11]	validation-rmse:6.57589                                                    
[12]	validation-rmse:6.57189




[5]	validation-rmse:6.89854                                                     
[6]	validation-rmse:6.85796                                                     
[7]	validation-rmse:6.83678                                                     
[8]	validation-rmse:6.82301                                                     
[9]	validation-rmse:6.81975                                                     
[10]	validation-rmse:6.81712                                                    
[11]	validation-rmse:6.81115                                                    
[12]	validation-rmse:6.80872                                                    
[13]	validation-rmse:6.79987                                                    
[14]	validation-rmse:6.79495                                                    
[15]	validation-rmse:6.79230                                                    
[16]	validation-rmse:6.79007                                                    
[17]	validation-rmse:6.78590




[0]	validation-rmse:7.73188                                                     
[1]	validation-rmse:6.88913                                                     
[2]	validation-rmse:6.71754                                                     
[3]	validation-rmse:6.66500                                                     
[4]	validation-rmse:6.63721                                                     
[5]	validation-rmse:6.62743                                                     
[6]	validation-rmse:6.62003                                                     
[7]	validation-rmse:6.61653                                                     
[8]	validation-rmse:6.61082                                                     
[9]	validation-rmse:6.60391                                                     
[10]	validation-rmse:6.58981                                                    
[11]	validation-rmse:6.58501                                                    
[12]	validation-rmse:6.58400




[0]	validation-rmse:6.99114                                                     
[1]	validation-rmse:6.62926                                                     
[2]	validation-rmse:6.58956                                                     
[3]	validation-rmse:6.57823                                                     
[4]	validation-rmse:6.56756                                                     
[5]	validation-rmse:6.56158                                                     
[6]	validation-rmse:6.54774                                                     
[7]	validation-rmse:6.54338                                                     
[8]	validation-rmse:6.53629                                                     
[9]	validation-rmse:6.52836                                                     
[10]	validation-rmse:6.51957                                                    
[11]	validation-rmse:6.51489                                                    
[12]	validation-rmse:6.50818




[0]	validation-rmse:11.64566                                                    
[1]	validation-rmse:11.12965                                                    
[2]	validation-rmse:10.66110                                                    
[3]	validation-rmse:10.23653                                                    
[4]	validation-rmse:9.85270                                                     
[5]	validation-rmse:9.50632                                                     
[6]	validation-rmse:9.19400                                                     
[7]	validation-rmse:8.91296                                                     
[8]	validation-rmse:8.66096                                                     
[9]	validation-rmse:8.43510                                                     
[10]	validation-rmse:8.23290                                                    
[11]	validation-rmse:8.05212                                                    
[12]	validation-rmse:7.89204




[3]	validation-rmse:10.26657                                                    
[4]	validation-rmse:9.89034                                                     
[5]	validation-rmse:9.55236                                                     
[6]	validation-rmse:9.24781                                                     
[7]	validation-rmse:8.97539                                                     
[8]	validation-rmse:8.73181                                                     
[9]	validation-rmse:8.51416                                                     
[10]	validation-rmse:8.31858                                                    
[11]	validation-rmse:8.14492                                                    
[12]	validation-rmse:7.99103                                                    
[13]	validation-rmse:7.85255                                                    
[14]	validation-rmse:7.73085                                                    
[15]	validation-rmse:7.62261




[2]	validation-rmse:7.48355                                                     
[3]	validation-rmse:7.12006                                                     
[4]	validation-rmse:6.93575                                                     
[5]	validation-rmse:6.83883                                                     
[6]	validation-rmse:6.78723                                                     
[7]	validation-rmse:6.75385                                                     
[8]	validation-rmse:6.73255                                                     
[9]	validation-rmse:6.72024                                                     
[10]	validation-rmse:6.71210                                                    
[11]	validation-rmse:6.71010                                                    
[12]	validation-rmse:6.70632                                                    
[13]	validation-rmse:6.70341                                                    
[14]	validation-rmse:6.69791




[4]	validation-rmse:6.82679                                                     
[5]	validation-rmse:6.82136                                                     
[6]	validation-rmse:6.81753                                                     
[7]	validation-rmse:6.80623                                                     
[8]	validation-rmse:6.79907                                                     
[9]	validation-rmse:6.79407                                                     
[10]	validation-rmse:6.79168                                                    
[11]	validation-rmse:6.78887                                                    
[12]	validation-rmse:6.78491                                                    
[13]	validation-rmse:6.78224                                                    
[14]	validation-rmse:6.77861                                                    
[15]	validation-rmse:6.77642                                                    
[16]	validation-rmse:6.76968




[0]	validation-rmse:7.27140                                                     
[1]	validation-rmse:6.65675                                                     
[2]	validation-rmse:6.55406                                                     
[3]	validation-rmse:6.52474                                                     
[4]	validation-rmse:6.51650                                                     
[5]	validation-rmse:6.50319                                                     
[6]	validation-rmse:6.49573                                                     
[7]	validation-rmse:6.48452                                                     
[8]	validation-rmse:6.47666                                                     
[9]	validation-rmse:6.46993                                                     
[10]	validation-rmse:6.46227                                                    
[11]	validation-rmse:6.45665                                                    
[12]	validation-rmse:6.45428




[0]	validation-rmse:9.21164                                                     
[1]	validation-rmse:7.81237                                                     
[2]	validation-rmse:7.20024                                                     
[3]	validation-rmse:6.93753                                                     
[4]	validation-rmse:6.81458                                                     
[5]	validation-rmse:6.74845                                                     
[6]	validation-rmse:6.71687                                                     
[7]	validation-rmse:6.69832                                                     
[8]	validation-rmse:6.67946                                                     
[9]	validation-rmse:6.67045                                                     
[10]	validation-rmse:6.66540                                                    
[11]	validation-rmse:6.66017                                                    
[12]	validation-rmse:6.65835




[0]	validation-rmse:11.15188                                                    
[1]	validation-rmse:10.27982                                                    
[2]	validation-rmse:9.56427                                                     
[3]	validation-rmse:8.98356                                                     
[4]	validation-rmse:8.51121                                                     
[5]	validation-rmse:8.13534                                                     
[6]	validation-rmse:7.83616                                                     
[7]	validation-rmse:7.59561                                                     
[8]	validation-rmse:7.40486                                                     
[9]	validation-rmse:7.25171                                                     
[10]	validation-rmse:7.13204                                                    
[11]	validation-rmse:7.03622                                                    
[12]	validation-rmse:6.96087




[0]	validation-rmse:8.34241                                                     
[1]	validation-rmse:7.15002                                                     
[2]	validation-rmse:6.80587                                                     
[3]	validation-rmse:6.68598                                                     
[4]	validation-rmse:6.63815                                                     
[5]	validation-rmse:6.61525                                                     
[6]	validation-rmse:6.60156                                                     
[7]	validation-rmse:6.58964                                                     
[8]	validation-rmse:6.58264                                                     
[9]	validation-rmse:6.57821                                                     
[10]	validation-rmse:6.57167                                                    
[11]	validation-rmse:6.56638                                                    
[12]	validation-rmse:6.56229




[0]	validation-rmse:11.38864                                                    
[1]	validation-rmse:10.67645                                                    
[2]	validation-rmse:10.06409                                                    
[3]	validation-rmse:9.53895                                                     
[4]	validation-rmse:9.09082                                                     
[5]	validation-rmse:8.70974                                                     
[6]	validation-rmse:8.38896                                                     
[7]	validation-rmse:8.11622                                                     
[8]	validation-rmse:7.88898                                                     
[9]	validation-rmse:7.69757                                                     
[10]	validation-rmse:7.53489                                                    
[11]	validation-rmse:7.39984                                                    
[12]	validation-rmse:7.28501




[0]	validation-rmse:10.30907                                                    
[1]	validation-rmse:9.00935                                                     
[2]	validation-rmse:8.14393                                                     
[3]	validation-rmse:7.58071                                                     
[4]	validation-rmse:7.21776                                                     
[5]	validation-rmse:6.98466                                                     
[6]	validation-rmse:6.83497                                                     
[7]	validation-rmse:6.73581                                                     
[8]	validation-rmse:6.66585                                                     
[9]	validation-rmse:6.61927                                                     
[10]	validation-rmse:6.58528                                                    
[11]	validation-rmse:6.55967                                                    
[12]	validation-rmse:6.54344




[0]	validation-rmse:10.16365                                                    
[1]	validation-rmse:8.82535                                                     
[2]	validation-rmse:7.98828                                                     
[3]	validation-rmse:7.47082                                                     
[4]	validation-rmse:7.14110                                                     
[5]	validation-rmse:6.94668                                                     
[6]	validation-rmse:6.82194                                                     
[7]	validation-rmse:6.74346                                                     
[8]	validation-rmse:6.68467                                                     
[9]	validation-rmse:6.64830                                                     
[10]	validation-rmse:6.62372                                                    
[11]	validation-rmse:6.60647                                                    
[12]	validation-rmse:6.59430




[0]	validation-rmse:8.19853                                                     
[1]	validation-rmse:7.02955                                                     
[2]	validation-rmse:6.70390                                                     
[3]	validation-rmse:6.59280                                                     
[4]	validation-rmse:6.54725                                                     
[5]	validation-rmse:6.52369                                                     
[6]	validation-rmse:6.51500                                                     
[7]	validation-rmse:6.51029                                                     
[8]	validation-rmse:6.50283                                                     
[9]	validation-rmse:6.49858                                                     
[10]	validation-rmse:6.49377                                                    
[11]	validation-rmse:6.49050                                                    
[12]	validation-rmse:6.48364




[0]	validation-rmse:10.25073                                                    
[1]	validation-rmse:8.93913                                                     
[2]	validation-rmse:8.08968                                                     
[3]	validation-rmse:7.54770                                                     
[4]	validation-rmse:7.19883                                                     
[5]	validation-rmse:6.99373                                                     
[6]	validation-rmse:6.84669                                                     
[7]	validation-rmse:6.75813                                                     
[8]	validation-rmse:6.69958                                                     
[9]	validation-rmse:6.65101                                                     
[10]	validation-rmse:6.62474                                                    
[11]	validation-rmse:6.60349                                                    
[12]	validation-rmse:6.58986




[0]	validation-rmse:8.58704                                                     
[1]	validation-rmse:7.30974                                                     
[2]	validation-rmse:6.88816                                                     
[3]	validation-rmse:6.73845                                                     
[4]	validation-rmse:6.67883                                                     
[5]	validation-rmse:6.64892                                                     
[6]	validation-rmse:6.62807                                                     
[7]	validation-rmse:6.62730                                                     
[8]	validation-rmse:6.61816                                                     
[9]	validation-rmse:6.61400                                                     
[10]	validation-rmse:6.60671                                                    
[11]	validation-rmse:6.60452                                                    
[12]	validation-rmse:6.60363




[0]	validation-rmse:9.85653                                                     
[1]	validation-rmse:8.43053                                                     
[2]	validation-rmse:7.60238                                                     
[3]	validation-rmse:7.13603                                                     
[4]	validation-rmse:6.87339                                                     
[5]	validation-rmse:6.72400                                                     
[6]	validation-rmse:6.63106                                                     
[7]	validation-rmse:6.57579                                                     
[8]	validation-rmse:6.53913                                                     
[9]	validation-rmse:6.51535                                                     
[10]	validation-rmse:6.49838                                                    
[11]	validation-rmse:6.48765                                                    
[12]	validation-rmse:6.47578




[0]	validation-rmse:10.74709                                                    
[1]	validation-rmse:9.64190                                                     
[2]	validation-rmse:8.81278                                                     
[3]	validation-rmse:8.20122                                                     
[4]	validation-rmse:7.75627                                                     
[5]	validation-rmse:7.43047                                                     
[6]	validation-rmse:7.20282                                                     
[7]	validation-rmse:7.03811                                                     
[8]	validation-rmse:6.91629                                                     
[9]	validation-rmse:6.82133                                                     
[10]	validation-rmse:6.75607                                                    
[11]	validation-rmse:6.70456                                                    
[12]	validation-rmse:6.66931




[0]	validation-rmse:10.38445                                                    
[1]	validation-rmse:9.12129                                                     
[2]	validation-rmse:8.26988                                                     
[3]	validation-rmse:7.70513                                                     
[4]	validation-rmse:7.33902                                                     
[5]	validation-rmse:7.10498                                                     
[6]	validation-rmse:6.94131                                                     
[7]	validation-rmse:6.83523                                                     
[8]	validation-rmse:6.76476                                                     
[9]	validation-rmse:6.70804                                                     
[10]	validation-rmse:6.67417                                                    
[11]	validation-rmse:6.65242                                                    
[12]	validation-rmse:6.63353




[0]	validation-rmse:10.89702                                                    
[1]	validation-rmse:9.87382                                                     
[2]	validation-rmse:9.08357                                                     
[3]	validation-rmse:8.48058                                                     
[4]	validation-rmse:8.03055                                                     
[5]	validation-rmse:7.69057                                                     
[6]	validation-rmse:7.43722                                                     
[7]	validation-rmse:7.24555                                                     
[8]	validation-rmse:7.10643                                                     
[9]	validation-rmse:6.99819                                                     
[10]	validation-rmse:6.91933                                                    
[11]	validation-rmse:6.85666                                                    
[12]	validation-rmse:6.81172




[0]	validation-rmse:9.71943                                                     
[1]	validation-rmse:8.27808                                                     
[2]	validation-rmse:7.48395                                                     
[3]	validation-rmse:7.05376                                                     
[4]	validation-rmse:6.82342                                                     
[5]	validation-rmse:6.69511                                                     
[6]	validation-rmse:6.61714                                                     
[7]	validation-rmse:6.57293                                                     
[8]	validation-rmse:6.54090                                                     
[9]	validation-rmse:6.52150                                                     
[10]	validation-rmse:6.50750                                                    
[11]	validation-rmse:6.49730                                                    
[12]	validation-rmse:6.49393




[0]	validation-rmse:8.81260                                                     
[1]	validation-rmse:7.43568                                                     
[2]	validation-rmse:6.92402                                                     
[3]	validation-rmse:6.72117                                                     
[4]	validation-rmse:6.63164                                                     
[5]	validation-rmse:6.58720                                                     
[6]	validation-rmse:6.56410                                                     
[7]	validation-rmse:6.54985                                                     
[8]	validation-rmse:6.54567                                                     
[9]	validation-rmse:6.54057                                                     
[10]	validation-rmse:6.53593                                                    
[11]	validation-rmse:6.53082                                                    
[12]	validation-rmse:6.52578




[0]	validation-rmse:11.45765                                                    
[1]	validation-rmse:10.79353                                                    
[2]	validation-rmse:10.21118                                                    
[3]	validation-rmse:9.70335                                                     
[4]	validation-rmse:9.26086                                                     
[5]	validation-rmse:8.87678                                                     
[6]	validation-rmse:8.54478                                                     
[7]	validation-rmse:8.25927                                                     
[8]	validation-rmse:8.01345                                                     
[9]	validation-rmse:7.80208                                                     
[10]	validation-rmse:7.62085                                                    
[11]	validation-rmse:7.46522                                                    
[12]	validation-rmse:7.33249




[0]	validation-rmse:10.42120                                                    
[1]	validation-rmse:9.17416                                                     
[2]	validation-rmse:8.32365                                                     
[3]	validation-rmse:7.75127                                                     
[4]	validation-rmse:7.36808                                                     
[5]	validation-rmse:7.11547                                                     
[6]	validation-rmse:6.94713                                                     
[7]	validation-rmse:6.83799                                                     
[8]	validation-rmse:6.75550                                                     
[9]	validation-rmse:6.70275                                                     
[10]	validation-rmse:6.66553                                                    
[11]	validation-rmse:6.63861                                                    
[12]	validation-rmse:6.61790




[0]	validation-rmse:7.91501                                                     
[1]	validation-rmse:6.96026                                                     
[2]	validation-rmse:6.74608                                                     
[3]	validation-rmse:6.68801                                                     
[4]	validation-rmse:6.66448                                                     
[5]	validation-rmse:6.65705                                                     
[6]	validation-rmse:6.64917                                                     
[7]	validation-rmse:6.64434                                                     
[8]	validation-rmse:6.63950                                                     
[9]	validation-rmse:6.63526                                                     
[10]	validation-rmse:6.63114                                                    
[11]	validation-rmse:6.62590                                                    
[12]	validation-rmse:6.62212




[0]	validation-rmse:10.98190                                                    
[1]	validation-rmse:9.99306                                                     
[2]	validation-rmse:9.20848                                                     
[3]	validation-rmse:8.58882                                                     
[4]	validation-rmse:8.10426                                                     
[5]	validation-rmse:7.73022                                                     
[6]	validation-rmse:7.44121                                                     
[7]	validation-rmse:7.22044                                                     
[8]	validation-rmse:7.05072                                                     
[9]	validation-rmse:6.91989                                                     
[10]	validation-rmse:6.81778                                                    
[11]	validation-rmse:6.73972                                                    
[12]	validation-rmse:6.67944




[0]	validation-rmse:10.02081                                                    
[1]	validation-rmse:8.64527                                                     
[2]	validation-rmse:7.81508                                                     
[3]	validation-rmse:7.32575                                                     
[4]	validation-rmse:7.03853                                                     
[5]	validation-rmse:6.86943                                                     
[6]	validation-rmse:6.76294                                                     
[7]	validation-rmse:6.69240                                                     
[8]	validation-rmse:6.65104                                                     
[9]	validation-rmse:6.62146                                                     
[10]	validation-rmse:6.60313                                                    
[11]	validation-rmse:6.58776                                                    
[12]	validation-rmse:6.57739




[0]	validation-rmse:10.04741                                                    
[1]	validation-rmse:8.69566                                                     
[2]	validation-rmse:7.88154                                                     
[3]	validation-rmse:7.40258                                                     
[4]	validation-rmse:7.12380                                                     
[5]	validation-rmse:6.96072                                                     
[6]	validation-rmse:6.86217                                                     
[7]	validation-rmse:6.80032                                                     
[8]	validation-rmse:6.76188                                                     
[9]	validation-rmse:6.73585                                                     
[10]	validation-rmse:6.71980                                                    
[11]	validation-rmse:6.70745                                                    
[12]	validation-rmse:6.69938




[0]	validation-rmse:9.50573                                                     
[1]	validation-rmse:8.06363                                                     
[2]	validation-rmse:7.33814                                                     
[3]	validation-rmse:6.97875                                                     
[4]	validation-rmse:6.79935                                                     
[5]	validation-rmse:6.70515                                                     
[6]	validation-rmse:6.65064                                                     
[7]	validation-rmse:6.61615                                                     
[8]	validation-rmse:6.59410                                                     
[9]	validation-rmse:6.57907                                                     
[10]	validation-rmse:6.56744                                                    
[11]	validation-rmse:6.55897                                                    
[12]	validation-rmse:6.55633




[0]	validation-rmse:10.70680                                                    
[1]	validation-rmse:9.57731                                                     
[2]	validation-rmse:8.74362                                                     
[3]	validation-rmse:8.13729                                                     
[4]	validation-rmse:7.70181                                                     
[5]	validation-rmse:7.39272                                                     
[6]	validation-rmse:7.16912                                                     
[7]	validation-rmse:7.01047                                                     
[8]	validation-rmse:6.89872                                                     
[9]	validation-rmse:6.81718                                                     
[10]	validation-rmse:6.75552                                                    
[11]	validation-rmse:6.71272                                                    
[12]	validation-rmse:6.67880




[0]	validation-rmse:11.21268                                                    
[1]	validation-rmse:10.37663                                                    
[2]	validation-rmse:9.68205                                                     
[3]	validation-rmse:9.10429                                                     
[4]	validation-rmse:8.63300                                                     
[5]	validation-rmse:8.24517                                                     
[6]	validation-rmse:7.93376                                                     
[7]	validation-rmse:7.67806                                                     
[8]	validation-rmse:7.47139                                                     
[9]	validation-rmse:7.30562                                                     
[10]	validation-rmse:7.16946                                                    
[11]	validation-rmse:7.06037                                                    
[12]	validation-rmse:6.97180




[0]	validation-rmse:11.78270                                                    
[1]	validation-rmse:11.38160                                                    
[2]	validation-rmse:11.00846                                                    
[3]	validation-rmse:10.66176                                                    
[4]	validation-rmse:10.33967                                                    
[5]	validation-rmse:10.04105                                                    
[6]	validation-rmse:9.76453                                                     
[7]	validation-rmse:9.50877                                                     
[8]	validation-rmse:9.27204                                                     
[9]	validation-rmse:9.05339                                                     
[10]	validation-rmse:8.85188                                                    
[11]	validation-rmse:8.66612                                                    
[12]	validation-rmse:8.49482




[0]	validation-rmse:11.17823                                                    
[1]	validation-rmse:10.31738                                                    
[2]	validation-rmse:9.60586                                                     
[3]	validation-rmse:9.02323                                                     
[4]	validation-rmse:8.54575                                                     
[5]	validation-rmse:8.16153                                                     
[6]	validation-rmse:7.84715                                                     
[7]	validation-rmse:7.59651                                                     
[8]	validation-rmse:7.39456                                                     
[9]	validation-rmse:7.23279                                                     
[10]	validation-rmse:7.10167                                                    
[11]	validation-rmse:6.99728                                                    
[12]	validation-rmse:6.91350




[0]	validation-rmse:11.60233                                                    
[1]	validation-rmse:11.05114                                                    
[2]	validation-rmse:10.55405                                                    
[3]	validation-rmse:10.10724                                                    
[4]	validation-rmse:9.70676                                                     
[5]	validation-rmse:9.34766                                                     
[6]	validation-rmse:9.02532                                                     
[7]	validation-rmse:8.73859                                                     
[8]	validation-rmse:8.48562                                                     
[9]	validation-rmse:8.25876                                                     
[10]	validation-rmse:8.05972                                                    
[11]	validation-rmse:7.88134                                                    
[12]	validation-rmse:7.72597




[0]	validation-rmse:11.67164                                                    
[1]	validation-rmse:11.17627                                                    
[2]	validation-rmse:10.72456                                                    
[3]	validation-rmse:10.31292                                                    
[4]	validation-rmse:9.93861                                                     
[5]	validation-rmse:9.59829                                                     
[6]	validation-rmse:9.28919                                                     
[7]	validation-rmse:9.00908                                                     
[8]	validation-rmse:8.75689                                                     
[9]	validation-rmse:8.52881                                                     
[10]	validation-rmse:8.32350                                                    
[11]	validation-rmse:8.13840                                                    
[12]	validation-rmse:7.97256




[0]	validation-rmse:11.51468                                                    
[1]	validation-rmse:10.89375                                                    
[2]	validation-rmse:10.34343                                                    
[3]	validation-rmse:9.85644                                                     
[4]	validation-rmse:9.42782                                                     
[5]	validation-rmse:9.04893                                                     
[6]	validation-rmse:8.71882                                                     
[7]	validation-rmse:8.42921                                                     
[8]	validation-rmse:8.17683                                                     
[9]	validation-rmse:7.95769                                                     
[10]	validation-rmse:7.76688                                                    
[11]	validation-rmse:7.60200                                                    
[12]	validation-rmse:7.45832




[0]	validation-rmse:11.72845                                                    
[1]	validation-rmse:11.28112                                                    
[2]	validation-rmse:10.86888                                                    
[3]	validation-rmse:10.48941                                                    
[4]	validation-rmse:10.14066                                                    
[5]	validation-rmse:9.82044                                                     
[6]	validation-rmse:9.52760                                                     
[7]	validation-rmse:9.25924                                                     
[8]	validation-rmse:9.01342                                                     
[9]	validation-rmse:8.78900                                                     
[10]	validation-rmse:8.58441                                                    
[11]	validation-rmse:8.39804                                                    
[12]	validation-rmse:8.22837




[0]	validation-rmse:11.53996                                                     
[1]	validation-rmse:10.94112                                                     
[2]	validation-rmse:10.40983                                                     
[3]	validation-rmse:9.93943                                                      
[4]	validation-rmse:9.52484                                                      
[5]	validation-rmse:9.16015                                                      
[6]	validation-rmse:8.84037                                                      
[7]	validation-rmse:8.56063                                                      
[8]	validation-rmse:8.31642                                                      
[9]	validation-rmse:8.10385                                                      
[10]	validation-rmse:7.91910                                                     
[11]	validation-rmse:7.75830                                                     
[12]	validation-

In [42]:
# Switch MLflow's XGBoost autologging off (disable=True), so that subsequent
# XGBoost training is only tracked through explicit mlflow.log_* calls.
mlflow.xgboost.autolog(disable=True)

In [43]:
import os

# Train one XGBoost model with fixed hyperparameters and track it manually in
# MLflow: parameters, validation RMSE, the pickled preprocessor and the model.
with mlflow.start_run():

    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    # NOTE(review): presumably the best values found by the earlier
    # hyperparameter search -- confirm against the MLflow UI.
    best_params = {
        "learning_rate": 0.09585355369315604,
        "max_depth": 30,
        "min_child_weight": 1.060597050922164,
        # "reg:linear" is the deprecated alias of the squared-error objective;
        # use the current name so XGBoost does not warn about (or reject) it.
        "objective": "reg:squarederror",
        "reg_alpha": 0.018060244040060163,
        "reg_lambda": 0.011658731377413597,
        "seed": 42,
    }

    mlflow.log_params(best_params)

    # Up to 1000 boosting rounds; stop once the validation metric has not
    # improved for 50 consecutive rounds.
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, "validation")],
        early_stopping_rounds=50,
    )

    y_pred = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_pred)
    mlflow.log_metric("rmse", rmse)

    # Persist the fitted preprocessor `dv` next to the model so it can be
    # attached to the run. Create the target directory first so the cell also
    # works on a fresh checkout where "models/" does not exist yet.
    preprocessor_path = "models/preprocessor.b"
    os.makedirs(os.path.dirname(preprocessor_path), exist_ok=True)
    with open(preprocessor_path, "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact(preprocessor_path, artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")



[0]	validation-rmse:11.44482
[1]	validation-rmse:10.77202
[2]	validation-rmse:10.18363
[3]	validation-rmse:9.67396
[4]	validation-rmse:9.23166
[5]	validation-rmse:8.84808
[6]	validation-rmse:8.51883
[7]	validation-rmse:8.23597
[8]	validation-rmse:7.99320
[9]	validation-rmse:7.78709
[10]	validation-rmse:7.61022
[11]	validation-rmse:7.45952
[12]	validation-rmse:7.33049
[13]	validation-rmse:7.22098
[14]	validation-rmse:7.12713
[15]	validation-rmse:7.04752
[16]	validation-rmse:6.98005
[17]	validation-rmse:6.92232
[18]	validation-rmse:6.87112
[19]	validation-rmse:6.82740
[20]	validation-rmse:6.78995
[21]	validation-rmse:6.75792
[22]	validation-rmse:6.72994
[23]	validation-rmse:6.70547
[24]	validation-rmse:6.68390
[25]	validation-rmse:6.66421
[26]	validation-rmse:6.64806
[27]	validation-rmse:6.63280
[28]	validation-rmse:6.61924
[29]	validation-rmse:6.60773
[30]	validation-rmse:6.59777
[31]	validation-rmse:6.58875
[32]	validation-rmse:6.58107
[33]	validation-rmse:6.57217
[34]	validation-rmse:



In [17]:
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
)


from sklearn.svm import LinearSVR


# Let MLflow autolog scikit-learn estimators, with dataset logging turned off.
mlflow.sklearn.autolog(log_datasets=False)

# Fit each baseline regressor with its default hyperparameters, one MLflow run
# per model class, and record the validation RMSE for comparison.
for model_class in (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
    LinearSVR,
):
    with mlflow.start_run():
        mlflow.log_param(
            "train-data-path", "data/green_tripdata_2021-01.parquet"
        )
        mlflow.log_param(
            "valid-data-path", "data/green_tripdata_2021-02.parquet"
        )

        # Requires "models/preprocessor.b" to already exist on disk.
        mlflow.log_artifact(
            "models/preprocessor.b", artifact_path="preprocessor"
        )

        # All four estimators are stochastic; fix the seed so that re-running
        # the notebook reproduces the logged metrics (same seed value as the
        # XGBoost run uses).
        mlmodel = model_class(random_state=42)
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        mlflow.log_metric("rmse", rmse)

