# Experiment Tracking
## Preprocessing

In [1]:
import os
import pickle

import mlflow
import numpy as np
import pandas as pd
import xgboost as xgb
from hyperopt import tpe, hp, Trials, STATUS_OK, fmin
from hyperopt.pyll import scope
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, ExtraTreesRegressor
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import Lasso
from sklearn.metrics import root_mean_squared_error
from sklearn.svm import LinearSVR



In [2]:
# Enable pandas copy-on-write: frames derived by filtering or indexing behave as
# independent copies, so later column assignments never write through to a parent.
pd.set_option("mode.copy_on_write", True)

In [3]:
def preprocessing(df):
    """Clean raw green-taxi trips and derive the modelling columns.

    The input frame is left untouched; a new, filtered frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw trips. Must contain ``lpep_pickup_datetime`` and
        ``lpep_dropoff_datetime`` (datetime columns) plus ``PULocationID``
        and ``DOLocationID``.

    Returns
    -------
    pandas.DataFrame
        Only the trips lasting between 1 and 60 minutes (inclusive), with
        these columns added: ``duration`` (minutes, float), ``trip_month``,
        ``trip_dom``, ``trip_hour`` (categoricals taken from the pickup
        timestamp) and ``PU_DO`` (``"<pickup id>_<dropoff id>"``).
    """
    # compute target variable: trip duration in minutes (vectorised, no per-row apply)
    duration = (df.lpep_dropoff_datetime - df.lpep_pickup_datetime).dt.total_seconds() / 60

    # keep only trips lasting between 1 and 60 minutes, inclusive
    keep = (duration >= 1) & (duration <= 60)

    # copy the kept rows so the column assignments below never touch the caller's frame
    df = df.loc[keep, :].copy()
    df['duration'] = duration[keep].to_numpy()

    # extract trip date components
    pickup = df.lpep_pickup_datetime
    df['trip_month'] = pd.Categorical(pickup.dt.month)
    df['trip_dom'] = pd.Categorical(pickup.dt.day)
    df['trip_hour'] = pd.Categorical(pickup.dt.hour)

    # concatenate pickup and dropoff locations
    df['PU_DO'] = df['PULocationID'].astype(str) + '_' + df['DOLocationID'].astype(str)

    return df

In [4]:
def one_hot_encoding(df, numerical, categorical, dv=None):
    """Turn the selected columns into a feature matrix via a dict vectorizer.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns. It is not modified.
    numerical : list of str
        Columns passed through with their values unchanged.
    categorical : list of str
        Columns cast to ``str`` before vectorizing.
    dv : fitted vectorizer, optional
        Object exposing ``transform``. When ``None`` (the default) a new
        ``DictVectorizer`` is created and fitted on ``df``; pass the fitted
        one to encode validation or test data consistently.

    Returns
    -------
    tuple
        ``(X, dv)``: the transformed features and the vectorizer used.
    """
    # cast on a selection of the frame, not on the caller's frame
    features = df[categorical + numerical].astype({col: str for col in categorical})
    df_dicts = features.to_dict(orient='records')

    # explicit None check: a supplied vectorizer must be used even if it is falsy
    if dv is None:
        dv = DictVectorizer()
        dv.fit(df_dicts)

    X = dv.transform(df_dicts)

    return X, dv

In [5]:
def train_model(model, X_train, y_train):
    """Fit ``model`` on the training data and return it.

    Parameters
    ----------
    model : estimator
        Any object exposing ``fit(X, y)``; it is fitted in place.
    X_train : array-like or sparse matrix
        Training features.
    y_train : array-like
        Training targets.

    Returns
    -------
    estimator
        The same ``model`` object, now fitted, so calls can be chained.
    """
    model.fit(X_train, y_train)
    return model

In [6]:
# read in the raw data: January 2021 green-taxi trips for training,
# February 2021 for validation (downloaded from the URLs on every run)
df_train = pd.read_parquet('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet')
df_val = pd.read_parquet('https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet')

In [7]:
# data cleaning: run the same preprocessing over the training and validation frames
df_train, df_val = map(preprocessing, (df_train, df_val))

In [8]:
# variable selection: the columns handed to one_hot_encoding
# categorical columns are cast to str before vectorizing
categorical = ['PU_DO']
# numerical columns are passed through as-is
numerical = ['trip_distance']

In [9]:
# categorical feature encoding
# fit the vectorizer on the training data only ...
X_train, dv = one_hot_encoding(df_train, numerical, categorical)
# ... then reuse that fitted vectorizer for validation so both matrices share one feature space
X_val, _ = one_hot_encoding(df_val, numerical, categorical, dv=dv)

In [10]:
# regression target: the trip duration in minutes computed by preprocessing()
target = 'duration'
y_train = df_train[target]
y_val = df_val[target]

In [11]:
# Persist the fitted vectorizer so each MLflow run below can attach it as an artifact.
# Create the target directory first: open() raises FileNotFoundError on a fresh
# checkout where 'models/' does not exist yet.
os.makedirs('models', exist_ok=True)
with open('models/preprocessor.b', "wb") as f_out:
    pickle.dump(dv, f_out)

## Manual Logging

In [12]:
# Store run metadata in a local SQLite database (mlflow.db, path relative to the working directory)
mlflow.set_tracking_uri("sqlite:///mlflow.db")
# All runs started below are grouped under this experiment
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location='/Users/bastienwinant/Desktop/projects/mlops-zoomcamp/02-experiment-tracking/mlruns/1', creation_time=1749031625980, experiment_id='1', last_update_time=1749031625980, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [13]:
# Switch autologging off for both flavors: every param, metric and model in this
# section is logged explicitly.
mlflow.sklearn.autolog(disable=True)
mlflow.xgboost.autolog(disable=True)

### Lasso

In [14]:
# Lasso baseline: fit on the training matrix, score RMSE on the validation month
alpha = .01  # L1 regularisation strength; also logged as a run parameter in the next cell
lr = Lasso(alpha=alpha)
train_model(lr, X_train, y_train)
y_preds = lr.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [15]:
# Record the Lasso baseline as one MLflow run: data provenance and hyperparameters,
# the pickled preprocessor, the fitted model and its validation RMSE.
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "Lasso",
        "alpha": alpha,
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(lr, "model")
    mlflow.log_metric("rmse", rmse)



### XGBoost

In [15]:
# Wrap the feature matrices and targets in DMatrix objects, the input type xgb.train expects
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [17]:
def objective(params):
    """Hyperopt objective: train one XGBoost booster and log it as an MLflow run.

    Reads the module-level ``train`` and ``valid`` DMatrix objects and the
    validation target ``y_val``.

    Parameters
    ----------
    params : dict
        Booster parameters sampled from the search space; passed to
        ``xgb.train`` unchanged and logged as run parameters.

    Returns
    -------
    dict
        ``{'loss': <validation RMSE>, 'status': STATUS_OK}``, the mapping
        hyperopt minimises on.
    """
    # up to 1000 boosting rounds; training stops early once the validation
    # metric has not improved for 50 consecutive rounds
    booster = xgb.train(
        params=params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, "validation")],
        early_stopping_rounds=50,
        verbose_eval=200
    )

    y_preds = booster.predict(valid)
    rmse = root_mean_squared_error(y_val, y_preds)

    with mlflow.start_run():
        mlflow.set_tag("developer", "Armand Winant")

        mlflow.log_param("training-data", "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet")
        mlflow.log_param("validation-data", "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet")
        # record the model family like the scikit-learn runs do, so runs can be filtered by it
        mlflow.log_param("model", "XGBoost")

        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
        mlflow.log_params(params)
        mlflow.xgboost.log_model(booster, "model")
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [18]:
# Hyperopt search space for the XGBoost booster parameters.
# hp.loguniform(label, low, high) samples exp(uniform(low, high)), so the bounds
# below are exponents, not the parameter values themselves.
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),  # integer in [4, 100]
    'learning_rate': hp.loguniform('learning_rate', -3, 0),  # exp(-3) ~ 0.05 up to exp(0) = 1
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),  # exp(-5) ~ 0.0067 up to exp(-1) ~ 0.37
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),  # exp(-6) ~ 0.0025 up to exp(-1) ~ 0.37
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),  # exp(-1) ~ 0.37 up to exp(3) ~ 20.1
    'objective': 'reg:squarederror',  # fixed, not searched
    'seed': 42  # fixed booster seed, not searched
}

In [19]:
# Run the TPE search over search_space (50 evaluations of objective).
# The Trials object is kept in a variable so the per-trial history stays
# inspectable, and the sampler is seeded so re-running the cell proposes the
# same sequence of hyperparameters.
trials = Trials()
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials,
    rstate=np.random.default_rng(42)
)

[0]	validation-rmse:8.89467                           
[200]	validation-rmse:6.41605                         
[315]	validation-rmse:6.40993                         
  0%|          | 0/50 [00:54<?, ?trial/s, best loss=?]





[0]	validation-rmse:9.45145                                                      
[200]	validation-rmse:6.32534                                                    
[298]	validation-rmse:6.32868                                                    
  2%|▏         | 1/50 [02:18<1:02:26, 76.46s/trial, best loss: 6.409929751696569]





[0]	validation-rmse:10.07281                                                     
[200]	validation-rmse:6.42223                                                    
[400]	validation-rmse:6.40064                                                    
[406]	validation-rmse:6.40090                                                    
  4%|▍         | 2/50 [03:25<1:01:35, 76.98s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:7.53374                                                      
[198]	validation-rmse:6.36312                                                  
  6%|▌         | 3/50 [04:00<56:14, 71.80s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:11.33493                                                   
[200]	validation-rmse:6.35792                                                  
[400]	validation-rmse:6.33867                                                  
[600]	validation-rmse:6.32975                                                  
[704]	validation-rmse:6.32993                                                  
  8%|▊         | 4/50 [06:04<42:19, 55.20s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:10.08033                                                     
[200]	validation-rmse:6.53170                                                    
[400]	validation-rmse:6.51822                                                    
[600]	validation-rmse:6.51311                                                    
[800]	validation-rmse:6.50562                                                    
[999]	validation-rmse:6.50163                                                    
 10%|█         | 5/50 [07:20<1:02:16, 83.04s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:11.78862                                                     
[200]	validation-rmse:6.39971                                                  
[400]	validation-rmse:6.37581                                                  
[600]	validation-rmse:6.36521                                                  
[800]	validation-rmse:6.36159                                                  
[894]	validation-rmse:6.36132                                                  
 12%|█▏        | 6/50 [10:11<57:16, 78.11s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:9.45037                                                       
[200]	validation-rmse:6.36265                                                     
[235]	validation-rmse:6.36221                                                     
 14%|█▍        | 7/50 [11:04<1:21:01, 113.06s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:11.69182                                                      
[200]	validation-rmse:6.64699                                                    
[400]	validation-rmse:6.63789                                                    
[600]	validation-rmse:6.63394                                                    
[800]	validation-rmse:6.63163                                                    
[999]	validation-rmse:6.62998                                                    
 16%|█▌        | 8/50 [13:49<1:02:23, 89.12s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:11.63904                                                      
[200]	validation-rmse:6.60489                                                     
[400]	validation-rmse:6.57754                                                     
[600]	validation-rmse:6.56664                                                     
[800]	validation-rmse:6.56073                                                     
[999]	validation-rmse:6.55591                                                     
 18%|█▊        | 9/50 [15:02<1:19:37, 116.52s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:9.75363                                                       
[200]	validation-rmse:6.41745                                                     
[400]	validation-rmse:6.37422                                                     
[600]	validation-rmse:6.36072                                                     
[800]	validation-rmse:6.35529                                                     
[805]	validation-rmse:6.35541                                                     
 20%|██        | 10/50 [15:51<1:06:38, 99.96s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:11.60618                                                      
[200]	validation-rmse:6.46683                                                   
[400]	validation-rmse:6.44142                                                   
[600]	validation-rmse:6.43629                                                   
[620]	validation-rmse:6.43644                                                   
 22%|██▏       | 11/50 [19:11<54:24, 83.71s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:11.43559                                                       
[200]	validation-rmse:6.60230                                                      
[400]	validation-rmse:6.59001                                                      
[600]	validation-rmse:6.58398                                                      
[800]	validation-rmse:6.58066                                                      
[999]	validation-rmse:6.57839                                                      
 24%|██▍       | 12/50 [21:08<1:17:38, 122.59s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:10.99790                                                       
[200]	validation-rmse:6.42380                                                      
[386]	validation-rmse:6.41485                                                      
 26%|██▌       | 13/50 [22:54<1:13:31, 119.22s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:11.48916                                                       
[200]	validation-rmse:6.45168                                                      
[400]	validation-rmse:6.40170                                                      
[600]	validation-rmse:6.37344                                                      
[800]	validation-rmse:6.35373                                                      
[999]	validation-rmse:6.34046                                                      
 28%|██▊       | 14/50 [24:04<1:08:57, 114.94s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:6.74470                                                        
[81]	validation-rmse:6.47552                                                     
 30%|███       | 15/50 [24:40<58:57, 101.08s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:11.16045                                                     
[200]	validation-rmse:6.44813                                                   
[400]	validation-rmse:6.38937                                                   
[600]	validation-rmse:6.35469                                                   
[800]	validation-rmse:6.33463                                                   
[999]	validation-rmse:6.32217                                                   
 32%|███▏      | 16/50 [25:53<45:30, 80.32s/trial, best loss: 6.328374987594146]





[0]	validation-rmse:10.98657                                                    
[200]	validation-rmse:6.59437                                                   
[400]	validation-rmse:6.58073                                                   
[600]	validation-rmse:6.57409                                                   
[800]	validation-rmse:6.57055                                                   
[999]	validation-rmse:6.56828                                                   
 34%|███▍      | 17/50 [27:23<43:29, 79.06s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:6.64021                                                     
[96]	validation-rmse:6.50946                                                    
 36%|███▌      | 18/50 [27:55<44:15, 82.98s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:11.15919                                                    
[200]	validation-rmse:6.50647                                                   
[400]	validation-rmse:6.47510                                                   
[600]	validation-rmse:6.46158                                                   
[800]	validation-rmse:6.45344                                                   
[999]	validation-rmse:6.44761                                                   
 38%|███▊      | 19/50 [28:59<34:44, 67.23s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:10.37107                                                    
[200]	validation-rmse:6.56702                                                   
[400]	validation-rmse:6.48905                                                   
[600]	validation-rmse:6.43748                                                   
[800]	validation-rmse:6.40104                                                   
[999]	validation-rmse:6.37620                                                   
 40%|████      | 20/50 [29:49<32:46, 65.54s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:8.34024                                                     
[149]	validation-rmse:6.33803                                                   
 42%|████▏     | 21/50 [30:20<29:07, 60.26s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:10.70466                                                    
[200]	validation-rmse:6.43315                                                   
[400]	validation-rmse:6.40853                                                   
[421]	validation-rmse:6.40861                                                   
 44%|████▍     | 22/50 [31:26<24:04, 51.59s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:10.66312                                                    
[200]	validation-rmse:6.59721                                                   
[400]	validation-rmse:6.53558                                                   
[600]	validation-rmse:6.50058                                                   
[800]	validation-rmse:6.47813                                                   
[999]	validation-rmse:6.46000                                                   
 46%|████▌     | 23/50 [32:08<25:27, 56.57s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:8.97523                                                     
[200]	validation-rmse:6.42952                                                   
[216]	validation-rmse:6.43177                                                   
 48%|████▊     | 24/50 [32:54<22:44, 52.49s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:7.77834                                                     
[200]	validation-rmse:6.35915                                                   
[225]	validation-rmse:6.36162                                                   
 50%|█████     | 25/50 [33:26<20:44, 49.77s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:11.80785                                                    
[200]	validation-rmse:6.63495                                                   
[400]	validation-rmse:6.59514                                                   
[600]	validation-rmse:6.56565                                                   
[800]	validation-rmse:6.54272                                                   
[999]	validation-rmse:6.52337                                                   
 52%|█████▏    | 26/50 [34:26<17:43, 44.33s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:11.16403                                                    
[200]	validation-rmse:6.49249                                                   
[400]	validation-rmse:6.48037                                                   
[600]	validation-rmse:6.47365                                                   
[800]	validation-rmse:6.47173                                                   
[999]	validation-rmse:6.47011                                                   
 54%|█████▍    | 27/50 [36:21<18:55, 49.36s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:9.44417                                                     
[200]	validation-rmse:6.42349                                                   
[353]	validation-rmse:6.41739                                                   
 56%|█████▌    | 28/50 [37:22<26:13, 71.52s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:10.83156                                                    
[200]	validation-rmse:6.43807                                                   
[400]	validation-rmse:6.40711                                                   
[594]	validation-rmse:6.40474                                                   
 58%|█████▊    | 29/50 [38:50<23:29, 67.13s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:10.27770                                                    
[200]	validation-rmse:6.39270                                                   
[400]	validation-rmse:6.38419                                                   
[527]	validation-rmse:6.38618                                                   
 60%|██████    | 30/50 [40:04<24:15, 72.77s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:8.96224                                                     
[200]	validation-rmse:6.33398                                                   
[357]	validation-rmse:6.32564                                                   
 62%|██████▏   | 31/50 [40:52<23:06, 72.96s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:8.75654                                                     
[200]	validation-rmse:6.39183                                                   
[400]	validation-rmse:6.33481                                                   
[600]	validation-rmse:6.32113                                                   
[645]	validation-rmse:6.32307                                                   
 64%|██████▍   | 32/50 [41:24<19:22, 64.56s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:8.18388                                                     
[200]	validation-rmse:6.48066                                                   
[400]	validation-rmse:6.44979                                                   
[450]	validation-rmse:6.45286                                                   
 66%|██████▌   | 33/50 [41:49<15:39, 55.28s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:7.33691                                                     
[200]	validation-rmse:6.51102                                                   
[400]	validation-rmse:6.41635                                                   
[600]	validation-rmse:6.36946                                                   
[800]	validation-rmse:6.35356                                                   
[976]	validation-rmse:6.34571                                                   
 68%|██████▊   | 34/50 [42:19<12:13, 45.86s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:11.24603                                                    
[200]	validation-rmse:6.50400                                                   
[400]	validation-rmse:6.45398                                                   
[600]	validation-rmse:6.42928                                                   
[800]	validation-rmse:6.41494                                                   
[999]	validation-rmse:6.40582                                                   
 70%|███████   | 35/50 [43:39<10:13, 40.90s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:9.95830                                                     
[200]	validation-rmse:6.40903                                                   
[400]	validation-rmse:6.35266                                                   
[600]	validation-rmse:6.32368                                                   
[792]	validation-rmse:6.31131                                                   
 72%|███████▏  | 36/50 [44:24<12:28, 53.50s/trial, best loss: 6.322173027684213]





[0]	validation-rmse:9.91531                                                     
[200]	validation-rmse:6.32871                                                   
[400]	validation-rmse:6.31391                                                   
[441]	validation-rmse:6.31583                                                   
 74%|███████▍  | 37/50 [45:27<11:25, 52.76s/trial, best loss: 6.311247773518201]





[0]	validation-rmse:9.93143                                                     
[200]	validation-rmse:6.32492                                                   
[400]	validation-rmse:6.30672                                                   
[417]	validation-rmse:6.30680                                                   
 76%|███████▌  | 38/50 [46:28<10:53, 54.50s/trial, best loss: 6.311247773518201]





[0]	validation-rmse:9.33262                                                     
[200]	validation-rmse:6.33552                                                   
[244]	validation-rmse:6.33790                                                   
 78%|███████▊  | 39/50 [47:15<10:10, 55.46s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:10.09450                                                    
[200]	validation-rmse:6.40938                                                   
[330]	validation-rmse:6.39909                                                   
 80%|████████  | 40/50 [48:07<08:52, 53.26s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:10.48770                                                    
[200]	validation-rmse:6.41624                                                   
[400]	validation-rmse:6.38624                                                   
[544]	validation-rmse:6.38135                                                   
 82%|████████▏ | 41/50 [49:15<07:53, 52.59s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:9.76666                                                     
[200]	validation-rmse:6.41646                                                   
[213]	validation-rmse:6.41658                                                   
 84%|████████▍ | 42/50 [50:24<07:48, 58.51s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:7.08201                                                     
[116]	validation-rmse:6.39597                                                   
 86%|████████▌ | 43/50 [51:10<07:08, 61.17s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:10.88154                                                    
[200]	validation-rmse:6.45071                                                   
[400]	validation-rmse:6.39402                                                   
[600]	validation-rmse:6.36238                                                   
[800]	validation-rmse:6.34215                                                   
[999]	validation-rmse:6.32995                                                   
 88%|████████▊ | 44/50 [53:00<05:52, 58.72s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:9.32959                                                     
[200]	validation-rmse:6.48778                                                   
[400]	validation-rmse:6.40451                                                   
[600]	validation-rmse:6.36133                                                   
[800]	validation-rmse:6.33043                                                   
[999]	validation-rmse:6.31410                                                   
 90%|█████████ | 45/50 [54:05<06:15, 75.16s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:10.25933                                                    
[200]	validation-rmse:6.43123                                                   
[400]	validation-rmse:6.40062                                                   
[600]	validation-rmse:6.39052                                                   
[800]	validation-rmse:6.38550                                                   
[999]	validation-rmse:6.38173                                                   
 92%|█████████▏| 46/50 [55:14<04:34, 68.52s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:11.70886                                                    
[200]	validation-rmse:6.54637                                                   
[400]	validation-rmse:6.50375                                                   
[600]	validation-rmse:6.47428                                                   
[800]	validation-rmse:6.45304                                                   
[999]	validation-rmse:6.43615                                                   
 94%|█████████▍| 47/50 [57:15<03:26, 68.96s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:9.66920                                                     
[200]	validation-rmse:6.33823                                                   
[349]	validation-rmse:6.33513                                                   
 96%|█████████▌| 48/50 [57:58<02:50, 85.16s/trial, best loss: 6.306800700986433]





[0]	validation-rmse:11.46779                                                    
[200]	validation-rmse:6.66846                                                   
[400]	validation-rmse:6.65745                                                   
[600]	validation-rmse:6.65039                                                   
[800]	validation-rmse:6.64630                                                   
[999]	validation-rmse:6.64307                                                   
 98%|█████████▊| 49/50 [58:56<01:11, 71.18s/trial, best loss: 6.306800700986433]





100%|██████████| 50/50 [59:07<00:00, 70.94s/trial, best loss: 6.306800700986433]


In [13]:
# Fixed XGBoost hyperparameters used to train the final booster in the next cell.
# NOTE(review): presumably copied by hand from the best hyperopt trial above - confirm;
# the search result itself is never displayed in this notebook.
params = {
    'learning_rate': 0.23768760883453516,
    'max_depth': 32,
    'min_child_weight': 1.4255637138891217,
    'objective': 'reg:squarederror',
    'reg_alpha': 0.20333941491253288,
    'reg_lambda': 0.014135076785365224,
    'seed': 42
}

In [16]:
# Train the final booster with the fixed `params`: up to 1000 rounds, stopping
# early once the validation metric has not improved for 50 consecutive rounds.
booster = xgb.train(
    params=params,
    dtrain=train,
    num_boost_round=1000,
    evals=[(valid, "validation")],
    early_stopping_rounds=50,
    verbose_eval=200
)

y_preds = booster.predict(valid)
rmse = root_mean_squared_error(y_val, y_preds)

# Log the final model as its own MLflow run.
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    mlflow.log_param("training-data", "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet")
    mlflow.log_param("validation-data", "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet")
    # record the model family like the scikit-learn runs do, so runs can be filtered by it
    mlflow.log_param("model", "XGBoost")

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.log_params(params)
    mlflow.xgboost.log_model(booster, "model")
    mlflow.log_metric("rmse", rmse)

[0]	validation-rmse:10.35446
[200]	validation-rmse:6.35461
[400]	validation-rmse:6.31678
[581]	validation-rmse:6.30616




### Random Forest

In [22]:
# Random forest with default hyperparameters. The seed is fixed (same value as
# the XGBoost runs) so re-running the cell reproduces the logged RMSE.
rf = RandomForestRegressor(random_state=42)
train_model(rf, X_train, y_train)
y_preds = rf.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [23]:
# Record the random forest as one MLflow run: data provenance, the pickled
# preprocessor, the fitted model and its validation RMSE.
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "RandomForestRegressor",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(rf, "model")
    mlflow.log_metric("rmse", rmse)



### Linear SVR

In [24]:
# Linear SVR with a very high iteration cap. The seed is fixed so repeated runs
# log the same RMSE.
svr = LinearSVR(max_iter=10000000, random_state=42)
train_model(svr, X_train, y_train)
y_preds = svr.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [25]:
# Record the linear SVR as one MLflow run: data provenance, the pickled
# preprocessor, the fitted model and its validation RMSE.
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "LinearSVR",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(svr, "model")
    mlflow.log_metric("rmse", rmse)



### Gradient Boosting

In [26]:
# Gradient boosting with default hyperparameters. The seed is fixed so repeated
# runs log the same RMSE.
gb = GradientBoostingRegressor(random_state=42)
train_model(gb, X_train, y_train)
y_preds = gb.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [27]:
# Record the gradient boosting model as one MLflow run: data provenance, the
# pickled preprocessor, the fitted model and its validation RMSE.
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "GradientBoostingRegressor",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(gb, "model")
    mlflow.log_metric("rmse", rmse)



### Extra Trees

In [28]:
# Extra-trees ensemble with default hyperparameters. The seed is fixed so
# repeated runs log the same RMSE.
et = ExtraTreesRegressor(random_state=42)
train_model(et, X_train, y_train)
y_preds = et.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [29]:
# Record the extra-trees model as one MLflow run: data provenance, the pickled
# preprocessor, the fitted model and its validation RMSE.
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "ExtraTreesRegressor",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(et, "model")
    mlflow.log_metric("rmse", rmse)



## Model Management

In [18]:
# URI of the "model" artifact of one specific, hard-coded MLflow run.
# NOTE(review): the run id presumably exists only in the tracking store this notebook
# was last executed against - replace it after re-running the experiments.
logged_model = 'runs:/ef2155ec11214def8ebe2c038790cd9b/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on the validation feature matrix (the vectorizer output X_val, not a DataFrame).
y_preds = loaded_model.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [20]:
# Load the same logged model through the XGBoost flavor instead of the generic pyfunc wrapper
xgboost_model = mlflow.xgboost.load_model(logged_model)

In [21]:
# Score the natively loaded model on the validation DMatrix; the bare `rmse`
# on the last line displays the value as the cell output
y_preds = xgboost_model.predict(valid)
rmse = root_mean_squared_error(y_val, y_preds)
rmse

6.305945326886427