# Experiment Tracking
## Preprocessing

In [1]:
import os
import pickle

import mlflow
import pandas as pd
import xgboost as xgb
from hyperopt import tpe, hp, Trials, STATUS_OK, fmin
from hyperopt.pyll import scope
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, ExtraTreesRegressor
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import Lasso
from sklearn.metrics import root_mean_squared_error
from sklearn.svm import LinearSVR



In [2]:
# enable pandas copy-on-write for every cell that follows
pd.set_option("mode.copy_on_write", True)

In [3]:
def preprocessing(df):
    """Filter trip records by duration and derive the modelling columns.

    The input frame is not modified; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Trip records with the columns ``lpep_pickup_datetime``,
        ``lpep_dropoff_datetime``, ``PULocationID`` and ``DOLocationID``.

    Returns
    -------
    pandas.DataFrame
        Rows whose duration lies between 1 and 60 minutes (both bounds
        included), with these columns added: ``duration`` (minutes),
        ``trip_month``, ``trip_dom``, ``trip_hour`` (categoricals taken from
        the pickup timestamp) and ``PU_DO`` (``"<pickup id>_<dropoff id>"``).
    """
    # target variable: trip duration in minutes, computed column-wise
    elapsed = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    duration = elapsed.dt.total_seconds() / 60

    # keep trips lasting 1 to 60 minutes inclusive; missing durations are dropped
    keep = (duration >= 1) & (duration <= 60)

    # explicit copy so the derived columns never leak into the caller's frame
    out = df.loc[keep, :].copy()
    out['duration'] = duration[keep]

    # extract trip date components from the pickup timestamp
    pickup = out.lpep_pickup_datetime.dt
    out['trip_month'] = pd.Categorical(pickup.month)
    out['trip_dom'] = pd.Categorical(pickup.day)
    out['trip_hour'] = pd.Categorical(pickup.hour)

    # concatenate pickup and dropoff locations into a single route key
    out['PU_DO'] = out['PULocationID'].astype(str) + '_' + out['DOLocationID'].astype(str)

    return out

In [4]:
def one_hot_encoding(df, numerical, categorical, dv=None):
    """Turn the selected columns into a feature matrix via a DictVectorizer.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding every column named in ``numerical`` and ``categorical``.
    numerical : list of str
        Columns passed through with their existing values.
    categorical : list of str
        Columns cast to ``str`` before vectorizing.
    dv : object, optional
        An already fitted vectorizer exposing ``transform``. When ``None``
        (the default) a new ``DictVectorizer`` is created and fitted on ``df``.

    Returns
    -------
    tuple
        ``(X, dv)``: the transformed features and the vectorizer that
        produced them.
    """
    # astype returns a new frame, so the caller's columns keep their dtypes
    features = df[categorical + numerical].astype({col: str for col in categorical})
    records = features.to_dict(orient='records')

    # explicit None check: the decision must not depend on the vectorizer's truthiness
    if dv is None:
        dv = DictVectorizer()
        dv.fit(records)

    X = dv.transform(records)

    return X, dv

In [5]:
def train_model(model, X_train, y_train):
    """Fit ``model`` on the training data and hand it back.

    Parameters
    ----------
    model : object
        Any estimator exposing ``fit(X, y)``; it is fitted in place.
    X_train, y_train
        Training features and targets, forwarded unchanged to ``model.fit``.

    Returns
    -------
    object
        The same ``model`` instance, so the call can be chained or assigned.
    """
    model.fit(X_train, y_train)
    return model

In [6]:
# read in the raw data: the 2021-01 file is used for training, 2021-02 for validation
train_url = 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet'
val_url = 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet'

df_train = pd.read_parquet(train_url)
df_val = pd.read_parquet(val_url)

In [7]:
# data cleaning: run both frames through the same preprocessing step
df_train, df_val = map(preprocessing, (df_train, df_val))

In [8]:
# variable selection: columns fed to the vectorizer, split by how they are encoded
categorical, numerical = ['PU_DO'], ['trip_distance']

In [9]:
# categorical feature encoding
X_train, dv = one_hot_encoding(df_train, numerical, categorical)
X_val, _ = one_hot_encoding(df_val, numerical, categorical, dv=dv)

In [10]:
# regression target: the trip duration column, taken from each frame
target = 'duration'
y_train, y_val = df_train[target], df_val[target]

In [11]:
# persist the fitted vectorizer to disk
preprocessor_path = 'models/preprocessor.b'

# open() does not create missing parent directories, so make sure it exists
os.makedirs(os.path.dirname(preprocessor_path), exist_ok=True)

with open(preprocessor_path, "wb") as f_out:
    pickle.dump(dv, f_out)

## Manual Logging

In [12]:
# point MLflow at the tracking store and select the experiment for the runs below
tracking_uri = "sqlite:///mlflow.db"
experiment_name = "nyc-taxi-experiment"

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

2025/06/04 09:47:02 INFO mlflow.tracking.fluent: Experiment with name 'nyc-taxi-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='/Users/bastienwinant/Desktop/projects/mlops-zoomcamp/02-experiment-tracking/mlruns/1', creation_time=1749023222199, experiment_id='1', last_update_time=1749023222199, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [13]:
# every run in this section is logged by hand, so autologging is switched off
for flavor in (mlflow.sklearn, mlflow.xgboost):
    flavor.autolog(disable=True)

### Lasso

In [14]:
# fit a Lasso model and score it on the validation set
alpha = 0.01
lr = Lasso(alpha=alpha)
train_model(model=lr, X_train=X_train, y_train=y_train)

y_preds = lr.predict(X_val)
rmse = root_mean_squared_error(y_true=y_val, y_pred=y_preds)

In [15]:
# record the Lasso model (`lr`), its `alpha` and its validation `rmse` as one MLflow run
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # data sources and model name, logged as a single batch
    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "Lasso",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.log_param("alpha", alpha)
    mlflow.sklearn.log_model(lr, "model")
    mlflow.log_metric("rmse", rmse)



### XGBoost

In [16]:
# wrap features and targets in DMatrix objects for the native XGBoost API
train = xgb.DMatrix(data=X_train, label=y_train)
valid = xgb.DMatrix(data=X_val, label=y_val)

In [17]:
def objective(params):
    """Train one XGBoost candidate, log it to MLflow and report its loss.

    Reads the notebook-level ``train`` and ``valid`` DMatrix objects and the
    ``y_val`` targets.

    Parameters
    ----------
    params : dict
        Booster parameters; forwarded unchanged to ``xgb.train`` and logged
        with ``mlflow.log_params``.

    Returns
    -------
    dict
        ``{'loss': rmse, 'status': STATUS_OK}`` where ``rmse`` is the root
        mean squared error of the booster's predictions on ``valid``.
    """
    watchlist = [(valid, "validation")]
    booster = xgb.train(
        params=params,
        dtrain=train,
        num_boost_round=1000,
        evals=watchlist,
        early_stopping_rounds=50,
        verbose_eval=200,
    )

    rmse = root_mean_squared_error(y_val, booster.predict(valid))

    data_sources = {
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
    }

    # one MLflow run per evaluated parameter set
    with mlflow.start_run():
        mlflow.set_tag("developer", "Armand Winant")
        mlflow.log_params(data_sources)

        mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
        mlflow.log_params(params)
        mlflow.xgboost.log_model(booster, "model")
        mlflow.log_metric("rmse", rmse)

    return dict(loss=rmse, status=STATUS_OK)

In [18]:
# hyperopt search space: five sampled booster parameters plus two fixed entries
search_space = dict(
    max_depth=scope.int(hp.quniform('max_depth', 4, 100, 1)),
    learning_rate=hp.loguniform('learning_rate', -3, 0),
    reg_alpha=hp.loguniform('reg_alpha', -5, -1),
    reg_lambda=hp.loguniform('reg_lambda', -6, -1),
    min_child_weight=hp.loguniform('min_child_weight', -1, 3),
    # fixed for every trial
    objective='reg:squarederror',
    seed=42,
)

In [19]:
# run 50 TPE-guided evaluations of `objective` over `search_space`
search_trials = Trials()
best_result = fmin(
    objective,
    search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=search_trials,
)

[0]	validation-rmse:11.05979                          
[200]	validation-rmse:6.52755                         
[400]	validation-rmse:6.47401                         
[600]	validation-rmse:6.44350                         
[800]	validation-rmse:6.42233                         
[999]	validation-rmse:6.41148                         
  0%|          | 0/50 [01:50<?, ?trial/s, best loss=?]





[0]	validation-rmse:9.87885                                                      
[200]	validation-rmse:6.43034                                                    
[228]	validation-rmse:6.43581                                                    
  2%|▏         | 1/50 [03:47<1:46:08, 129.97s/trial, best loss: 6.41148433898481]





[0]	validation-rmse:11.12086                                                     
[200]	validation-rmse:6.33151                                                    
[394]	validation-rmse:6.32218                                                    
  4%|▍         | 2/50 [06:00<1:38:19, 122.92s/trial, best loss: 6.41148433898481]





[0]	validation-rmse:9.97237                                                       
[200]	validation-rmse:6.33711                                                     
[284]	validation-rmse:6.33606                                                     
  6%|▌         | 3/50 [07:12<1:39:32, 127.07s/trial, best loss: 6.322337074509729]





[0]	validation-rmse:11.00957                                                      
[200]	validation-rmse:6.37474                                                     
[400]	validation-rmse:6.33085                                                     
[600]	validation-rmse:6.31111                                                     
[800]	validation-rmse:6.30365                                                     
[979]	validation-rmse:6.30075                                                     
  8%|▊         | 4/50 [08:43<1:17:57, 101.69s/trial, best loss: 6.322337074509729]





[0]	validation-rmse:10.76963                                                      
[200]	validation-rmse:6.44622                                                    
[400]	validation-rmse:6.41302                                                    
[600]	validation-rmse:6.40413                                                    
[682]	validation-rmse:6.40672                                                    
 10%|█         | 5/50 [10:30<1:13:43, 98.31s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.62027                                                      
[200]	validation-rmse:6.47225                                                     
[400]	validation-rmse:6.43416                                                     
[600]	validation-rmse:6.41420                                                     
[800]	validation-rmse:6.40394                                                     
[999]	validation-rmse:6.40013                                                     
 12%|█▏        | 6/50 [14:00<1:15:26, 102.87s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.05643                                                      
[200]	validation-rmse:6.51546                                                     
[400]	validation-rmse:6.50468                                                     
[600]	validation-rmse:6.50100                                                     
[800]	validation-rmse:6.50001                                                     
[816]	validation-rmse:6.49997                                                     
 14%|█▍        | 7/50 [16:39<1:41:16, 141.32s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:8.96487                                                       
[188]	validation-rmse:6.33400                                                     
 16%|█▌        | 8/50 [17:32<1:40:36, 143.72s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.26801                                                      
[200]	validation-rmse:6.37577                                                     
[400]	validation-rmse:6.35854                                                     
[533]	validation-rmse:6.35635                                                     
 18%|█▊        | 9/50 [19:30<1:17:17, 113.10s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.76433                                                       
[200]	validation-rmse:6.61391                                                      
[400]	validation-rmse:6.58644                                                      
[600]	validation-rmse:6.57173                                                      
[800]	validation-rmse:6.55975                                                      
[999]	validation-rmse:6.55318                                                      
 20%|██        | 10/50 [20:43<1:18:18, 117.46s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:9.68607                                                        
[200]	validation-rmse:6.55276                                                      
[400]	validation-rmse:6.55016                                                      
 22%|██▏       | 11/50 [21:45<1:05:51, 101.32s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:7.65031                                                        
[126]	validation-rmse:6.40788                                                   
 24%|██▍       | 12/50 [22:28<57:18, 90.49s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.46688                                                    
[200]	validation-rmse:6.65445                                                   
[400]	validation-rmse:6.60599                                                   
[600]	validation-rmse:6.57085                                                   
[800]	validation-rmse:6.54338                                                   
[999]	validation-rmse:6.52222                                                   
 26%|██▌       | 13/50 [23:20<46:06, 74.78s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.34082                                                    
[200]	validation-rmse:6.38379                                                   
[400]	validation-rmse:6.33859                                                   
[600]	validation-rmse:6.32776                                                   
[610]	validation-rmse:6.32774                                                   
 28%|██▊       | 14/50 [24:17<40:54, 68.18s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.30312                                                    
[200]	validation-rmse:6.40877                                                   
[400]	validation-rmse:6.38961                                                   
[600]	validation-rmse:6.38445                                                   
[733]	validation-rmse:6.38471                                                   
 30%|███       | 15/50 [26:30<38:19, 65.70s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:7.40781                                                     
[130]	validation-rmse:6.44493                                                   
 32%|███▏      | 16/50 [27:16<49:48, 87.91s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:9.18397                                                     
[200]	validation-rmse:6.41608                                                   
[400]	validation-rmse:6.39558                                                   
[600]	validation-rmse:6.38738                                                   
[647]	validation-rmse:6.38765                                                   
 34%|███▍      | 17/50 [28:01<39:46, 72.32s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:8.37337                                                     
[200]	validation-rmse:6.34980                                                   
[321]	validation-rmse:6.34491                                                   
 36%|███▌      | 18/50 [28:34<34:07, 63.98s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:7.74516                                                     
[187]	validation-rmse:6.62574                                                   
 38%|███▊      | 19/50 [29:21<28:47, 55.72s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.79711                                                    
[200]	validation-rmse:6.37165                                                   
[400]	validation-rmse:6.34253                                                   
[600]	validation-rmse:6.33071                                                   
[800]	validation-rmse:6.32720                                                   
[916]	validation-rmse:6.32775                                                   
 40%|████      | 20/50 [33:35<26:18, 52.62s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.72677                                                     
[200]	validation-rmse:6.34769                                                    
[400]	validation-rmse:6.30963                                                    
[600]	validation-rmse:6.30362                                                    
[604]	validation-rmse:6.30330                                                    
 42%|████▏     | 21/50 [35:22<57:04, 118.10s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.66601                                                     
[200]	validation-rmse:6.35279                                                    
[400]	validation-rmse:6.31738                                                    
[543]	validation-rmse:6.31199                                                    
 44%|████▍     | 22/50 [36:36<51:10, 109.65s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:6.73158                                                      
[162]	validation-rmse:6.51924                                                    
 46%|████▌     | 23/50 [37:12<45:43, 101.62s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.54002                                                     
[200]	validation-rmse:6.43562                                                   
[400]	validation-rmse:6.38502                                                   
[600]	validation-rmse:6.35522                                                   
[800]	validation-rmse:6.33569                                                   
[999]	validation-rmse:6.32254                                                   
 48%|████▊     | 24/50 [38:44<34:05, 78.68s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.55323                                                    
[200]	validation-rmse:6.42020                                                   
[400]	validation-rmse:6.39925                                                   
[413]	validation-rmse:6.39982                                                   
 50%|█████     | 25/50 [39:58<34:52, 83.70s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.67703                                                    
[200]	validation-rmse:6.39986                                                   
[400]	validation-rmse:6.35482                                                   
[600]	validation-rmse:6.32976                                                   
[800]	validation-rmse:6.31380                                                   
[999]	validation-rmse:6.30377                                                   
 52%|█████▏    | 26/50 [42:05<32:13, 80.58s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.91210                                                    
[200]	validation-rmse:6.60035                                                   
[400]	validation-rmse:6.53876                                                   
[600]	validation-rmse:6.50154                                                   
[800]	validation-rmse:6.47600                                                   
[999]	validation-rmse:6.45872                                                   
 54%|█████▍    | 27/50 [43:03<36:31, 95.26s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.35781                                                    
[200]	validation-rmse:6.41346                                                   
[400]	validation-rmse:6.36323                                                   
[600]	validation-rmse:6.33613                                                   
[800]	validation-rmse:6.31993                                                   
[999]	validation-rmse:6.30901                                                   
 56%|█████▌    | 28/50 [44:37<30:30, 83.21s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.34273                                                    
[200]	validation-rmse:6.44396                                                   
[400]	validation-rmse:6.40115                                                   
[600]	validation-rmse:6.38607                                                   
[730]	validation-rmse:6.38557                                                   
 58%|█████▊    | 29/50 [45:48<30:16, 86.52s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.91000                                                    
[200]	validation-rmse:6.33359                                                   
[400]	validation-rmse:6.31068                                                   
[466]	validation-rmse:6.31087                                                   
 60%|██████    | 30/50 [47:06<27:03, 81.18s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.08797                                                    
[200]	validation-rmse:6.44678                                                   
[400]	validation-rmse:6.41061                                                   
[600]	validation-rmse:6.39798                                                   
[687]	validation-rmse:6.39825                                                   
 62%|██████▏   | 31/50 [48:49<25:59, 82.05s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:9.98603                                                     
[200]	validation-rmse:6.33373                                                   
[281]	validation-rmse:6.32945                                                   
 64%|██████▍   | 32/50 [49:46<26:08, 87.15s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:9.56929                                                     
[200]	validation-rmse:6.42963                                                   
[400]	validation-rmse:6.36270                                                   
[600]	validation-rmse:6.33113                                                   
[800]	validation-rmse:6.31466                                                   
[999]	validation-rmse:6.30550                                                   
 66%|██████▌   | 33/50 [50:33<22:11, 78.35s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.18534                                                    
[200]	validation-rmse:6.42935                                                   
[400]	validation-rmse:6.41428                                                   
[433]	validation-rmse:6.41403                                                   
 68%|██████▊   | 34/50 [51:26<18:01, 67.61s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.55110                                                    
[200]	validation-rmse:6.40760                                                   
[400]	validation-rmse:6.37844                                                   
[600]	validation-rmse:6.36637                                                   
[800]	validation-rmse:6.35661                                                   
[999]	validation-rmse:6.35209                                                   
 70%|███████   | 35/50 [53:26<16:19, 65.29s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.22284                                                    
[200]	validation-rmse:6.39284                                                   
[400]	validation-rmse:6.35245                                                   
[600]	validation-rmse:6.33417                                                   
[800]	validation-rmse:6.32772                                                   
[857]	validation-rmse:6.32757                                                   
 72%|███████▏  | 36/50 [54:49<19:02, 81.63s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.86375                                                    
[200]	validation-rmse:6.48878                                                   
[400]	validation-rmse:6.46164                                                   
[600]	validation-rmse:6.45118                                                   
[800]	validation-rmse:6.44394                                                   
[999]	validation-rmse:6.43773                                                   
 74%|███████▍  | 37/50 [55:56<17:31, 80.88s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.56212                                                    
[200]	validation-rmse:6.40412                                                   
[400]	validation-rmse:6.38046                                                   
[523]	validation-rmse:6.37946                                                   
 76%|███████▌  | 38/50 [57:25<15:20, 76.73s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.45319                                                    
[200]	validation-rmse:6.45200                                                   
[400]	validation-rmse:6.41626                                                   
[600]	validation-rmse:6.40252                                                   
[748]	validation-rmse:6.39959                                                   
 78%|███████▊  | 39/50 [59:51<14:54, 81.28s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:8.83577                                                        
[151]	validation-rmse:6.45482                                                      
 80%|████████  | 40/50 [1:00:51<16:54, 101.45s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.12326                                                       
[200]	validation-rmse:6.34058                                                     
[400]	validation-rmse:6.31738                                                     
[472]	validation-rmse:6.31716                                                     
 82%|████████▏ | 41/50 [1:02:25<13:06, 87.39s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.71312                                                      
[200]	validation-rmse:6.72100                                                     
[400]	validation-rmse:6.67664                                                     
[600]	validation-rmse:6.64553                                                     
[800]	validation-rmse:6.61939                                                     
[999]	validation-rmse:6.59869                                                     
 84%|████████▍ | 42/50 [1:03:10<11:56, 89.60s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:9.73213                                                       
[200]	validation-rmse:6.41820                                                     
[321]	validation-rmse:6.41240                                                     
 86%|████████▌ | 43/50 [1:04:02<08:49, 75.58s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.01815                                                      
[200]	validation-rmse:6.46302                                                     
[400]	validation-rmse:6.45295                                                     
[533]	validation-rmse:6.45276                                                     
 88%|████████▊ | 44/50 [1:05:32<06:50, 68.40s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:9.29651                                                       
[161]	validation-rmse:6.34097                                                     
 90%|█████████ | 45/50 [1:06:12<06:19, 75.92s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:8.59640                                                       
[200]	validation-rmse:6.36623                                                     
[348]	validation-rmse:6.36003                                                     
 92%|█████████▏| 46/50 [1:06:36<04:14, 63.62s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:11.44513                                                      
[200]	validation-rmse:6.65967                                                     
[400]	validation-rmse:6.64882                                                     
[600]	validation-rmse:6.64347                                                     
[800]	validation-rmse:6.63963                                                     
[999]	validation-rmse:6.63734                                                     
 94%|█████████▍| 47/50 [1:07:58<02:37, 52.35s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.73484                                                      
[200]	validation-rmse:6.42469                                                     
[400]	validation-rmse:6.38084                                                     
[600]	validation-rmse:6.36421                                                     
[800]	validation-rmse:6.35776                                                     
[999]	validation-rmse:6.35291                                                     
 96%|█████████▌| 48/50 [1:08:53<02:05, 62.57s/trial, best loss: 6.300646967506354]





[0]	validation-rmse:10.29921                                                      
[200]	validation-rmse:6.34870                                                     
[400]	validation-rmse:6.31505                                                     
[548]	validation-rmse:6.31207                                                     
 98%|█████████▊| 49/50 [1:09:37<00:58, 58.74s/trial, best loss: 6.300646967506354]





100%|██████████| 50/50 [1:09:46<00:00, 83.72s/trial, best loss: 6.300646967506354]


In [20]:
# fixed booster parameters used to train the final XGBoost model
params = dict(
    learning_rate=0.23768760883453516,
    max_depth=32,
    min_child_weight=1.4255637138891217,
    objective='reg:squarederror',
    reg_alpha=0.20333941491253288,
    reg_lambda=0.014135076785365224,
    seed=42,
)

In [21]:
# train a booster with the parameters held in `params`, then log it as its own run
watchlist = [(valid, "validation")]
booster = xgb.train(
    params=params,
    dtrain=train,
    num_boost_round=1000,
    evals=watchlist,
    early_stopping_rounds=50,
    verbose_eval=200,
)

y_preds = booster.predict(valid)
rmse = root_mean_squared_error(y_true=y_val, y_pred=y_preds)

with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # data sources, logged as a single batch
    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.log_params(params)
    mlflow.xgboost.log_model(booster, "model")
    mlflow.log_metric("rmse", rmse)

[0]	validation-rmse:10.35446
[200]	validation-rmse:6.35461
[400]	validation-rmse:6.31678
[581]	validation-rmse:6.30616




### Random Forest

In [22]:
# fit a random forest and score it on the validation set;
# the seed is fixed so a re-run reproduces the same forest and RMSE
rf = RandomForestRegressor(random_state=42)
train_model(rf, X_train, y_train)
y_preds = rf.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [23]:
# record the random forest (`rf`) and its validation `rmse` as one MLflow run
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # data sources and model name, logged as a single batch
    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "RandomForestRegressor",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(rf, "model")
    mlflow.log_metric("rmse", rmse)



### Linear SVR

In [24]:
# fit a linear support vector regressor and score it on the validation set;
# the iteration cap is set very high, and the seed is fixed so a re-run
# reproduces the same fit
svr = LinearSVR(max_iter=10000000, random_state=42)
train_model(svr, X_train, y_train)
y_preds = svr.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [25]:
# record the linear SVR (`svr`) and its validation `rmse` as one MLflow run
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # data sources and model name, logged as a single batch
    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "LinearSVR",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(svr, "model")
    mlflow.log_metric("rmse", rmse)



### Gradient Boosting

In [26]:
# fit a gradient boosting regressor and score it on the validation set;
# the seed is fixed so a re-run reproduces the same model and RMSE
gb = GradientBoostingRegressor(random_state=42)
train_model(gb, X_train, y_train)
y_preds = gb.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

In [27]:
# record the gradient boosting model (`gb`) and its validation `rmse` as one MLflow run
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # data sources and model name, logged as a single batch
    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "GradientBoostingRegressor",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(gb, "model")
    mlflow.log_metric("rmse", rmse)



### Extra Trees

In [28]:
# fit an extra-trees regressor and score it on the validation set;
# the seed is fixed so a re-run reproduces the same trees and RMSE
et = ExtraTreesRegressor(random_state=42)
train_model(et, X_train, y_train)
y_preds = et.predict(X_val)
rmse = root_mean_squared_error(y_val, y_preds)

KeyboardInterrupt: 

In [None]:
# record the extra-trees model (`et`) and its validation `rmse` as one MLflow run;
# both names must come from a completed run of the fitting cell
with mlflow.start_run():
    mlflow.set_tag("developer", "Armand Winant")

    # data sources and model name, logged as a single batch
    mlflow.log_params({
        "training-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-01.parquet",
        "validation-data": "https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2021-02.parquet",
        "model": "ExtraTreesRegressor",
    })

    mlflow.log_artifact("models/preprocessor.b", artifact_path="preprocessor")
    mlflow.sklearn.log_model(et, "model")
    mlflow.log_metric("rmse", rmse)