In [2]:
!python -V

Python 3.11.9


In [3]:
import pandas as pd

In [4]:
import pickle

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error

In [7]:
import mlflow


# Point MLflow at the local SQLite tracking store, then select the experiment
# that the runs started in this notebook are recorded under.
TRACKING_URI = "sqlite:///mlflow.db"
EXPERIMENT_NAME = "nyc-taxi-experiment"

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='file:///c:/Users/arun_prasath/workspace/courses/mlops-zoomcamp/02-experiment-tracking/mlruns/1', creation_time=1719996657207, experiment_id='1', last_update_time=1719996657207, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [8]:
def read_dataframe(filename):
    """Load a trip-record parquet file and prepare it for modelling.

    Parameters
    ----------
    filename : str or path-like
        Location of the parquet file; passed straight to ``pd.read_parquet``.

    Returns
    -------
    pandas.DataFrame
        The rows whose trip duration lies in the closed interval [1, 60]
        minutes, with an added ``duration`` column (float minutes) and with
        ``PULocationID`` / ``DOLocationID`` converted to strings.
    """
    df = pd.read_parquet(filename)

    # Ensure both timestamp columns are datetimes before subtracting them.
    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorised timedelta -> minutes (replaces a per-row Python lambda).
    elapsed = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = elapsed.dt.total_seconds() / 60

    # Take an explicit copy of the filtered rows so the column assignment below
    # writes to an independent frame rather than to a slice of the unfiltered
    # one (which raises SettingWithCopyWarning).
    df = df[(df.duration >= 1) & (df.duration <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df

In [9]:
# Load the 2024-01 file for training and the 2024-02 file for validation,
# running both through the same preparation function.
df_train, df_val = map(
    read_dataframe,
    (
        '../data/green_tripdata_2024-01.parquet',
        '../data/green_tripdata_2024-02.parquet',
    ),
)

In [10]:
# Number of rows kept in each split after the duration filter.
df_train.shape[0], df_val.shape[0]

(54373, 51497)

In [11]:
# Join the pickup and dropoff location IDs into one "<pickup>_<dropoff>"
# string feature on both frames.
for trips in (df_train, df_val):
    trips['PU_DO'] = trips['PULocationID'] + '_' + trips['DOLocationID']

In [12]:
# Model inputs: the combined pickup/dropoff ID (categorical) and the trip
# distance (numerical).
categorical = ['PU_DO']
numerical = ['trip_distance']
feature_columns = categorical + numerical

dv = DictVectorizer()

train_dicts = df_train[feature_columns].to_dict(orient='records')
val_dicts = df_val[feature_columns].to_dict(orient='records')

# Fit the vectorizer on the training records only; the validation records are
# transformed with the already-fitted mapping.
X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

In [13]:
# The regression target is the duration column of each split.
target = 'duration'
y_train, y_val = (split[target].values for split in (df_train, df_val))

In [14]:
# Baseline: plain linear regression fitted on the training matrix and scored
# on the validation matrix.
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

# RMSE computed as the square root of the MSE. The `squared=False` shortcut
# was deprecated in scikit-learn 1.4 and removed in 1.6, so it is avoided here.
mean_squared_error(y_val, y_pred) ** 0.5



5.995083318320955

In [15]:
# Persist the fitted vectorizer together with the model as one pickled tuple.
model_path = '../models/lin_reg.bin'
with open(model_path, 'wb') as model_file:
    pickle.dump((dv, lr), model_file)

In [16]:
# Train a Lasso model inside a single MLflow run, recording the data paths,
# the regularisation strength and the validation RMSE.
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    mlflow.log_param("train-data-path", "../data/green_tripdata_2024-01.parquet")
    mlflow.log_param("valid-data-path", "../data/green_tripdata_2024-02.parquet")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha=alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    # Square root of the MSE; `squared=False` was deprecated in scikit-learn
    # 1.4 and removed in 1.6.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)

    # NOTE(review): this uploads the file written by the earlier pickle cell,
    # which holds the LinearRegression model, not the Lasso fitted in this
    # run. Confirm that is intended.
    mlflow.log_artifact(local_path="../models/lin_reg.bin", artifact_path="models_pickle")



In [17]:
import xgboost as xgb

In [18]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [19]:
# Wrap the vectorized features and their targets in XGBoost DMatrix objects.
train, valid = (
    xgb.DMatrix(features, label=labels)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

In [20]:
def objective(params):
    """Train one XGBoost model for a hyperopt trial and log it to MLflow.

    Parameters
    ----------
    params : dict
        Booster parameters for this trial; logged as-is and passed to
        ``xgb.train``.

    Returns
    -------
    dict
        ``{'loss': <validation RMSE>, 'status': STATUS_OK}``.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        # Square root of the MSE; `squared=False` was deprecated in
        # scikit-learn 1.4 and removed in 1.6.
        rmse = mean_squared_error(y_val, y_pred) ** 0.5
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [21]:
# Hyperparameter ranges sampled by hyperopt for each XGBoost trial.
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    # 'reg:squarederror' is the current name of the squared-error objective;
    # the old alias 'reg:linear' is deprecated in XGBoost.
    'objective': 'reg:squarederror',
    'seed': 42
}

# NOTE(review): no `rstate` is passed to fmin, so the sampled trials are
# presumably not repeatable between runs. Confirm whether that matters here.
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:7.93917                           
[1]	validation-rmse:7.09039                           
[2]	validation-rmse:6.48602                           
[3]	validation-rmse:6.06816                           
[4]	validation-rmse:5.78482                           
[5]	validation-rmse:5.59209                           
[6]	validation-rmse:5.46501                           
[7]	validation-rmse:5.38168                           
[8]	validation-rmse:5.32558                           
[9]	validation-rmse:5.29144                           
[10]	validation-rmse:5.26402                          
[11]	validation-rmse:5.24835                          
[12]	validation-rmse:5.23623                          
[13]	validation-rmse:5.22766                          
[14]	validation-rmse:5.22075                          
[15]	validation-rmse:5.21668                          
[16]	validation-rmse:5.21265                          
[17]	validation-rmse:5.20864                          
[18]	valid





[0]	validation-rmse:7.47513                                                    
[1]	validation-rmse:6.51375                                                    
[2]	validation-rmse:5.97590                                                    
[3]	validation-rmse:5.67954                                                    
[4]	validation-rmse:5.52285                                                    
[5]	validation-rmse:5.42893                                                    
[6]	validation-rmse:5.37640                                                    
[7]	validation-rmse:5.34230                                                    
[8]	validation-rmse:5.31808                                                    
[9]	validation-rmse:5.30281                                                    
[10]	validation-rmse:5.29217                                                   
[11]	validation-rmse:5.28326                                                   
[12]	validation-rmse:5.27605            





[0]	validation-rmse:8.80116                                                    
[1]	validation-rmse:8.51382                                                    
[2]	validation-rmse:8.24849                                                    
[3]	validation-rmse:8.00132                                                    
[4]	validation-rmse:7.77584                                                    
[5]	validation-rmse:7.56717                                                    
[6]	validation-rmse:7.37448                                                    
[7]	validation-rmse:7.19892                                                    
[8]	validation-rmse:7.03982                                                    
[9]	validation-rmse:6.88846                                                    
[10]	validation-rmse:6.75161                                                   
[11]	validation-rmse:6.62860                                                   
[12]	validation-rmse:6.51680            





[0]	validation-rmse:8.53266                                                    
[1]	validation-rmse:8.03351                                                    
[2]	validation-rmse:7.60268                                                    
[3]	validation-rmse:7.23805                                                    
[4]	validation-rmse:6.93127                                                    
[5]	validation-rmse:6.66519                                                    
[6]	validation-rmse:6.43603                                                    
[7]	validation-rmse:6.24885                                                    
[8]	validation-rmse:6.09199                                                    
[9]	validation-rmse:5.95539                                                    
[10]	validation-rmse:5.85390                                                   
[11]	validation-rmse:5.76598                                                   
[12]	validation-rmse:5.69239            





[0]	validation-rmse:6.33873                                                    
[1]	validation-rmse:5.57838                                                    
[2]	validation-rmse:5.37380                                                    
[3]	validation-rmse:5.29577                                                    
[4]	validation-rmse:5.27587                                                    
[5]	validation-rmse:5.26818                                                    
[6]	validation-rmse:5.26350                                                    
[7]	validation-rmse:5.25896                                                    
[8]	validation-rmse:5.25362                                                    
[9]	validation-rmse:5.24618                                                    
[10]	validation-rmse:5.24459                                                   
[11]	validation-rmse:5.24048                                                   
[12]	validation-rmse:5.23627            





[0]	validation-rmse:8.56239                                                    
[1]	validation-rmse:8.09221                                                    
[2]	validation-rmse:7.68352                                                    
[3]	validation-rmse:7.33572                                                    
[4]	validation-rmse:7.02923                                                    
[5]	validation-rmse:6.76958                                                    
[6]	validation-rmse:6.55081                                                    
[7]	validation-rmse:6.36710                                                    
[8]	validation-rmse:6.19923                                                    
[9]	validation-rmse:6.06751                                                    
[10]	validation-rmse:5.95876                                                   
[11]	validation-rmse:5.86344                                                   
[12]	validation-rmse:5.78012            





[0]	validation-rmse:8.48516                                                    
[1]	validation-rmse:7.94981                                                    
[2]	validation-rmse:7.49569                                                    
[3]	validation-rmse:7.11321                                                    
[4]	validation-rmse:6.79178                                                    
[5]	validation-rmse:6.52417                                                    
[6]	validation-rmse:6.30169                                                    
[7]	validation-rmse:6.11805                                                    
[8]	validation-rmse:5.96667                                                    
[9]	validation-rmse:5.84292                                                    
[10]	validation-rmse:5.74165                                                   
[11]	validation-rmse:5.65889                                                   
[12]	validation-rmse:5.59178            





[0]	validation-rmse:8.63452                                                    
[1]	validation-rmse:8.21119                                                    
[2]	validation-rmse:7.83602                                                    
[3]	validation-rmse:7.50458                                                    
[4]	validation-rmse:7.21316                                                    
[5]	validation-rmse:6.95759                                                    
[6]	validation-rmse:6.73461                                                    
[7]	validation-rmse:6.53978                                                    
[8]	validation-rmse:6.37013                                                    
[9]	validation-rmse:6.22309                                                    
[10]	validation-rmse:6.09601                                                   
[11]	validation-rmse:5.98613                                                   
[12]	validation-rmse:5.89191            





[0]	validation-rmse:7.26872                                                    
[1]	validation-rmse:6.25878                                                    
[2]	validation-rmse:5.72934                                                    
[3]	validation-rmse:5.46403                                                    
[4]	validation-rmse:5.33683                                                    
[5]	validation-rmse:5.27261                                                    
[6]	validation-rmse:5.23937                                                    
[7]	validation-rmse:5.21803                                                    
[8]	validation-rmse:5.20906                                                    
[9]	validation-rmse:5.20002                                                    
[10]	validation-rmse:5.19450                                                   
[11]	validation-rmse:5.18997                                                   
[12]	validation-rmse:5.18797            





[0]	validation-rmse:8.67039                                                    
[1]	validation-rmse:8.27610                                                    
[2]	validation-rmse:7.92410                                                    
[3]	validation-rmse:7.61407                                                    
[4]	validation-rmse:7.33654                                                    
[5]	validation-rmse:7.08781                                                    
[6]	validation-rmse:6.87113                                                    
[7]	validation-rmse:6.67810                                                    
[8]	validation-rmse:6.50602                                                    
[9]	validation-rmse:6.36024                                                    
[10]	validation-rmse:6.23175                                                   
[11]	validation-rmse:6.11715                                                   
[12]	validation-rmse:6.01840            





[0]	validation-rmse:6.17993                                                     
[1]	validation-rmse:5.47396                                                     
[2]	validation-rmse:5.32731                                                     
[3]	validation-rmse:5.28764                                                     
[4]	validation-rmse:5.27042                                                     
[5]	validation-rmse:5.25950                                                     
[6]	validation-rmse:5.24650                                                     
[7]	validation-rmse:5.23952                                                     
[8]	validation-rmse:5.22992                                                     
[9]	validation-rmse:5.23032                                                     
[10]	validation-rmse:5.22955                                                    
[11]	validation-rmse:5.22900                                                    
[12]	validation-rmse:5.22134





[0]	validation-rmse:8.79506                                                     
[1]	validation-rmse:8.50188                                                     
[2]	validation-rmse:8.23011                                                     
[3]	validation-rmse:7.97874                                                     
[4]	validation-rmse:7.74613                                                     
[5]	validation-rmse:7.53186                                                     
[6]	validation-rmse:7.33370                                                     
[7]	validation-rmse:7.15125                                                     
[8]	validation-rmse:6.98267                                                     
[9]	validation-rmse:6.82873                                                     
[10]	validation-rmse:6.68719                                                    
[11]	validation-rmse:6.55757                                                    
[12]	validation-rmse:6.43742





[3]	validation-rmse:6.08850                                                     
[4]	validation-rmse:5.88001                                                     
[5]	validation-rmse:5.74855                                                     
[6]	validation-rmse:5.67055                                                     
[7]	validation-rmse:5.62182                                                     
[8]	validation-rmse:5.59060                                                     
[9]	validation-rmse:5.56779                                                     
[10]	validation-rmse:5.54711                                                    
[11]	validation-rmse:5.53815                                                    
[12]	validation-rmse:5.52804                                                    
[13]	validation-rmse:5.52059                                                    
[14]	validation-rmse:5.51632                                                    
[15]	validation-rmse:5.51250





[0]	validation-rmse:8.28471                                                     
[1]	validation-rmse:7.62448                                                     
[2]	validation-rmse:7.10304                                                     
[3]	validation-rmse:6.69603                                                     
[4]	validation-rmse:6.38101                                                     
[5]	validation-rmse:6.14068                                                     
[6]	validation-rmse:5.95793                                                     
[7]	validation-rmse:5.81965                                                     
[8]	validation-rmse:5.71549                                                     
[9]	validation-rmse:5.63842                                                     
[10]	validation-rmse:5.57999                                                    
[11]	validation-rmse:5.53504                                                    
[12]	validation-rmse:5.50088





[0]	validation-rmse:7.10031                                                     
[1]	validation-rmse:6.12687                                                     
[2]	validation-rmse:5.68212                                                     
[3]	validation-rmse:5.46908                                                     
[4]	validation-rmse:5.39549                                                     
[5]	validation-rmse:5.36020                                                     
[6]	validation-rmse:5.32638                                                     
[7]	validation-rmse:5.31430                                                     
[8]	validation-rmse:5.30776                                                     
[9]	validation-rmse:5.30310                                                     
[10]	validation-rmse:5.29517                                                    
[11]	validation-rmse:5.29641                                                    
[12]	validation-rmse:5.29390





[0]	validation-rmse:8.65581                                                     
[1]	validation-rmse:8.24797                                                     
[2]	validation-rmse:7.88446                                                     
[3]	validation-rmse:7.55974                                                     
[4]	validation-rmse:7.27169                                                     
[5]	validation-rmse:7.01764                                                     
[6]	validation-rmse:6.79273                                                     
[7]	validation-rmse:6.59416                                                     
[8]	validation-rmse:6.41880                                                     
[9]	validation-rmse:6.26684                                                     
[10]	validation-rmse:6.13152                                                    
[11]	validation-rmse:6.01550                                                    
[12]	validation-rmse:5.91196





[1]	validation-rmse:7.09446
[2]	validation-rmse:6.54068                                                     
[3]	validation-rmse:6.17384                                                     
[4]	validation-rmse:5.93420                                                     
[5]	validation-rmse:5.77485                                                     
[6]	validation-rmse:5.67646                                                     
[7]	validation-rmse:5.60880                                                     
[8]	validation-rmse:5.56182                                                     
[9]	validation-rmse:5.53391                                                     
[10]	validation-rmse:5.51193                                                    
[11]	validation-rmse:5.49274                                                    
[12]	validation-rmse:5.48289                                                    
[13]	validation-rmse:5.47334                                                    






[0]	validation-rmse:6.28823                                                     
[1]	validation-rmse:5.53818                                                     
[2]	validation-rmse:5.35792                                                     
[3]	validation-rmse:5.31012                                                     
[4]	validation-rmse:5.29010                                                     
[5]	validation-rmse:5.27647                                                     
[6]	validation-rmse:5.26944                                                     
[7]	validation-rmse:5.26802                                                     
[8]	validation-rmse:5.25713                                                     
[9]	validation-rmse:5.25682                                                     
[10]	validation-rmse:5.25594                                                    
[11]	validation-rmse:5.25182                                                    
[12]	validation-rmse:5.23787





[0]	validation-rmse:8.58515                                                     
[1]	validation-rmse:8.12490                                                     
[2]	validation-rmse:7.72352                                                     
[3]	validation-rmse:7.37501                                                     
[4]	validation-rmse:7.07418                                                     
[5]	validation-rmse:6.81532                                                     
[6]	validation-rmse:6.59349                                                     
[7]	validation-rmse:6.40366                                                     
[8]	validation-rmse:6.24210                                                     
[9]	validation-rmse:6.10495                                                     
[10]	validation-rmse:5.98817                                                    
[11]	validation-rmse:5.88997                                                    
[12]	validation-rmse:5.80667





[0]	validation-rmse:5.56967                                                     
[1]	validation-rmse:5.34435                                                     
[2]	validation-rmse:5.30855                                                     
[3]	validation-rmse:5.29612                                                     
[4]	validation-rmse:5.29065                                                     
[5]	validation-rmse:5.28647                                                     
[6]	validation-rmse:5.27671                                                     
[7]	validation-rmse:5.27127                                                     
[8]	validation-rmse:5.26769                                                     
[9]	validation-rmse:5.26107                                                     
[10]	validation-rmse:5.26044                                                    
[11]	validation-rmse:5.25743                                                    
[12]	validation-rmse:5.25783





[0]	validation-rmse:7.96341                                                     
[1]	validation-rmse:7.14643                                                     
[2]	validation-rmse:6.57521                                                     
[3]	validation-rmse:6.18172                                                     
[4]	validation-rmse:5.91651                                                     
[5]	validation-rmse:5.74460                                                     
[6]	validation-rmse:5.62532                                                     
[7]	validation-rmse:5.53983                                                     
[8]	validation-rmse:5.48766                                                     
[9]	validation-rmse:5.44803                                                     
[10]	validation-rmse:5.42073                                                    
[11]	validation-rmse:5.40051                                                    
[12]	validation-rmse:5.38296





[0]	validation-rmse:8.25585                                                     
[1]	validation-rmse:7.57580                                                     
[2]	validation-rmse:7.04460                                                     
[3]	validation-rmse:6.63284                                                     
[4]	validation-rmse:6.31480                                                     
[5]	validation-rmse:6.07182                                                     
[6]	validation-rmse:5.88750                                                     
[7]	validation-rmse:5.74544                                                     
[8]	validation-rmse:5.63922                                                     
[9]	validation-rmse:5.55727                                                     
[10]	validation-rmse:5.49701                                                    
[11]	validation-rmse:5.44965                                                    
[12]	validation-rmse:5.41475





[0]	validation-rmse:8.34372                                                     
[1]	validation-rmse:7.72165                                                     
[2]	validation-rmse:7.22156                                                     
[3]	validation-rmse:6.82216                                                     
[4]	validation-rmse:6.50506                                                     
[5]	validation-rmse:6.25975                                                     
[6]	validation-rmse:6.06260                                                     
[7]	validation-rmse:5.91192                                                     
[8]	validation-rmse:5.79515                                                     
[9]	validation-rmse:5.70314                                                     
[10]	validation-rmse:5.62924                                                    
[11]	validation-rmse:5.57211                                                    
[12]	validation-rmse:5.52727





[0]	validation-rmse:8.27221                                                     
[1]	validation-rmse:7.59845                                                     
[2]	validation-rmse:7.06614                                                     
[3]	validation-rmse:6.64577                                                     
[4]	validation-rmse:6.32050                                                     
[5]	validation-rmse:6.06921                                                     
[6]	validation-rmse:5.87453                                                     
[7]	validation-rmse:5.72991                                                     
[8]	validation-rmse:5.61315                                                     
[9]	validation-rmse:5.52576                                                     
[10]	validation-rmse:5.46095                                                    
[11]	validation-rmse:5.40900                                                    
[12]	validation-rmse:5.36924





[2]	validation-rmse:7.03397                                                     
[3]	validation-rmse:6.64317                                                     
[4]	validation-rmse:6.35207                                                     
[5]	validation-rmse:6.13340                                                     
[6]	validation-rmse:5.97117                                                     
[7]	validation-rmse:5.85356                                                     
[8]	validation-rmse:5.77022                                                     
[9]	validation-rmse:5.70396                                                     
[10]	validation-rmse:5.65589                                                    
[11]	validation-rmse:5.61860                                                    
[12]	validation-rmse:5.59133                                                    
[13]	validation-rmse:5.57033                                                    
[14]	validation-rmse:5.55227





[0]	validation-rmse:6.98021                                                     
[1]	validation-rmse:6.03741                                                     
[2]	validation-rmse:5.65710                                                     
[3]	validation-rmse:5.50612                                                     
[4]	validation-rmse:5.42905                                                     
[5]	validation-rmse:5.38626                                                     
[6]	validation-rmse:5.36329                                                     
[7]	validation-rmse:5.35451                                                     
[8]	validation-rmse:5.35045                                                     
[9]	validation-rmse:5.34694                                                     
[10]	validation-rmse:5.34311                                                    
[11]	validation-rmse:5.34120                                                    
[12]	validation-rmse:5.33967





[0]	validation-rmse:8.03750                                                     
[1]	validation-rmse:7.24444                                                     
[2]	validation-rmse:6.67010                                                     
[3]	validation-rmse:6.25901                                                     
[4]	validation-rmse:5.97350                                                     
[5]	validation-rmse:5.77120                                                     
[6]	validation-rmse:5.62878                                                     
[7]	validation-rmse:5.52895                                                     
[8]	validation-rmse:5.45977                                                     
[9]	validation-rmse:5.40954                                                     
[10]	validation-rmse:5.37396                                                    
[11]	validation-rmse:5.34634                                                    
[12]	validation-rmse:5.32521





[0]	validation-rmse:7.68577                                                     
[1]	validation-rmse:6.76308                                                     
[2]	validation-rmse:6.20745                                                     
[3]	validation-rmse:5.86419                                                     
[4]	validation-rmse:5.66093                                                     
[5]	validation-rmse:5.52419                                                     
[6]	validation-rmse:5.45013                                                     
[7]	validation-rmse:5.40235                                                     
[8]	validation-rmse:5.36157                                                     
[9]	validation-rmse:5.34458                                                     
[10]	validation-rmse:5.33307                                                    
[11]	validation-rmse:5.32597                                                    
[12]	validation-rmse:5.32057





[0]	validation-rmse:8.41126                                                     
[1]	validation-rmse:7.82455                                                     
[2]	validation-rmse:7.33586                                                     
[3]	validation-rmse:6.93381                                                     
[4]	validation-rmse:6.60108                                                     
[5]	validation-rmse:6.33058                                                     
[6]	validation-rmse:6.11117                                                     
[7]	validation-rmse:5.93428                                                     
[8]	validation-rmse:5.79052                                                     
[9]	validation-rmse:5.67564                                                     
[10]	validation-rmse:5.58506                                                    
[11]	validation-rmse:5.51122                                                    
[12]	validation-rmse:5.45273





[0]	validation-rmse:7.99251                                                     
[1]	validation-rmse:7.17007                                                     
[2]	validation-rmse:6.57734                                                     
[3]	validation-rmse:6.15620                                                     
[4]	validation-rmse:5.86543                                                     
[5]	validation-rmse:5.66764                                                     
[6]	validation-rmse:5.53146                                                     
[7]	validation-rmse:5.44233                                                     
[8]	validation-rmse:5.38170                                                     
[9]	validation-rmse:5.34045                                                     
[10]	validation-rmse:5.31281                                                    
[11]	validation-rmse:5.29251                                                    
[12]	validation-rmse:5.27828





[0]	validation-rmse:8.74205                                                     
[1]	validation-rmse:8.40537                                                     
[2]	validation-rmse:8.09897                                                     
[3]	validation-rmse:7.82034                                                     
[4]	validation-rmse:7.56799                                                     
[5]	validation-rmse:7.33921                                                     
[6]	validation-rmse:7.13199                                                     
[7]	validation-rmse:6.94494                                                     
[8]	validation-rmse:6.77716                                                     
[9]	validation-rmse:6.62533                                                     
[10]	validation-rmse:6.48962                                                    
[11]	validation-rmse:6.36798                                                    
[12]	validation-rmse:6.25756





[0]	validation-rmse:8.12676                                                     
[1]	validation-rmse:7.37800                                                     
[2]	validation-rmse:6.81974                                                     
[3]	validation-rmse:6.40834                                                     
[4]	validation-rmse:6.10928                                                     
[5]	validation-rmse:5.89466                                                     
[6]	validation-rmse:5.73968                                                     
[7]	validation-rmse:5.63041                                                     
[8]	validation-rmse:5.55317                                                     
[9]	validation-rmse:5.49829                                                     
[10]	validation-rmse:5.45916                                                    
[11]	validation-rmse:5.43062                                                    
[12]	validation-rmse:5.40989





[0]	validation-rmse:7.34735                                                     
[1]	validation-rmse:6.36664                                                     
[2]	validation-rmse:5.84667                                                     
[3]	validation-rmse:5.59536                                                     
[4]	validation-rmse:5.46404                                                     
[5]	validation-rmse:5.39821                                                     
[6]	validation-rmse:5.35746                                                     
[7]	validation-rmse:5.32134                                                     
[8]	validation-rmse:5.31118                                                     
[9]	validation-rmse:5.30328                                                     
[10]	validation-rmse:5.29696                                                    
[11]	validation-rmse:5.29332                                                    
[12]	validation-rmse:5.29044





[0]	validation-rmse:8.41153                                                     
[1]	validation-rmse:7.82930                                                     
[2]	validation-rmse:7.34968                                                     
[3]	validation-rmse:6.95739                                                     
[4]	validation-rmse:6.64001                                                     
[5]	validation-rmse:6.38023                                                     
[6]	validation-rmse:6.16835                                                     
[7]	validation-rmse:6.00648                                                     
[8]	validation-rmse:5.86528                                                     
[9]	validation-rmse:5.76008                                                     
[10]	validation-rmse:5.67581                                                    
[11]	validation-rmse:5.60159                                                    
[12]	validation-rmse:5.54734





[0]	validation-rmse:6.83287                                                     
[1]	validation-rmse:5.90213                                                     
[2]	validation-rmse:5.55300                                                     
[3]	validation-rmse:5.41808                                                     
[4]	validation-rmse:5.35045                                                     
[5]	validation-rmse:5.31626                                                     
[6]	validation-rmse:5.30339                                                     
[7]	validation-rmse:5.29716                                                     
[8]	validation-rmse:5.29572                                                     
[9]	validation-rmse:5.29267                                                     
[10]	validation-rmse:5.29045                                                    
[11]	validation-rmse:5.28732                                                    
[12]	validation-rmse:5.28175





[0]	validation-rmse:5.74101                                                     
[1]	validation-rmse:5.47888                                                     
[2]	validation-rmse:5.45567                                                     
[3]	validation-rmse:5.44856                                                     
[4]	validation-rmse:5.44478                                                     
[5]	validation-rmse:5.43989                                                     
[6]	validation-rmse:5.42250                                                     
[7]	validation-rmse:5.41841                                                     
[8]	validation-rmse:5.41455                                                     
[9]	validation-rmse:5.41392                                                     
[10]	validation-rmse:5.41125                                                    
[11]	validation-rmse:5.40662                                                    
[12]	validation-rmse:5.40448





[0]	validation-rmse:7.54299                                                     
[1]	validation-rmse:6.57838                                                     
[2]	validation-rmse:6.01750                                                     
[3]	validation-rmse:5.69229                                                     
[4]	validation-rmse:5.50433                                                     
[5]	validation-rmse:5.39822                                                     
[6]	validation-rmse:5.33822                                                     
[7]	validation-rmse:5.30121                                                     
[8]	validation-rmse:5.27314                                                     
[9]	validation-rmse:5.25563                                                     
[10]	validation-rmse:5.24321                                                    
[11]	validation-rmse:5.23587                                                    
[12]	validation-rmse:5.22992





[0]	validation-rmse:7.80064                                                     
[1]	validation-rmse:6.92189                                                     
[2]	validation-rmse:6.33348                                                     
[3]	validation-rmse:5.95254                                                     
[4]	validation-rmse:5.70708                                                     
[5]	validation-rmse:5.56385                                                     
[6]	validation-rmse:5.45415                                                     
[7]	validation-rmse:5.39400                                                     
[8]	validation-rmse:5.35424                                                     
[9]	validation-rmse:5.32948                                                     
[10]	validation-rmse:5.30466                                                    
[11]	validation-rmse:5.29613                                                    
[12]	validation-rmse:5.28944





[0]	validation-rmse:8.74355                                                     
[1]	validation-rmse:8.40882                                                     
[2]	validation-rmse:8.10460                                                     
[3]	validation-rmse:7.82878                                                     
[4]	validation-rmse:7.57925                                                     
[5]	validation-rmse:7.35397                                                     
[6]	validation-rmse:7.15088                                                     
[7]	validation-rmse:6.96813                                                     
[8]	validation-rmse:6.80421                                                     
[9]	validation-rmse:6.65702                                                     
[10]	validation-rmse:6.52509                                                    
[11]	validation-rmse:6.40776                                                    
[12]	validation-rmse:6.30285





[0]	validation-rmse:8.52372                                                     
[1]	validation-rmse:8.01803                                                     
[2]	validation-rmse:7.58568                                                     
[3]	validation-rmse:7.21852                                                     
[4]	validation-rmse:6.90710                                                     
[5]	validation-rmse:6.64349                                                     
[6]	validation-rmse:6.42235                                                     
[7]	validation-rmse:6.23756                                                     
[8]	validation-rmse:6.08201                                                     
[9]	validation-rmse:5.95292                                                     
[10]	validation-rmse:5.84507                                                    
[11]	validation-rmse:5.75538                                                    
[12]	validation-rmse:5.68095





[0]	validation-rmse:6.62705                                                     
[1]	validation-rmse:5.75872                                                     
[2]	validation-rmse:5.48004                                                     
[3]	validation-rmse:5.39157                                                     
[4]	validation-rmse:5.35524                                                     
[5]	validation-rmse:5.33771                                                     
[6]	validation-rmse:5.32736                                                     
[7]	validation-rmse:5.30962                                                     
[8]	validation-rmse:5.30204                                                     
[9]	validation-rmse:5.29749                                                     
[10]	validation-rmse:5.29649                                                    
[11]	validation-rmse:5.28268                                                    
[12]	validation-rmse:5.27917





[0]	validation-rmse:5.37712                                                     
[1]	validation-rmse:5.29184                                                     
[2]	validation-rmse:5.28126                                                     
[3]	validation-rmse:5.27211                                                     
[4]	validation-rmse:5.26584                                                     
[5]	validation-rmse:5.26175                                                     
[6]	validation-rmse:5.25808                                                     
[7]	validation-rmse:5.25497                                                     
[8]	validation-rmse:5.25440                                                     
[9]	validation-rmse:5.23767                                                     
[10]	validation-rmse:5.23004                                                    
[11]	validation-rmse:5.22675                                                    
[12]	validation-rmse:5.22425





[0]	validation-rmse:7.17640                                                     
[1]	validation-rmse:6.21172                                                     
[2]	validation-rmse:5.76488                                                     
[3]	validation-rmse:5.56377                                                     
[4]	validation-rmse:5.46974                                                     
[5]	validation-rmse:5.42571                                                     
[6]	validation-rmse:5.40046                                                     
[7]	validation-rmse:5.38451                                                     
[8]	validation-rmse:5.37384                                                     
[9]	validation-rmse:5.36777                                                     
[10]	validation-rmse:5.35726                                                    
[11]	validation-rmse:5.35021                                                    
[12]	validation-rmse:5.34396





[0]	validation-rmse:8.16232                                                     
[1]	validation-rmse:7.43007                                                     
[2]	validation-rmse:6.87168                                                     
[3]	validation-rmse:6.45652                                                     
[4]	validation-rmse:6.15057                                                     
[5]	validation-rmse:5.91665                                                     
[6]	validation-rmse:5.75116                                                     
[7]	validation-rmse:5.62638                                                     
[8]	validation-rmse:5.53384                                                     
[9]	validation-rmse:5.46823                                                     
[10]	validation-rmse:5.42122                                                    
[11]	validation-rmse:5.38180                                                    
[12]	validation-rmse:5.35033





[0]	validation-rmse:8.43132                                                     
[1]	validation-rmse:7.86745                                                     
[2]	validation-rmse:7.40233                                                     
[3]	validation-rmse:7.02320                                                     
[4]	validation-rmse:6.71147                                                     
[5]	validation-rmse:6.45992                                                     
[6]	validation-rmse:6.26114                                                     
[7]	validation-rmse:6.10085                                                     
[8]	validation-rmse:5.96774                                                     
[9]	validation-rmse:5.86556                                                     
[10]	validation-rmse:5.78395                                                    
[11]	validation-rmse:5.71723                                                    
[12]	validation-rmse:5.66601





[0]	validation-rmse:5.92936                                                     
[1]	validation-rmse:5.35895                                                     
[2]	validation-rmse:5.26304                                                     
[3]	validation-rmse:5.23309                                                     
[4]	validation-rmse:5.22689                                                     
[5]	validation-rmse:5.21581                                                     
[6]	validation-rmse:5.21477                                                     
[7]	validation-rmse:5.21302                                                     
[8]	validation-rmse:5.21178                                                     
[9]	validation-rmse:5.20952                                                     
[10]	validation-rmse:5.20939                                                    
[11]	validation-rmse:5.21006                                                    
[12]	validation-rmse:5.20805





[1]	validation-rmse:6.82213                                                     
[2]	validation-rmse:6.29540                                                     
[3]	validation-rmse:5.99433                                                     
[4]	validation-rmse:5.81923                                                     
[5]	validation-rmse:5.71459                                                     
[6]	validation-rmse:5.65093                                                     
[7]	validation-rmse:5.61124                                                     
[8]	validation-rmse:5.58838                                                     
[9]	validation-rmse:5.57014                                                     
[10]	validation-rmse:5.55531                                                    
[11]	validation-rmse:5.54524                                                    
[12]	validation-rmse:5.53660                                                    
[13]	validation-rmse:5.53265





[0]	validation-rmse:7.83039                                                     
[1]	validation-rmse:6.94865                                                     
[2]	validation-rmse:6.35700                                                     
[3]	validation-rmse:5.96441                                                     
[4]	validation-rmse:5.71313                                                     
[5]	validation-rmse:5.55334                                                     
[6]	validation-rmse:5.44969                                                     
[7]	validation-rmse:5.38201                                                     
[8]	validation-rmse:5.33269                                                     
[9]	validation-rmse:5.29818                                                     
[10]	validation-rmse:5.27306                                                    
[11]	validation-rmse:5.25532                                                    
[12]	validation-rmse:5.24633





[0]	validation-rmse:8.70736                                                     
[1]	validation-rmse:8.34048                                                     
[2]	validation-rmse:8.00723                                                     
[3]	validation-rmse:7.70655                                                     
[4]	validation-rmse:7.43525                                                     
[5]	validation-rmse:7.18961                                                     
[6]	validation-rmse:6.96901                                                     
[7]	validation-rmse:6.77140                                                     
[8]	validation-rmse:6.59393                                                     
[9]	validation-rmse:6.43465                                                     
[10]	validation-rmse:6.29366                                                    
[11]	validation-rmse:6.16838                                                    
[12]	validation-rmse:6.05611





[0]	validation-rmse:8.60850                                                     
[1]	validation-rmse:8.16780                                                     
[2]	validation-rmse:7.77431                                                     
[3]	validation-rmse:7.44227                                                     
[4]	validation-rmse:7.14324                                                     
[5]	validation-rmse:6.88661                                                     
[6]	validation-rmse:6.65472                                                     
[7]	validation-rmse:6.46313                                                     
[8]	validation-rmse:6.29470                                                     
[9]	validation-rmse:6.14918                                                     
[10]	validation-rmse:6.02423                                                    
[11]	validation-rmse:5.92184                                                    
[12]	validation-rmse:5.83456




In [22]:
# Switch XGBoost autologging off: the following run logs its params, metric,
# preprocessor artifact and model explicitly through the MLflow API.
mlflow.xgboost.autolog(disable=True)

In [23]:
# Train one XGBoost booster with a fixed parameter set inside a single MLflow
# run, logging the params, the validation RMSE, the fitted DictVectorizer and
# the model itself.
with mlflow.start_run():

    train = xgb.DMatrix(X_train, label=y_train)
    valid = xgb.DMatrix(X_val, label=y_val)

    # NOTE(review): these values are presumably the best trial of the
    # hyper-parameter search run earlier in the notebook -- confirm.
    best_params = {
        'learning_rate': 0.09585355369315604,
        'max_depth': 30,
        'min_child_weight': 1.060597050922164,
        # 'reg:linear' is a deprecated alias; 'reg:squarederror' is the
        # current name of the same squared-error regression objective.
        'objective': 'reg:squarederror',
        'reg_alpha': 0.018060244040060163,
        'reg_lambda': 0.011658731377413597,
        'seed': 42
    }

    mlflow.log_params(best_params)

    # Up to 1000 boosting rounds; stop once validation RMSE has not improved
    # for 50 consecutive rounds.
    booster = xgb.train(
        params=best_params,
        dtrain=train,
        num_boost_round=1000,
        evals=[(valid, 'validation')],
        early_stopping_rounds=50
    )

    y_pred = booster.predict(valid)
    # Take the square root of the MSE explicitly: the `squared=False` keyword
    # was deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)

    # Persist the fitted DictVectorizer and attach it to the run so the
    # feature encoding can be reproduced alongside the model.
    preprocessor_path = "../models/preprocessor.b"
    with open(preprocessor_path, "wb") as f_out:
        pickle.dump(dv, f_out)
    mlflow.log_artifact(preprocessor_path, artifact_path="preprocessor")

    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")



[0]	validation-rmse:8.55121
[1]	validation-rmse:8.06400
[2]	validation-rmse:7.64347
[3]	validation-rmse:7.28077
[4]	validation-rmse:6.96868
[5]	validation-rmse:6.70403
[6]	validation-rmse:6.47713
[7]	validation-rmse:6.28420
[8]	validation-rmse:6.12122
[9]	validation-rmse:5.98447
[10]	validation-rmse:5.87103
[11]	validation-rmse:5.77302
[12]	validation-rmse:5.68988
[13]	validation-rmse:5.62238
[14]	validation-rmse:5.56459
[15]	validation-rmse:5.51506
[16]	validation-rmse:5.47349
[17]	validation-rmse:5.43747
[18]	validation-rmse:5.40878
[19]	validation-rmse:5.38302
[20]	validation-rmse:5.36147
[21]	validation-rmse:5.34029
[22]	validation-rmse:5.32216
[23]	validation-rmse:5.30792
[24]	validation-rmse:5.29485
[25]	validation-rmse:5.28297
[26]	validation-rmse:5.27297
[27]	validation-rmse:5.26478
[28]	validation-rmse:5.25749
[29]	validation-rmse:5.25086
[30]	validation-rmse:5.24386
[31]	validation-rmse:5.23724
[32]	validation-rmse:5.23377
[33]	validation-rmse:5.23046
[34]	validation-rmse:5.2



In [26]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import LinearSVR

# Let MLflow autolog the scikit-learn estimators fitted below.
mlflow.sklearn.autolog()

# Fit each candidate regressor with its default hyper-parameters in its own
# MLflow run and record the validation RMSE for comparison.
for model_class in (RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, LinearSVR):

    with mlflow.start_run():

        # Record the files that were actually loaded at the top of the
        # notebook (the 2024-01 / 2024-02 green-taxi data).
        mlflow.log_param("train-data-path", "../data/green_tripdata_2024-01.parquet")
        mlflow.log_param("valid-data-path", "../data/green_tripdata_2024-02.parquet")
        # Same location the XGBoost cell pickles the DictVectorizer to.
        mlflow.log_artifact("../models/preprocessor.b", artifact_path="preprocessor")

        mlmodel = model_class()
        mlmodel.fit(X_train, y_train)

        y_pred = mlmodel.predict(X_val)
        # Take the square root of the MSE explicitly: the `squared=False`
        # keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
        rmse = mean_squared_error(y_val, y_pred) ** 0.5
        mlflow.log_metric("rmse", rmse)
        



AssertionError: c:\Users\arun_prasath\AppData\Local\anaconda3\envs\mlops\Lib\distutils\core.py