In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.svm import SVR

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

import mlflow

In [2]:
mlflow.set_tracking_uri("https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow")

In [4]:
import dagshub
dagshub.init(repo_owner='maverick011', repo_name='Uber-Demand-Prediction', mlflow=True)

In [5]:
# load the training and test data

train_data_path = "../data/processed/train.csv"
test_data_path = "../data/processed/test.csv"

train_df = pd.read_csv(train_data_path, parse_dates=["tpep_pickup_datetime"]).set_index("tpep_pickup_datetime")

test_df = pd.read_csv(test_data_path, parse_dates=["tpep_pickup_datetime"]).set_index("tpep_pickup_datetime")

train_df

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,total_pickups,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-01 01:00:00,160.0,149.0,120.0,58.0,0,187,161.0,4
2016-01-01 01:15:00,187.0,160.0,149.0,120.0,0,194,175.0,4
2016-01-01 01:30:00,194.0,187.0,160.0,149.0,0,180,177.0,4
2016-01-01 01:45:00,180.0,194.0,187.0,160.0,0,197,185.0,4
2016-01-01 02:00:00,197.0,180.0,194.0,187.0,0,185,185.0,4
...,...,...,...,...,...,...,...,...
2016-02-29 22:45:00,15.0,9.0,11.0,11.0,29,12,12.0,0
2016-02-29 23:00:00,12.0,15.0,9.0,11.0,29,17,14.0,0
2016-02-29 23:15:00,17.0,12.0,15.0,9.0,29,15,14.0,0
2016-02-29 23:30:00,15.0,17.0,12.0,15.0,29,15,15.0,0


In [6]:
# missing value in training data

train_df.isna().sum()

lag_1            0
lag_2            0
lag_3            0
lag_4            0
region           0
total_pickups    0
avg_pickups      0
day_of_week      0
dtype: int64

In [7]:
# missing values in the test data

test_df.isna().sum()

lag_1            0
lag_2            0
lag_3            0
lag_4            0
region           0
total_pickups    0
avg_pickups      0
day_of_week      0
dtype: int64

In [8]:
# make X_train and y_train

X_train = train_df.drop(columns=["total_pickups"])

y_train = train_df["total_pickups"]

In [9]:
X_train.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-01 01:00:00,160.0,149.0,120.0,58.0,0,161.0,4
2016-01-01 01:15:00,187.0,160.0,149.0,120.0,0,175.0,4
2016-01-01 01:30:00,194.0,187.0,160.0,149.0,0,177.0,4
2016-01-01 01:45:00,180.0,194.0,187.0,160.0,0,185.0,4
2016-01-01 02:00:00,197.0,180.0,194.0,187.0,0,185.0,4


In [10]:
# make X_test and y_test

X_test = test_df.drop(columns=["total_pickups"])

y_test = test_df["total_pickups"]

In [11]:
X_test.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-03-01 00:00:00,36.0,44.0,31.0,29.0,0,39.0,1
2016-03-01 00:15:00,41.0,36.0,44.0,31.0,0,37.0,1
2016-03-01 00:30:00,35.0,41.0,36.0,44.0,0,41.0,1
2016-03-01 00:45:00,47.0,35.0,41.0,36.0,0,38.0,1
2016-03-01 01:00:00,34.0,47.0,35.0,41.0,0,35.0,1


In [12]:
from sklearn import set_config

set_config(transform_output="pandas")

In [13]:
# encode the data

encoder = ColumnTransformer([
    ("ohe", OneHotEncoder(drop="first",sparse_output=False), ["region","day_of_week"])
], remainder="passthrough", n_jobs=-1,force_int_remainder_cols=False)

In [14]:
encoder

0,1,2
,transformers,"[('ohe', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,-1
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,False

0,1,2
,categories,'auto'
,drop,'first'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'


In [15]:
# encode the train and test data

X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)



In [16]:
import optuna
import tqdm 

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
# set the experiment

mlflow.set_experiment("Model Selection")

2025/07/14 19:20:32 INFO mlflow.tracking.fluent: Experiment with name 'Model Selection' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/be86187d05574ccb83f9d060e8ffbd27', creation_time=1752492031440, experiment_id='0', last_update_time=1752492031440, lifecycle_stage='active', name='Model Selection', tags={}>

In [19]:
def objective(trial):
    # start the child run
    with mlflow.start_run(nested=True) as child:
        
        # model name search space
        list_of_models = ["LR", "RF", "GBR", "XGBR"]
        model_name = trial.suggest_categorical("model_name", list_of_models)
    
        if model_name == "LR":
            model = LinearRegression()
    
        elif model_name == "RF":
            n_estimators_rf = trial.suggest_int("n_estimators_rf",10,100,step=10)
            max_depth_rf = trial.suggest_int("max_depth_rf",3,10)
            model = RandomForestRegressor(n_estimators=n_estimators_rf, 
                                          max_depth=max_depth_rf, 
                                          random_state=42, n_jobs=-1)
    
        elif model_name == "GBR":
            n_estimators_gb = trial.suggest_int("n_estimators_gb",10,100,step=10)
            learning_rate_gb = trial.suggest_float("learning_rate_gb",1e-4,1e-1, log=True)
            model = GradientBoostingRegressor(n_estimators=n_estimators_gb, 
                                              learning_rate=learning_rate_gb,
                                             random_state=42)
    
        elif model_name == "XGBR":
            n_estimators_xgb = trial.suggest_int("n_estimators_xgb",10,100,step=10)
            learning_rate_xgb = trial.suggest_float("learning_rate_xgb",1e-4,1e-1, log=True)
            max_depth_xgb = trial.suggest_int("max_depth_xgb",3,10)
            model = XGBRegressor(n_estimators=n_estimators_xgb,
                                learning_rate=learning_rate_xgb,
                                max_depth=max_depth_xgb,
                                tree_method='gpu_hist',  
                                gpu_id=0)
    
        # log the model name
        mlflow.log_param("model_name",model_name)
        
        # log the model parameters
        mlflow.log_params(model.get_params())

        # fit on the data
        model.fit(X_train_encoded,y_train)
    
        # get the predictions
        y_pred = model.predict(X_test_encoded)
    
        # calculate the loss
        loss = mean_absolute_percentage_error(y_test, y_pred)
    
        # log the metric
        mlflow.log_metric("MAPE",loss)
        return loss

In [20]:
# optimize the objective function

with mlflow.start_run(run_name="best_model", nested=True) as parent:

    # create a study object
    study = optuna.create_study(study_name="model_selection", direction="minimize")
    # optimize the objective function
    study.optimize(func=objective, n_trials=50, n_jobs=-1)
    
    # log the best parameters
    mlflow.log_params(study.best_params)
    # log the best error value
    mlflow.log_metric("Best_MAPE", study.best_value)

[I 2025-07-14 19:35:31,085] A new study created in memory with name: model_selection


🏃 View run amusing-crab-73 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/a5b383a25f7247729aa5d2f171d03bd6
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:35:46,610] Trial 0 finished with value: 0.3466868818531784 and parameters: {'model_name': 'RF', 'n_estimators_rf': 50, 'max_depth_rf': 4}. Best is trial 0 with value: 0.3466868818531784.


🏃 View run stately-skink-18 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/0251ec1de9954107b4a0bfb9eefd3a1d
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:35:56,924] Trial 2 finished with value: 0.34297658634125283 and parameters: {'model_name': 'RF', 'n_estimators_rf': 10, 'max_depth_rf': 4}. Best is trial 2 with value: 0.34297658634125283.


🏃 View run judicious-goat-860 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/2b6f7468c2c74dbc95af949ab2358110
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run masked-asp-235 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/428abae35708443885b914909ab93cf1
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:36:05,151] Trial 3 finished with value: 0.5443209920941998 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 3}. Best is trial 2 with value: 0.34297658634125283.

    E.g. tree_method = "hist", device = "cuda"

[I 2025-07-14 19:36:09,317] Trial 4 finished with value: 0.17696513204101527 and parameters: {'model_name': 'RF', 'n_estimators_rf': 50, 'max_depth_rf': 7}. Best is trial 4 with value: 0.17696513204101527.

    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




🏃 View run entertaining-gnu-162 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/a8900a2df2264a94aa2b3e247e7ed098
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run puzzled-dog-53 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/6ff17e8b03d84e588b72bba665ef0e1a
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:36:16,576] Trial 1 finished with value: 6.438767475411499 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 50, 'learning_rate_gb': 0.0004519784847378164}. Best is trial 4 with value: 0.17696513204101527.

    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

[I 2025-07-14 19:36:21,704] Trial 5 finished with value: 6.506016331221051 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 70, 'learning_rate_gb': 0.00016560142221003153}. Best is trial 4 with value: 0.17696513204101527.


🏃 View run upset-lynx-379 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/af65e2a765c7483ba831934cefbd05a8
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run bedecked-skink-652 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/13f0b32d7a894de68b6f2fd2afd0b422
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

[I 2025-07-14 19:36:41,285] Trial 6 finished with value: 1.9527190923690796 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 80, 'learning_rate_xgb': 0.015928447835696483, 'max_depth_xgb': 4}. Best is trial 4 with value: 0.17696513204101527.
[I 2025-07-14 19:36:42,411] Trial 7 finished with value: 0.17757159724163896 and parameters: {'model_name': 'RF', 'n_estimators_rf': 20, 'max_depth_rf': 7}. Best is trial 4 with value: 0.17696513204101527.


🏃 View run enthused-worm-751 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/a34991f3045848da9248c4750c9c0c03
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run bittersweet-ray-670 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/304de6bf4ad84351acd2f2b6a746c905
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:36:48,580] Trial 8 finished with value: 6.510979652404785 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 40, 'learning_rate_xgb': 0.00025768571197843954, 'max_depth_xgb': 7}. Best is trial 4 with value: 0.17696513204101527.
[I 2025-07-14 19:36:52,723] Trial 9 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run glamorous-fly-916 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/3d945fc7308a4bd38b271d8b1e2dcf9e
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:37:08,167] Trial 11 finished with value: 6.455358982086182 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 70, 'learning_rate_xgb': 0.0002730965183876366, 'max_depth_xgb': 5}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run silent-panda-648 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/7095e8d766c2453da59ba72dd0573f41
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:37:14,412] Trial 10 finished with value: 6.458682173798339 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 70, 'learning_rate_gb': 0.0002761208982518442}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run bemused-sloth-614 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/f36a7f0bb0a14591b47b01477357a42a
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run stately-perch-146 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/9b3a5dcd6f494c19a8bb1dbd4ef96c21
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:37:28,844] Trial 13 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run nebulous-shark-990 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/b9a93ecb52ef42149af0ac829161ac55
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:37:30,896] Trial 12 finished with value: 0.8567558024188635 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 30, 'learning_rate_gb': 0.0753031874649297}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run peaceful-colt-133 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/b59b542d6d134e4296556b37320319c9
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:37:36,040] Trial 14 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.
[I 2025-07-14 19:37:40,176] Trial 15 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run hilarious-crow-24 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/2090165659344dd38ea9f6b23b3fdcfd
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:37:55,715] Trial 16 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run rebellious-shad-274 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/05aaf1f9bf3746aaa3e3f570aed6947a
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:38:02,932] Trial 17 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run merciful-smelt-862 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/ff6c6debe49740bab2080cb7ccab7079
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run adaptable-cod-444 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/8b1abc929814440ea13de432dd951968
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:38:16,409] Trial 18 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run overjoyed-ant-390 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/5622301af4a342fdab985a603ad96805
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:38:18,466] Trial 19 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run orderly-fly-803 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/4d7dc095a00f469d97645374a9d48715
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:38:23,609] Trial 20 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.
[I 2025-07-14 19:38:27,749] Trial 21 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run merciful-eel-387 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/2fc58abeb4d042d8b259ec98bfb0fa68
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:38:44,229] Trial 22 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run exultant-perch-754 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/9d5c1e5b3d744468ac52b0b615e96854
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:38:50,479] Trial 23 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run luxuriant-shrew-862 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/0030ccfac1574359a9a38360e4e2ab94
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run upbeat-crow-638 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/2a8e52c5f357414fa85108a338559607
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:39:04,927] Trial 24 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run painted-mole-300 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/75ccc7bbe50049f99573d2bb5e2c2437
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:39:06,976] Trial 25 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run adventurous-lynx-93 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/5367eec38053414489336b6ab80fda5c
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:39:12,141] Trial 26 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.
[I 2025-07-14 19:39:16,241] Trial 27 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run amusing-yak-244 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/b48d34423fa344019cdc52d6bda753e2
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:39:31,761] Trial 28 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run thundering-sheep-414 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/82ba7d7fba024bab9683bd9c8d000324
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:39:39,001] Trial 29 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run lyrical-skunk-699 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/4eb0d2856b1a4acf99c8ac40a31d41c3
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run dapper-sponge-139 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/017d62ca9e034952a476f5e0ade74223
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

[I 2025-07-14 19:39:52,476] Trial 30 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run defiant-koi-934 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/5653e754b7714c779a4aeb32a78956f8
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:39:54,544] Trial 31 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run silent-newt-940 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/2ac384a090fd4f7fa29507001db8893c
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:39:59,664] Trial 32 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.
[I 2025-07-14 19:40:03,790] Trial 33 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.

    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



🏃 View run enchanting-gull-542 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/f2d0343c234442c3a564f00912114c63
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

[I 2025-07-14 19:40:20,319] Trial 34 finished with value: 2.52119517326355 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.09318226964177724, 'max_depth_xgb': 10}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run caring-trout-466 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/e21640cbd6f74ab09df376a24ca99ca1
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

[I 2025-07-14 19:40:26,570] Trial 35 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run sassy-roo-772 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/f3ebe5a6fc934f9985ddafbde360ddc3
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run silent-mouse-609 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/339104f85dc8466c8d5ecbbf694eb401
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run receptive-turtle-882 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/24353979fac049f5956d5bdd50ce5a03
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:40:42,000] Trial 36 finished with value: 0.07934790285463403 and parameters: {'model_name': 'LR'}. Best is trial 9 with value: 0.07934790285463403.
[I 2025-07-14 19:40:43,032] Trial 37 finished with value: 4.08152437210083 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.047325923405690556, 'max_depth_xgb': 10}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run salty-worm-754 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/3f324d739f0a4935b7f3ca99265a6f1c
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:40:46,161] Trial 38 finished with value: 2.9592363834381104 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.07814003909736787, 'max_depth_xgb': 10}. Best is trial 9 with value: 0.07934790285463403.
[I 2025-07-14 19:40:49,234] Trial 39 finished with value: 2.357081890106201 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.09945366347577382, 'max_depth_xgb': 9}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run secretive-auk-629 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/f8848cea1ef940a490c9b397d88e34a3
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:41:06,800] Trial 40 finished with value: 0.1284471279720147 and parameters: {'model_name': 'RF', 'n_estimators_rf': 90, 'max_depth_rf': 10}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run thoughtful-gnu-769 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/f1f960f185484c10b6d93b0d1905574c
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:41:09,888] Trial 41 finished with value: 0.12837921992144385 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 10}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run bedecked-cat-194 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/472003f64b354c7792ef6f18a06af8f4
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:41:27,839] Trial 42 finished with value: 0.12837921992144385 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 10}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run crawling-bird-715 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/74c5b3c4655244229f4c2717305a09ed
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:41:32,327] Trial 43 finished with value: 0.12837921992144385 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 10}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run charming-bird-719 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/8e1b4e2b999149e1845ee819ddad8aa5
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:41:38,717] Trial 44 finished with value: 0.12844712797201474 and parameters: {'model_name': 'RF', 'n_estimators_rf': 90, 'max_depth_rf': 10}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run grandiose-crow-762 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/75ef2a791d2c438cb9632b15dec03634
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:41:41,724] Trial 45 finished with value: 0.12837921992144385 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 10}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run overjoyed-doe-22 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/8bdcfe1456f840bea7787eb08b8bdbaf
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run persistent-bass-823 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/4e911c1d705e44608acf8af2a5e01553
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:41:50,985] Trial 47 finished with value: 0.959055680443814 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 100, 'learning_rate_gb': 0.02183155063060685}. Best is trial 9 with value: 0.07934790285463403.
[I 2025-07-14 19:41:53,042] Trial 46 finished with value: 0.9580102484227003 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 100, 'learning_rate_gb': 0.02185872912719904}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run skillful-donkey-900 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/2d679f188656419f8c179e3ce13f973b
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run sneaky-quail-580 at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/12c26711b7254d938843760ba4ba4685
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-07-14 19:42:00,342] Trial 48 finished with value: 0.7227710675529212 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 100, 'learning_rate_gb': 0.025529484169621287}. Best is trial 9 with value: 0.07934790285463403.
[I 2025-07-14 19:42:01,386] Trial 49 finished with value: 0.8631026006701731 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 100, 'learning_rate_gb': 0.023167224141303323}. Best is trial 9 with value: 0.07934790285463403.


🏃 View run best_model at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/1600fc0640f64031a6f2544a3dfa9201
🧪 View experiment at: https://dagshub.com/maverick011/Uber-Demand-Prediction.mlflow/#/experiments/0


In [21]:
# best value

study.best_value

0.07934790285463403

In [22]:
# best parameters

study.best_params


{'model_name': 'LR'}

In [23]:
# model value counts

study.trials_dataframe()['params_model_name'].value_counts()

params_model_name
LR      24
RF      11
GBR      8
XGBR     7
Name: count, dtype: int64

In [29]:
from optuna.visualization import (
    plot_optimization_history, 
    plot_parallel_coordinate, 
    plot_param_importances
)

In [30]:
plot_optimization_history(study)

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.

In [31]:
plot_parallel_coordinate(study, params=["model_name"])

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.

In [None]:
# train the linear regression model

lr = LinearRegression()

lr.fit(X_train_encoded, y_train)

# get predictions
y_pred_train = lr.predict(X_train_encoded) 
y_pred_test = lr.predict(X_test_encoded)

# loss

mape_train = mean_absolute_percentage_error(y_train, y_pred_train)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)

print("The training error is ", mape_train)
print("The test error is ", mape_test)

In [None]:
lr.coef_

In [None]:
def tune_ridge(trial):
    # hyperparameter space
    alpha = trial.suggest_float("alpha",30,100)
    
    # make the model object
    ridge = Ridge(alpha=alpha, random_state=42)
    
    # train the model
    ridge.fit(X_train_encoded, y_train)
    
    # get predictions
    y_pred = ridge.predict(X_test_encoded)
    
    # calculate loss
    loss = mean_absolute_percentage_error(y_test, y_pred)

    return loss
        

In [None]:
# create study

study = optuna.create_study(study_name="tune_model", direction="minimize")

In [None]:
# optimize

study.optimize(func=tune_ridge, n_trials=100, n_jobs=-1, show_progress_bar=True)

In [None]:
# best parameters

study.best_params

In [None]:
# best value

study.best_value