In [43]:
pip install mlflow



In [45]:
pip install optuna




In [46]:
pip install dagshub



In [85]:
import os
from getpass import getpass

import numpy as np
import pandas as pd

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.svm import SVR

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

import mlflow
import dagshub
import optuna
import tqdm

In [86]:
# Point MLflow at the DagsHub-hosted tracking server (the ".mlflow" endpoint,
# not the GitHub repository URL).
mlflow.set_tracking_uri("https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow")

# SECURITY: the access token must never be written into the notebook. The token
# that used to be hardcoded here has to be treated as leaked and revoked in
# DagsHub. Credentials already present in the environment are used as-is; the
# token is prompted for (without echo) only when it is missing.
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "Mervin50")
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("DagsHub access token: ")



In [87]:
# Correct Dagshub MLflow URI
mlflow.set_tracking_uri("https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow")

# Set credentials.
# SECURITY: the access token must never be written into the notebook. The token
# that used to be hardcoded here has to be treated as leaked and revoked in
# DagsHub. Values already present in the environment are kept; the token is
# prompted for (without echo) only when it is missing.
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "Mervin50")
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("DagsHub access token: ")

# Initialize DagsHub MLflow integration
dagshub.init(repo_owner='Mervin50', repo_name='Uber-Demand-Prediction', mlflow=True)

# Set or create experiment (left as the last expression so the Experiment is displayed)
mlflow.set_experiment("model-selection")


<Experiment: artifact_location='mlflow-artifacts:/b83d4cad915b4c1daf9d3a642caa1e17', creation_time=1746318342413, experiment_id='0', last_update_time=1746318342413, lifecycle_stage='active', name='model-selection', tags={}>

In [88]:
# Read the train and test CSV files, parse the pickup timestamp column and
# use it as the index of each frame.

train_data_path = "/content/df_final_train.csv"
test_data_path = "/content/df_final_test.csv"

train_df, test_df = (
    pd.read_csv(csv_path, parse_dates=["tpep_pickup_datetime"]).set_index("tpep_pickup_datetime")
    for csv_path in (train_data_path, test_data_path)
)

train_df

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,total_pickups,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-01 01:00:00,10.0,10.0,10.0,13.0,0,10,10.0,4
2016-01-01 01:15:00,10.0,10.0,10.0,10.0,0,10,10.0,4
2016-01-01 01:30:00,10.0,10.0,10.0,10.0,0,10,10.0,4
2016-01-01 01:45:00,10.0,10.0,10.0,10.0,0,10,10.0,4
2016-01-01 02:00:00,10.0,10.0,10.0,10.0,0,10,10.0,4
...,...,...,...,...,...,...,...,...
2016-02-29 22:45:00,10.0,10.0,10.0,10.0,29,10,10.0,0
2016-02-29 23:00:00,10.0,10.0,10.0,10.0,29,10,10.0,0
2016-02-29 23:15:00,10.0,10.0,10.0,10.0,29,10,10.0,0
2016-02-29 23:30:00,10.0,10.0,10.0,10.0,29,10,10.0,0


In [89]:
# count the missing values in each column of the training data

train_df.isna().sum()

Unnamed: 0,0
lag_1,0
lag_2,0
lag_3,0
lag_4,0
region,0
total_pickups,0
avg_pickups,0
day_of_week,0


In [90]:
# count the missing values in each column of the test data

test_df.isna().sum()

Unnamed: 0,0
lag_1,0
lag_2,0
lag_3,0
lag_4,0
region,0
total_pickups,0
avg_pickups,0
day_of_week,0


In [91]:
# Split the training frame: "total_pickups" is the target, every other column
# is a feature.

y_train = train_df["total_pickups"]

X_train = train_df.drop("total_pickups", axis=1)

In [92]:
# preview the first rows of the training features
X_train.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-01 01:00:00,10.0,10.0,10.0,13.0,0,10.0,4
2016-01-01 01:15:00,10.0,10.0,10.0,10.0,0,10.0,4
2016-01-01 01:30:00,10.0,10.0,10.0,10.0,0,10.0,4
2016-01-01 01:45:00,10.0,10.0,10.0,10.0,0,10.0,4
2016-01-01 02:00:00,10.0,10.0,10.0,10.0,0,10.0,4


In [93]:
# Split the test frame: "total_pickups" is the target, every other column
# is a feature.

y_test = test_df["total_pickups"]

X_test = test_df.drop("total_pickups", axis=1)

In [94]:
# preview the first rows of the test features
X_test.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-03-01 00:00:00,10.0,10.0,10.0,10.0,0,10.0,1
2016-03-01 00:15:00,11.0,10.0,10.0,10.0,0,10.0,1
2016-03-01 00:30:00,10.0,11.0,10.0,10.0,0,10.0,1
2016-03-01 00:45:00,10.0,10.0,11.0,10.0,0,10.0,1
2016-03-01 01:00:00,10.0,10.0,10.0,11.0,0,10.0,1


In [95]:
from sklearn import set_config

# ask scikit-learn transformers to return pandas DataFrames from transform()
set_config(transform_output="pandas")

In [96]:
# One-hot encode the categorical columns (dropping the first level of each);
# all remaining columns are passed through unchanged.

encoder = ColumnTransformer(
    transformers=[
        (
            "ohe",
            OneHotEncoder(drop="first", sparse_output=False),
            ["region", "day_of_week"],
        ),
    ],
    remainder="passthrough",
    n_jobs=-1,
    force_int_remainder_cols=False,
)

In [97]:
# display the column transformer's configuration
encoder

In [98]:
# encode the train and test data
# (the encoder is fitted on the training features only; the test features are
# transformed with that already-fitted encoder)

X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)

In [99]:
def objective(trial):
    """Optuna objective: sample one candidate model, fit it, and return its MAPE.

    Each call runs inside its own nested MLflow run and logs the sampled model
    name, the estimator's full parameter set, and the resulting "MAPE" metric.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the model family and its hyperparameters.

    Returns
    -------
    float
        Mean absolute percentage error of the fitted model on
        ``X_test_encoded`` / ``y_test`` (lower is better).

    Raises
    ------
    ValueError
        If the sampled model name has no matching estimator branch.

    Notes
    -----
    NOTE(review): the score is computed on the test split, so the model picked
    by the study has been selected against that same data; consider scoring on
    a separate validation split.
    NOTE(review): ``nested=True`` relies on the parent run being the active run
    in the calling thread -- confirm the child runs are still attached to the
    parent when the study executes trials with ``n_jobs != 1``.
    """
    # start the child run
    with mlflow.start_run(nested=True):

        # model name search space
        list_of_models = ["LR", "RF", "GBR", "XGBR"]
        model_name = trial.suggest_categorical("model_name", list_of_models)

        # Hyperparameter names carry a per-model suffix so each model family
        # gets its own search space inside the single study.
        if model_name == "LR":
            model = LinearRegression()

        elif model_name == "RF":
            n_estimators_rf = trial.suggest_int("n_estimators_rf", 10, 100, step=10)
            max_depth_rf = trial.suggest_int("max_depth_rf", 3, 10)
            model = RandomForestRegressor(n_estimators=n_estimators_rf,
                                          max_depth=max_depth_rf,
                                          random_state=42, n_jobs=-1)

        elif model_name == "GBR":
            n_estimators_gb = trial.suggest_int("n_estimators_gb", 10, 100, step=10)
            learning_rate_gb = trial.suggest_float("learning_rate_gb", 1e-4, 1e-1, log=True)
            model = GradientBoostingRegressor(n_estimators=n_estimators_gb,
                                              learning_rate=learning_rate_gb,
                                              random_state=42)

        elif model_name == "XGBR":
            n_estimators_xgb = trial.suggest_int("n_estimators_xgb", 10, 100, step=10)
            learning_rate_xgb = trial.suggest_float("learning_rate_xgb", 1e-4, 1e-1, log=True)
            max_depth_xgb = trial.suggest_int("max_depth_xgb", 3, 10)
            # random_state is pinned like the other estimators so every
            # candidate is built with the same seed.
            model = XGBRegressor(n_estimators=n_estimators_xgb,
                                 learning_rate=learning_rate_xgb,
                                 max_depth=max_depth_xgb,
                                 random_state=42)

        else:
            # Fail loudly instead of hitting a NameError on `model` below.
            raise ValueError(f"Unsupported model name: {model_name!r}")

        # log the model name
        mlflow.log_param("model_name", model_name)

        # log the model parameters
        mlflow.log_params(model.get_params())

        # fit on the data
        model.fit(X_train_encoded, y_train)

        # get the predictions
        y_pred = model.predict(X_test_encoded)

        # calculate the loss
        loss = mean_absolute_percentage_error(y_test, y_pred)

        # log the metric
        mlflow.log_metric("MAPE", loss)
        return loss

In [100]:
# optimize the objective function

with mlflow.start_run(run_name="best_model", nested=True) as parent:

    # Create a study object. The sampler is seeded so the search does not
    # depend on an implicit random state (TPE is Optuna's default sampler, so
    # the algorithm is unchanged). With n_jobs != 1 the order in which trials
    # complete can still differ between executions.
    study = optuna.create_study(
        study_name="model_selection",
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=42),
    )
    # optimize the objective function
    study.optimize(func=objective, n_trials=50, n_jobs=-1)

    # log the best parameters on the parent run
    mlflow.log_params(study.best_params)
    # log the best error value on the parent run
    mlflow.log_metric("Best_MAPE", study.best_value)

[I 2025-05-04 00:26:53,471] A new study created in memory with name: model_selection
[I 2025-05-04 00:27:02,691] Trial 1 finished with value: 0.0010679158292697401 and parameters: {'model_name': 'RF', 'n_estimators_rf': 20, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run valuable-fly-767 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/ddb4e03d310643bd8d34637643ca6f68
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:04,376] Trial 2 finished with value: 0.0022700453129822283 and parameters: {'model_name': 'LR'}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run zealous-gnat-194 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/e42f54c6c0e74cafab12dad717c91aeb
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:10,160] Trial 3 finished with value: 0.0014810825266393505 and parameters: {'model_name': 'RF', 'n_estimators_rf': 20, 'max_depth_rf': 6}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run abundant-grub-491 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/b25800f17d704efc9f6fefec9f2b2b0a
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run abrasive-crow-577 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/7ffb281f065b4b95a18d35f6fd8b6639
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:12,478] Trial 4 finished with value: 0.0022700453129822283 and parameters: {'model_name': 'LR'}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run invincible-sow-493 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/121defd0c88d42748104f3d14809a3e9
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:18,460] Trial 0 finished with value: 0.0011423252257866654 and parameters: {'model_name': 'RF', 'n_estimators_rf': 70, 'max_depth_rf': 7}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run funny-pug-907 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/e01600d6312a4afe9aefeb9254c8e6f8
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:24,476] Trial 5 finished with value: 0.005012365523725748 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 100, 'learning_rate_xgb': 0.024139216785264445, 'max_depth_xgb': 3}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run gaudy-penguin-696 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/fe42cd53a69744478bd525a4542b0f01
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:29,456] Trial 6 finished with value: 0.0022700453129822283 and parameters: {'model_name': 'LR'}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run capricious-midge-413 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/623b54afdb3a46b0b0bc60eb4ca47af1
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:36,473] Trial 7 finished with value: 0.012420529499650002 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 20, 'learning_rate_xgb': 0.01236734154143089, 'max_depth_xgb': 10}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run mysterious-goose-221 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/a8ca4d50d2dd4c16b288f3fa81f5c538
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:42,478] Trial 8 finished with value: 0.015283496119081974 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 100, 'learning_rate_xgb': 0.0002545815708874001, 'max_depth_xgb': 5}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run secretive-quail-509 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/c8d2258cfa804d4e803d2f8684b02705
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:48,461] Trial 10 finished with value: 0.0022700453129822283 and parameters: {'model_name': 'LR'}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run welcoming-bird-769 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/0d170ba27e1949fe9511f3fed4978f14
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:27:54,458] Trial 9 finished with value: 0.015193769093682894 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 70, 'learning_rate_gb': 0.0006246651376728573}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run grandiose-smelt-823 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/2dab0b76eef84c13811e6b2072318b7e
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:28:00,485] Trial 11 finished with value: 0.012188968525283502 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 10, 'learning_rate_gb': 0.03973267968729593}. Best is trial 1 with value: 0.0010679158292697401.
[I 2025-05-04 00:28:34,850] Trial 12 finished with value: 0.0011002689489442276 and parameters: {'model_name': 'RF', 'n_estimators_rf': 80, 'max_depth_rf': 10}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run funny-mare-873 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/b275ca99de3d4069aadf494723690e5d
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:28:36,740] Trial 13 finished with value: 0.0010822212978730207 and parameters: {'model_name': 'RF', 'n_estimators_rf': 80, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run resilient-croc-258 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/416f448b486f4f6398bd9399c07f17ef
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:28:41,796] Trial 15 finished with value: 0.0011661218688384125 and parameters: {'model_name': 'RF', 'n_estimators_rf': 10, 'max_depth_rf': 10}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run ambitious-dog-275 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/6876e60acb3e438b97bda813269696e5
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:29:19,439] Trial 16 finished with value: 0.0010934731808384726 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 8}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run adaptable-wolf-355 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/c8c29d04632847069d83d62828af70ed
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:29:20,037] Trial 14 finished with value: 0.001101037826794604 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 10}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run delightful-kit-124 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/cc252291813846cab35edd2186cfa499
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:29:25,656] Trial 17 finished with value: 0.00535282919791297 and parameters: {'model_name': 'RF', 'n_estimators_rf': 40, 'max_depth_rf': 3}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run hilarious-cat-641 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/7a4c13233de547578276f481e3c9b970
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run receptive-whale-14 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/e8d436a679be437293260c46c2fdff5e
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:29:29,257] Trial 18 finished with value: 0.0021188317073696492 and parameters: {'model_name': 'RF', 'n_estimators_rf': 40, 'max_depth_rf': 4}. Best is trial 1 with value: 0.0010679158292697401.
[I 2025-05-04 00:29:37,491] Trial 19 finished with value: 0.00109437888250338 and parameters: {'model_name': 'RF', 'n_estimators_rf': 50, 'max_depth_rf': 8}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run selective-fish-795 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/cd37b006a2914179915d160925a78388
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:29:51,496] Trial 20 finished with value: 0.015524717408433376 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 100, 'learning_rate_gb': 0.0001112269362884935}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run gregarious-ant-888 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/bb0ca74185f94fa38b0b94469d63e540
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:29:57,133] Trial 21 finished with value: 0.015520476446218573 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 100, 'learning_rate_gb': 0.00011533909822621753}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run abundant-hare-251 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/379fc8ab53014fbaa28b0e83f43eb8b1
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:30:26,403] Trial 22 finished with value: 0.0010934731808384726 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 8}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run big-boar-357 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/925bea70a5d848308722208a46a4d129
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:30:34,527] Trial 23 finished with value: 0.0010934731808384726 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 8}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run wistful-asp-165 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/0de71eed04ca4b90bd196096a5e0f1e1
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:30:55,284] Trial 24 finished with value: 0.001097825878942865 and parameters: {'model_name': 'RF', 'n_estimators_rf': 80, 'max_depth_rf': 8}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run upset-bear-839 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/de4b4865dfb34f8aa3a40f603b19dc89
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:31:07,204] Trial 25 finished with value: 0.0010822212978730207 and parameters: {'model_name': 'RF', 'n_estimators_rf': 80, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run handsome-midge-574 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/e3d7209c2eda46a5818150917c92b5e2
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:31:29,779] Trial 26 finished with value: 0.0010822212978730207 and parameters: {'model_name': 'RF', 'n_estimators_rf': 80, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run amusing-kite-285 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/df11bac64cdc48a194615497e74abfb9
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:31:36,473] Trial 27 finished with value: 0.0010753978551729555 and parameters: {'model_name': 'RF', 'n_estimators_rf': 70, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run tasteful-sloth-391 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/a54721df7f954a8792664b259f9fd0df
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:31:39,307] Trial 29 finished with value: 0.015606616623699665 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 20, 'learning_rate_xgb': 0.00011335051453833905, 'max_depth_xgb': 10}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run masked-skink-184 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/9fb6bc7ee92749f3b1c25ad2842f7fca
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:31:45,229] Trial 28 finished with value: 0.001438899341664779 and parameters: {'model_name': 'RF', 'n_estimators_rf': 60, 'max_depth_rf': 6}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run efficient-dove-255 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/e25c584d275b4db88d2e4505157a9c7c
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:31:55,731] Trial 30 finished with value: 0.001438899341664779 and parameters: {'model_name': 'RF', 'n_estimators_rf': 60, 'max_depth_rf': 6}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run redolent-robin-276 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/3aa0b5ca845b4853bd725529a99e6b66
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:32:11,875] Trial 31 finished with value: 0.0010732978260653229 and parameters: {'model_name': 'RF', 'n_estimators_rf': 60, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run youthful-wren-764 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/1feb5ab30d16439a8b6d38beaee14860
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:32:24,355] Trial 32 finished with value: 0.0010753978551729553 and parameters: {'model_name': 'RF', 'n_estimators_rf': 70, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run marvelous-mule-552 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/faac0a3ff0724c6e9e5e43f56620544a
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:32:40,606] Trial 33 finished with value: 0.0010753978551729553 and parameters: {'model_name': 'RF', 'n_estimators_rf': 70, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run respected-croc-547 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/296105d1cf1446ccbd5270fb46f9803e
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:32:41,671] Trial 35 finished with value: 0.0022700453129822283 and parameters: {'model_name': 'LR'}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run thoughtful-loon-305 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/20f85101d8d3491bbeb32a851e4f4911
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:32:45,450] Trial 34 finished with value: 0.0010701598608913346 and parameters: {'model_name': 'RF', 'n_estimators_rf': 50, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run big-rat-908 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/0df84fb713c64d1b8bf171a4b2b2fadb
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run gregarious-crow-431 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/d4a0ae9494df416e920d2b3c587d8db4
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:32:56,886] Trial 37 finished with value: 0.0011447576034146491 and parameters: {'model_name': 'RF', 'n_estimators_rf': 30, 'max_depth_rf': 7}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run efficient-fowl-592 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/003e358b2243488190c2224fdf8e3d03
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:32:59,863] Trial 38 finished with value: 0.0022700453129822283 and parameters: {'model_name': 'LR'}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run popular-bear-782 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/b16b7590ef6b449793908c2edc6acac3
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:33:02,428] Trial 36 finished with value: 0.0010701598608913348 and parameters: {'model_name': 'RF', 'n_estimators_rf': 50, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run likeable-sloth-975 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/82e1302db37f4005be5cc062c5cc0c51
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:33:07,438] Trial 39 finished with value: 0.0010971123119816184 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 60, 'learning_rate_xgb': 0.0976659678405879, 'max_depth_xgb': 7}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run inquisitive-eel-282 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/5c208fe03407417f8cbff569148ff5fa
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:33:14,433] Trial 40 finished with value: 0.014148646034300327 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 60, 'learning_rate_xgb': 0.0018024359168593476, 'max_depth_xgb': 7}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run selective-mole-816 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/e1f53a72b0ec4136a5b4ebb289702700
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:33:19,456] Trial 41 finished with value: 0.014360549859702587 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 60, 'learning_rate_xgb': 0.001534639867653674, 'max_depth_xgb': 7}. Best is trial 1 with value: 0.0010679158292697401.
[I 2025-05-04 00:33:42,735] Trial 42 finished with value: 0.0011137790534685326 and parameters: {'model_name': 'RF', 'n_estimators_rf': 50, 'max_depth_rf': 10}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run redolent-shoat-203 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/f17f328fa5ae449a9d3294c7924818fe
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:33:46,015] Trial 43 finished with value: 0.001113779053468368 and parameters: {'model_name': 'RF', 'n_estimators_rf': 50, 'max_depth_rf': 10}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run carefree-duck-262 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/8042dd0e9a7b4aad89de1f5f2597abb1
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:33:59,984] Trial 44 finished with value: 0.0010929281591939705 and parameters: {'model_name': 'RF', 'n_estimators_rf': 40, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run enchanting-eel-246 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/a838e8f9f73246a68db123655b64298b
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:34:02,944] Trial 45 finished with value: 0.0010929281591939702 and parameters: {'model_name': 'RF', 'n_estimators_rf': 40, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run able-dolphin-752 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/3db8c119a4d4421f8c4628029df3db52
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:34:06,405] Trial 47 finished with value: 0.009836583911705573 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 10, 'learning_rate_gb': 0.0748957104197292}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run redolent-colt-945 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/0aedbefc948542f1be9326e4a1574213
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run skittish-newt-374 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/7424b648d1d9432fa5957ffeca4fd531
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:34:09,820] Trial 48 finished with value: 0.0022700453129822283 and parameters: {'model_name': 'LR'}. Best is trial 1 with value: 0.0010679158292697401.
[I 2025-05-04 00:34:18,630] Trial 46 finished with value: 0.0010732978260651466 and parameters: {'model_name': 'RF', 'n_estimators_rf': 60, 'max_depth_rf': 9}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run silent-sponge-957 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/640b8e3f7cbc4a8296c530c502500804
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


[I 2025-05-04 00:34:26,332] Trial 49 finished with value: 0.0010930424696939899 and parameters: {'model_name': 'RF', 'n_estimators_rf': 60, 'max_depth_rf': 8}. Best is trial 1 with value: 0.0010679158292697401.


🏃 View run illustrious-deer-656 at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/d2a195b72aed413080b78eb987014973
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0
🏃 View run best_model at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0/runs/26ac1a16b1424c06a0f22e54be776e89
🧪 View experiment at: https://dagshub.com/Mervin50/Uber-Demand-Prediction.mlflow/#/experiments/0


In [101]:
# lowest objective value (MAPE) found by the model-selection study

study.best_value

0.0010679158292697401

In [102]:
# parameters of the trial that produced the lowest objective value

study.best_params

{'model_name': 'RF', 'n_estimators_rf': 20, 'max_depth_rf': 9}

In [103]:
# number of trials that sampled each model name

study.trials_dataframe()['params_model_name'].value_counts()

Unnamed: 0_level_0,count
params_model_name,Unnamed: 1_level_1
RF,31
LR,7
XGBR,7
GBR,5


In [104]:
from optuna.visualization import (
    plot_optimization_history,
    plot_parallel_coordinate,
    plot_param_importances
)

In [105]:
# objective value per trial for the study
plot_optimization_history(study)

In [106]:
# parallel-coordinate plot restricted to the "model_name" parameter
plot_parallel_coordinate(study, params=["model_name"])

In [107]:
# Fit a plain linear regression and compare its MAPE on the training data
# with its MAPE on the test data.

lr = LinearRegression().fit(X_train_encoded, y_train)

# predictions for both splits
y_pred_train, y_pred_test = (
    lr.predict(features) for features in (X_train_encoded, X_test_encoded)
)

# loss on both splits
mape_train = mean_absolute_percentage_error(y_true=y_train, y_pred=y_pred_train)
mape_test = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred_test)

print("The training error is ", mape_train)
print("The test error is ", mape_test)

The training error is  0.004087547467779396
The test error is  0.0022700453129822283


In [108]:
# inspect the fitted linear-regression coefficients
lr.coef_

array([ 1.07188818e-02,  2.04381318e-02,  1.58179478e-02,  1.22019589e-02,
       -1.69417797e-03,  9.40430213e-03,  1.76632645e-02,  2.23484703e-02,
        1.12982045e-02,  5.77093599e-03,  1.56360199e-02,  2.29339303e-02,
        2.24345394e-02, -7.49473355e-04,  8.61125591e-03,  2.17939256e-02,
        6.31277515e-03,  1.56799014e-03,  1.36345703e-02, -3.41022413e-03,
       -1.41208551e-02,  1.23408592e-02,  1.44708670e-02,  8.54521593e-03,
        1.56865637e-02,  8.99047121e-03,  7.25532432e-03,  2.02947358e-02,
       -5.40647528e-03,  9.76550792e-04,  9.76550792e-04,  9.31535919e-04,
       -4.95531112e-02, -4.15475346e-02,  3.82372428e-04, -5.56543756e-01,
       -3.68042454e-01, -1.95516216e-01, -2.02177869e-01,  2.43401886e+00])

In [109]:
def tune_ridge(trial):
    """Optuna objective for Ridge: sample ``alpha``, fit, and return the MAPE.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``alpha`` from the interval [30, 100].

    Returns
    -------
    float
        Mean absolute percentage error of the fitted Ridge model on
        ``X_test_encoded`` / ``y_test``.

    Notes
    -----
    NOTE(review): the recorded best alpha (~99.99) sits at the upper bound of
    the search interval -- consider widening the range.
    NOTE(review): the score is computed on the test split, so alpha is tuned
    against that same data.
    """
    # sample the regularisation strength
    ridge_alpha = trial.suggest_float("alpha", 30, 100)

    # build and fit the model on the encoded training data
    fitted_ridge = Ridge(alpha=ridge_alpha, random_state=42).fit(X_train_encoded, y_train)

    # score the predictions for the encoded test data
    return mean_absolute_percentage_error(y_test, fitted_ridge.predict(X_test_encoded))


In [110]:
# Create the study for tuning Ridge. The sampler is seeded so the search does
# not depend on an implicit random state (TPE is Optuna's default sampler, so
# the algorithm is unchanged).

study = optuna.create_study(
    study_name="tune_model",
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)

[I 2025-05-04 00:35:44,441] A new study created in memory with name: tune_model


In [111]:
# run 100 trials of the Ridge objective in parallel (n_jobs=-1) with a progress bar

study.optimize(func=tune_ridge, n_trials=100, n_jobs=-1, show_progress_bar=True)

  0%|          | 0/100 [00:00<?, ?it/s]

[I 2025-05-04 00:35:45,422] Trial 1 finished with value: 0.002246779177660719 and parameters: {'alpha': 88.28707879603172}. Best is trial 1 with value: 0.002246779177660719.
[I 2025-05-04 00:35:45,442] Trial 0 finished with value: 0.0022490878851674748 and parameters: {'alpha': 78.22171930856442}. Best is trial 1 with value: 0.002246779177660719.
[I 2025-05-04 00:35:46,053] Trial 3 finished with value: 0.002251792277468619 and parameters: {'alpha': 66.75318136679329}. Best is trial 1 with value: 0.002246779177660719.
[I 2025-05-04 00:35:46,059] Trial 2 finished with value: 0.002256509560063078 and parameters: {'alpha': 47.8412624314159}. Best is trial 1 with value: 0.002246779177660719.
[I 2025-05-04 00:35:46,599] Trial 5 finished with value: 0.0022589705499201414 and parameters: {'alpha': 38.44784977208617}. Best is trial 1 with value: 0.002246779177660719.
[I 2025-05-04 00:35:46,612] Trial 4 finished with value: 0.0022599992648744604 and parameters: {'alpha': 34.61134166110391}. Best

In [112]:
# alpha of the trial with the lowest objective value in the Ridge study

study.best_params

{'alpha': 99.99201573335183}

In [113]:
# lowest objective value (MAPE) found by the Ridge study

study.best_value

0.0022441624883497036