# Importing the packages

In [1]:
import tqdm
import optuna
import mlflow
import dagshub
import numpy as np
import pandas as pd

from xgboost import XGBRegressor
from sklearn import set_config
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

set_config(transform_output="pandas")

In [2]:
# MLFlow and DagsHub

mlflow.set_tracking_uri("https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow")
dagshub.init(repo_owner='SushrutGaikwad', repo_name='taxi-demand-prediction', mlflow=True)

# Loading the data

In [3]:
TRAIN_DATA_PATH = "../data/processed/train.csv"
TEST_DATA_PATH = "../data/processed/test.csv"

train_df = pd.read_csv(TRAIN_DATA_PATH, parse_dates=["tpep_pickup_datetime"]).set_index("tpep_pickup_datetime")
test_df = pd.read_csv(TEST_DATA_PATH, parse_dates=["tpep_pickup_datetime"]).set_index("tpep_pickup_datetime")

In [4]:
train_df

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,total_pickups,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-01 01:00:00,160.0,149.0,120.0,58.0,0,187,161.0,4
2016-01-01 01:15:00,187.0,160.0,149.0,120.0,0,194,175.0,4
2016-01-01 01:30:00,194.0,187.0,160.0,149.0,0,180,177.0,4
2016-01-01 01:45:00,180.0,194.0,187.0,160.0,0,197,185.0,4
2016-01-01 02:00:00,197.0,180.0,194.0,187.0,0,185,185.0,4
...,...,...,...,...,...,...,...,...
2016-02-29 22:45:00,15.0,9.0,11.0,11.0,29,12,12.0,0
2016-02-29 23:00:00,12.0,15.0,9.0,11.0,29,17,14.0,0
2016-02-29 23:15:00,17.0,12.0,15.0,9.0,29,15,14.0,0
2016-02-29 23:30:00,15.0,17.0,12.0,15.0,29,15,15.0,0


In [5]:
test_df

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,total_pickups,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-03-01 00:00:00,36.0,44.0,31.0,29.0,0,41,39.0,1
2016-03-01 00:15:00,41.0,36.0,44.0,31.0,0,35,37.0,1
2016-03-01 00:30:00,35.0,41.0,36.0,44.0,0,47,41.0,1
2016-03-01 00:45:00,47.0,35.0,41.0,36.0,0,34,38.0,1
2016-03-01 01:00:00,34.0,47.0,35.0,41.0,0,30,35.0,1
...,...,...,...,...,...,...,...,...
2016-03-31 22:45:00,22.0,14.0,15.0,13.0,29,14,16.0,3
2016-03-31 23:00:00,14.0,22.0,14.0,15.0,29,17,16.0,3
2016-03-31 23:15:00,17.0,14.0,22.0,14.0,29,18,17.0,3
2016-03-31 23:30:00,18.0,17.0,14.0,22.0,29,13,15.0,3


## Checking for missing values

In [6]:
train_df.isna().sum()

lag_1            0
lag_2            0
lag_3            0
lag_4            0
region           0
total_pickups    0
avg_pickups      0
day_of_week      0
dtype: int64

In [7]:
test_df.isna().sum()

lag_1            0
lag_2            0
lag_3            0
lag_4            0
region           0
total_pickups    0
avg_pickups      0
day_of_week      0
dtype: int64

# Input-output split

In [8]:
X_train = train_df.drop(columns=["total_pickups"])
y_train = train_df["total_pickups"]

X_test = test_df.drop(columns=["total_pickups"])
y_test = test_df["total_pickups"]

In [9]:
X_train.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-01 01:00:00,160.0,149.0,120.0,58.0,0,161.0,4
2016-01-01 01:15:00,187.0,160.0,149.0,120.0,0,175.0,4
2016-01-01 01:30:00,194.0,187.0,160.0,149.0,0,177.0,4
2016-01-01 01:45:00,180.0,194.0,187.0,160.0,0,185.0,4
2016-01-01 02:00:00,197.0,180.0,194.0,187.0,0,185.0,4


In [10]:
y_train.head()

tpep_pickup_datetime
2016-01-01 01:00:00    187
2016-01-01 01:15:00    194
2016-01-01 01:30:00    180
2016-01-01 01:45:00    197
2016-01-01 02:00:00    185
Name: total_pickups, dtype: int64

In [11]:
X_test.head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,region,avg_pickups,day_of_week
tpep_pickup_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-03-01 00:00:00,36.0,44.0,31.0,29.0,0,39.0,1
2016-03-01 00:15:00,41.0,36.0,44.0,31.0,0,37.0,1
2016-03-01 00:30:00,35.0,41.0,36.0,44.0,0,41.0,1
2016-03-01 00:45:00,47.0,35.0,41.0,36.0,0,38.0,1
2016-03-01 01:00:00,34.0,47.0,35.0,41.0,0,35.0,1


In [12]:
y_test.head()

tpep_pickup_datetime
2016-03-01 00:00:00    41
2016-03-01 00:15:00    35
2016-03-01 00:30:00    47
2016-03-01 00:45:00    34
2016-03-01 01:00:00    30
Name: total_pickups, dtype: int64

# Encoding

In [13]:
encoder = ColumnTransformer(
    [
        ("ohe", OneHotEncoder(drop="first", sparse_output=False), ["region", "day_of_week"]),
    ],
    remainder="passthrough",
    n_jobs=-1,
    force_int_remainder_cols=False,
)
encoder

In [14]:
X_train_encoded = encoder.fit_transform(X_train)
X_test_encoded = encoder.transform(X_test)

# Model Selection & Hyperparameter Tuning

In [15]:
# Experiment name

mlflow.set_experiment("model_selection")

2025/03/21 23:38:28 INFO mlflow.tracking.fluent: Experiment with name 'model_selection' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/e7025ecf811645d197bc7740aa83c0f9', creation_time=1742614708910, experiment_id='0', last_update_time=1742614708910, lifecycle_stage='active', name='model_selection', tags={}>

In [16]:
def objective(trial):
    # Start with the child run
    with mlflow.start_run(nested=True) as child:
        # Models and their hyperparameter search space
        list_of_models = ["LR", "RF", "GBR", "XGBR"]
        model_name = trial.suggest_categorical("model_name", list_of_models)

        # Going over the models and their hyperparameters
        if model_name == "LR":
            model = LinearRegression()

        elif model_name == "RF":
            n_estimators_rf = trial.suggest_int("n_estimators_rf", 10, 100, step=10)
            max_depth_rf = trial.suggest_int("max_depth_rf", 3, 10)
            model = RandomForestRegressor(
                n_estimators=n_estimators_rf,
                max_depth=max_depth_rf,
                random_state=42,
                n_jobs=-1
            )

        elif model_name == "GBR":
            n_estimators_gb = trial.suggest_int("n_estimators_gb", 10, 100, step=10)
            learning_rate_gb = trial.suggest_float("learning_rate_gb", 1e-4, 1e-1, log=True)
            model = GradientBoostingRegressor(
                n_estimators=n_estimators_gb,
                learning_rate=learning_rate_gb,
                random_state=42
            )

        elif model_name == "XGBR":
            n_estimators_xgb = trial.suggest_int("n_estimators_xgb", 10, 100, step=10)
            learning_rate_xgb = trial.suggest_float("learning_rate_xgb", 1e-4, 1e-1, log=True)
            max_depth_xgb = trial.suggest_int("max_depth_xgb", 3, 10)
            model = XGBRegressor(
                n_estimators=n_estimators_xgb,
                learning_rate=learning_rate_xgb,
                max_depth=max_depth_xgb
            )

        # Logging the model name
        mlflow.log_param("model_name", model_name)

        # Logging the model parameters
        mlflow.log_params(model.get_params())

        # Training the model
        model.fit(X_train_encoded, y_train)

        # Getting the predictions
        y_pred = model.predict(X_test_encoded)

        # Calculating MAPE
        mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)

        # Logging MAPE
        mlflow.log_metric("mape", mape)

        return mape

In [17]:
# Optimizing the objective function to get the best model

with mlflow.start_run(run_name="best_model", nested=True) as parent:
    # Creating a study object
    study = optuna.create_study(study_name="model_selection", direction="minimize")
    # Optimizing the objective function
    study.optimize(func=objective, n_trials=50, n_jobs=-1)

    # Log the best parameters
    mlflow.log_params(study.best_params)
    # Log the best MAPE
    mlflow.log_metric("best_mape", study.best_value)

[I 2025-03-22 00:00:26,855] A new study created in memory with name: model_selection


🏃 View run funny-jay-544 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/fb959430774c4fb38c2033746868ee76
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run intelligent-dog-10 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/612598c975594e3e87f9f2168435a07c
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run defiant-quail-89 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/8c362233766a4d6b8efea5b0eb4ffa81
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run fortunate-ox-569 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/851df5b430864fdd95b6dcd9578f7a27
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#

[I 2025-03-22 00:01:13,829] Trial 3 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run auspicious-gnat-418 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/2d118ee3b4ea4765811e673d89815b6b
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:01:15,839] Trial 2 finished with value: 0.10808084905147552 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 100, 'learning_rate_xgb': 0.09782463377770757, 'max_depth_xgb': 8}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:01:17,817] Trial 12 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:01:21,900] Trial 1 finished with value: 1.048230528831482 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 30, 'learning_rate_xgb': 0.06124248879280332, 'max_depth_xgb': 9}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:01:22,826] Trial 19 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:01:24,848] Trial 5 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run handsome-goat-189 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/25e1dc27b1234a418ea07b814b81cf9b
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run capricious-yak-62 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/9592342e9f2a406e85c45772b9982666
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:01:37,829] Trial 16 finished with value: 6.25038480758667 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 90, 'learning_rate_xgb': 0.0005863864414654645, 'max_depth_xgb': 4}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run invincible-ox-640 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/d856b7826c494b6d943e43b54e3416ab
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:02:02,820] Trial 14 finished with value: 6.2228779792785645 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 90, 'learning_rate_xgb': 0.0006371451868598997, 'max_depth_xgb': 4}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run overjoyed-ox-634 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/4b7dff7c82ad44dfaa9b8a29446671ff
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:02:08,851] Trial 21 finished with value: 6.522985935211182 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 30, 'learning_rate_xgb': 0.00028109949791025376, 'max_depth_xgb': 8}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run flawless-seal-672 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/3b54dd31e6c44d539e84a608fb38ec8f
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run magnificent-hound-89 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/885ea7d9676b4fe7a8a210beab03f936
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run capricious-perch-871 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/edf9f4aa79834b70bd7fca134db0e168
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run grandiose-hare-249 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/d60bfb0984b746d284483f328b81af0b
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-predict

[I 2025-03-22 00:02:16,824] Trial 25 finished with value: 0.130290113505722 and parameters: {'model_name': 'RF', 'n_estimators_rf': 10, 'max_depth_rf': 10}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:02:19,848] Trial 11 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:02:20,824] Trial 26 finished with value: 6.1606526374816895 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 90, 'learning_rate_xgb': 0.0007730137093947523, 'max_depth_xgb': 3}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:02:21,826] Trial 27 finished with value: 0.1832185536623001 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 60, 'learning_rate_xgb': 0.06720475916397108, 'max_depth_xgb': 8}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:02:23,845] Trial 15 finished with value: 2.6806978811040105 and parameters: {'model_name': 'GBR', 'n_estim

🏃 View run honorable-lynx-957 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/97758d1baf3249fcb82ff6cf0de85609
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run fortunate-fowl-378 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/e7cfbdcb96ae4727bcd9fc9dba297253
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:02:35,856] Trial 28 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:02:37,858] Trial 30 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run rebellious-ant-328 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/6c71bd9f11d84867b7841fe0a5f4f476
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run crawling-eel-548 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/0ccf4a4a74174121b751015d32a45933
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:02:58,821] Trial 17 finished with value: 6.45842923376763 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 70, 'learning_rate_gb': 0.00027671441965037235}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:03:03,820] Trial 31 finished with value: 0.17757159724163896 and parameters: {'model_name': 'RF', 'n_estimators_rf': 20, 'max_depth_rf': 7}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run dapper-stag-877 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/73f89f555e7b42d19e0f4cf995f6685a
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run kindly-sloth-785 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/05cc5b6a55994546a010b6982f922185
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run hilarious-quail-331 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/61baadf8ec8d48d5aabac86fc33d1bbc
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run redolent-owl-429 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/afb95cfd45794c2f9fc145533773796e
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflo

[I 2025-03-22 00:03:15,817] Trial 33 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:03:18,833] Trial 34 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:03:19,834] Trial 35 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:03:21,812] Trial 36 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:03:22,827] Trial 37 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run tasteful-wren-445 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/8b15555b629241efa5966f5ca111cdbb
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run dapper-grouse-759 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/591005ac067948068dac4ae729ea3ee3
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run beautiful-moth-844 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/b2777eb6c0064ca597bb623589a9aba9
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:03:34,824] Trial 32 finished with value: 0.814946733912501 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 80, 'learning_rate_gb': 0.02986573512357426}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:03:37,817] Trial 10 finished with value: 0.5443209920941997 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 3}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:03:40,830] Trial 39 finished with value: 0.5443209920941996 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 3}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run abrasive-colt-227 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/b4084f762e3547919ba9749a29c8536f
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run enchanting-skink-39 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/2738f3b66ab248f3b332c95b45e538ba
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run redolent-pig-834 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/68a4b888302645d587319ce4172ed2b4
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:03:56,839] Trial 8 finished with value: 6.533738059538262 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 60, 'learning_rate_gb': 0.00011803685141773725}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:04:03,857] Trial 42 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run learned-rook-577 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/710a64995fec4e5db841cdb50ddbf768
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run exultant-roo-431 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/55663e40039a4b88b8e1b246cd542b36
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run abundant-pug-897 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/932abbf8303e4e3e94f4aab42fb3ef20
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run brawny-wasp-363 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/ea70a8fe4f524ac081ace81d1f16a2d3
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#

[I 2025-03-22 00:04:13,824] Trial 41 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:04:15,830] Trial 43 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:04:17,830] Trial 44 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:04:18,846] Trial 45 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:04:19,824] Trial 24 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run rogue-trout-200 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/41455f4ba7a04ea788297c0f77880824
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:04:21,825] Trial 46 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:04:22,852] Trial 47 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run delightful-rat-664 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/335aa9e6eac540c59ec4b6b5cd8b901f
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run merciful-horse-279 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/7d5ec838e0b743e1ab5d9bf14eaecd78
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:04:26,855] Trial 29 finished with value: 6.569347858428955 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.00012545899842526844, 'max_depth_xgb': 10}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:04:27,820] Trial 48 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run legendary-fish-203 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/6850e6a35d1d4bc2832fd5b1dbbb99eb
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:04:29,856] Trial 49 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:04:30,821] Trial 40 finished with value: 0.3466313143838905 and parameters: {'model_name': 'RF', 'n_estimators_rf': 100, 'max_depth_rf': 4}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run mercurial-slug-599 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/42666ca3eb48430a9118a95ef4a67ea7
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:04:50,849] Trial 38 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run capricious-loon-956 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/997165d085b34559b37ba059e51153a1
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run abrasive-eel-252 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/17d5d3535c054ef09cc948eed5260f38
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run peaceful-sloth-294 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/00197b12b9894b9083000dfb6fef8c86
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:04:59,883] Trial 0 finished with value: 6.560675621032715 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 10, 'learning_rate_xgb': 0.0002592839710808767, 'max_depth_xgb': 9}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:05:02,854] Trial 22 finished with value: 0.6202397346496582 and parameters: {'model_name': 'XGBR', 'n_estimators_xgb': 50, 'learning_rate_xgb': 0.04917376488672416, 'max_depth_xgb': 6}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run rogue-trout-608 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/c240c7e9db644f189a980cc1a54ce463
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:05:04,822] Trial 9 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run flawless-conch-63 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/c0650b3beb90456d80dd38201f99b386
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run gentle-steed-541 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/f1b13896dded4e079a024d48322cbe76
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:05:07,849] Trial 6 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:05:08,822] Trial 4 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:05:09,848] Trial 20 finished with value: 0.07934790285463529 and parameters: {'model_name': 'LR'}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:05:17,842] Trial 7 finished with value: 0.3463017707490639 and parameters: {'model_name': 'RF', 'n_estimators_rf': 60, 'max_depth_rf': 4}. Best is trial 3 with value: 0.07934790285463529.
[I 2025-03-22 00:05:50,000] Trial 18 finished with value: 3.4244388762626796 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 80, 'learning_rate_gb': 0.00883764074057375}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run stylish-dog-341 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/ffba01f1053c438bac8adab03d9066be
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


[I 2025-03-22 00:06:07,912] Trial 13 finished with value: 6.499630903026681 and parameters: {'model_name': 'GBR', 'n_estimators_gb': 30, 'learning_rate_gb': 0.00042106519411305206}. Best is trial 3 with value: 0.07934790285463529.


🏃 View run inquisitive-ram-287 at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/08ebc0d1b437408fbd6079fffec15a33
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0
🏃 View run best_model at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0/runs/db9c6b4e16eb4806900f86e1524fb938
🧪 View experiment at: https://dagshub.com/SushrutGaikwad/taxi-demand-prediction.mlflow/#/experiments/0


In [18]:
# Best MAPE

print(f"The best MAPE is {study.best_value * 100:.2f}%.")

The best MAPE is 7.93%.


In [19]:
# Best model

study.best_params

{'model_name': 'LR'}

In [20]:
# Model counts during the optimization

study.trials_dataframe()["params_model_name"].value_counts()

params_model_name
LR      27
XGBR    10
GBR      7
RF       6
Name: count, dtype: int64

# Training the best model

In [29]:
regressor = LinearRegression()
regressor.fit(X_train_encoded, y_train)

# Predictions

y_pred_train = regressor.predict(X_train_encoded) 
y_pred_test = regressor.predict(X_test_encoded)

# MAPE

mape_train = mean_absolute_percentage_error(y_true=y_train, y_pred=y_pred_train)
mape_test = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred_test)

print(f"The training MAPE is {mape_train*100:.2f}%.")
print(f"The test MAPE is {mape_test*100:.2f}%.")

The training MAPE is 8.78%.
The test MAPE is 7.93%.


In [30]:
regressor.coef_

array([-2.33737604,  0.71512405, -0.55601505, -1.25311068, -3.20463231,
       -0.86685973, -2.79925402, -3.62516859,  0.41386463, -2.9376376 ,
       -1.97624678, -3.75050442,  0.51806283, -2.54033388, -2.43297463,
        0.47632075,  0.61254786, -4.7417372 , -2.03077217, -1.26960984,
       -4.03690273, -2.08863167, -1.0414428 ,  0.73561736, -0.99999442,
       -0.85944985, -2.43098478,  0.67112238,  0.57385071, -0.11719951,
       -0.28045898, -0.37180749, -0.5238324 , -0.4233113 , -0.34045774,
       -0.54170892, -0.36264553, -0.2493965 , -0.31905518,  2.4912456 ])

Let us try ridge regression, which is a regularized version of linear regression.

In [31]:
def tune_ridge(trial):
    # Hyperparameter space
    alpha = trial.suggest_float("alpha", 30, 100)
    
    # Model
    ridge = Ridge(alpha=alpha, random_state=42)
    
    # Training
    ridge.fit(X_train_encoded, y_train)
    
    # Predicting
    y_pred = ridge.predict(X_test_encoded)
    
    # MAPE
    mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)

    return mape

In [32]:
# Create study

study = optuna.create_study(study_name="tune_ridge", direction="minimize")

[I 2025-03-22 00:30:08,781] A new study created in memory with name: tune_ridge


In [34]:
# optimize

study.optimize(func=tune_ridge, n_trials=100, n_jobs=-1, show_progress_bar=True)

  0%|          | 0/100 [00:00<?, ?it/s]

[I 2025-03-22 00:31:19,858] Trial 45 finished with value: 0.07918544977905692 and parameters: {'alpha': 83.68581987244933}. Best is trial 42 with value: 0.07916408447959469.
[I 2025-03-22 00:31:20,064] Trial 46 finished with value: 0.07918567126002436 and parameters: {'alpha': 83.52663433389492}. Best is trial 42 with value: 0.07916408447959469.
[I 2025-03-22 00:31:20,095] Trial 47 finished with value: 0.07918311756121663 and parameters: {'alpha': 85.37405241031513}. Best is trial 42 with value: 0.07916408447959469.
[I 2025-03-22 00:31:20,135] Trial 51 finished with value: 0.07918458695645363 and parameters: {'alpha': 84.30741244451377}. Best is trial 42 with value: 0.07916408447959469.
[I 2025-03-22 00:31:20,176] Trial 49 finished with value: 0.07918238302795762 and parameters: {'alpha': 85.9109455964816}. Best is trial 42 with value: 0.07916408447959469.
[I 2025-03-22 00:31:20,206] Trial 48 finished with value: 0.07918748666258128 and parameters: {'alpha': 82.23005242847847}. Best is

In [35]:
# Best parameters

study.best_params

{'alpha': 99.99555980666432}

In [37]:
# Best MAPE

print(f"The best MAPE using ridge regression is {study.best_value*100:.2f}%.")

The best MAPE using ridge regression is 7.92%.


So we will go with the linear regression model. We won't do any hyperparameter tuning for it as it does not have any hyperparameters.