In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pylab as plt
#%pip install seaborn
import seaborn as sns

from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.compose import ColumnTransformer,TransformedTargetRegressor
from sklearn.preprocessing import StandardScaler,OneHotEncoder,OrdinalEncoder,PowerTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor,StackingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score,mean_absolute_error
from sklearn.impute import SimpleImputer, KNNImputer, MissingIndicator


from sklearn import set_config

%pip install dagshub mlflow optuna
import mlflow
import mlflow.sklearn
import dagshub


from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from lightgbm import LGBMRegressor

import optuna


set_config(transform_output="pandas")
plt.style.use('fivethirtyeight')

Note: you may need to restart the kernel to use updated packages.


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dagshub.init(repo_owner='Arej02', repo_name='swiggy_delivery_time_prediction', mlflow=True)

In [3]:
mlflow.set_tracking_uri("https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow")

In [4]:
mlflow.set_experiment("Exp4:Stacking Regressor")

<Experiment: artifact_location='mlflow-artifacts:/490f6187b69843608bb8fdd7589c45dd', creation_time=1758539296230, experiment_id='3', last_update_time=1758539296230, lifecycle_stage='active', name='Exp4:Stacking Regressor', tags={}>

In [5]:
swiggy=pd.read_csv("swiggy_cleaned.csv")

In [6]:
swiggy.sample(3)

Unnamed: 0,rider_id,age,ratings,restaurant_latitude,restaurant_longitude,delivery_latitude,delivery_longitude,order_date,weather,traffic,...,time_taken,city_name,order_day,order_month,order_day_of_week,is_weekend,pickup_time_minutes,order_time_hour,order_time_of_day,distance
2301,JAPRES12DEL02,23.0,4.6,26.902908,75.792934,26.982908,75.872934,2022-03-06,fog,medium,...,39,JAP,6,3,sunday,1,5.0,17.0,evening,11.917102
33957,RANCHIRES17DEL02,,,23.374878,85.335739,23.504878,85.465739,2022-03-29,,,...,28,RANCHI,29,3,tuesday,0,,,after_midnight,19.617581
33762,HYDRES010DEL03,39.0,4.6,17.428294,78.404423,17.518294,78.494423,2022-03-18,windy,jam,...,25,HYD,18,3,friday,0,15.0,19.0,evening,13.830131


In [7]:
swiggy['distance'].describe()

count    41872.000000
mean         9.719296
std          5.602890
min          1.465067
25%          4.657655
50%          9.193014
75%         13.680920
max         20.969489
Name: distance, dtype: float64

In [8]:
def bin_distance(df:pd.DataFrame,num_col):
    distance_labels=['short','medium','long','very long']
    distance_bins=[1,6,12,18,24]
    df['distance_bins']=pd.cut(df[num_col],bins=distance_bins,labels=distance_labels,right=False)
    return df

In [9]:
bin_distance(swiggy,'distance')

Unnamed: 0,rider_id,age,ratings,restaurant_latitude,restaurant_longitude,delivery_latitude,delivery_longitude,order_date,weather,traffic,...,city_name,order_day,order_month,order_day_of_week,is_weekend,pickup_time_minutes,order_time_hour,order_time_of_day,distance,distance_bins
0,INDORES13DEL02,37.0,4.9,22.745049,75.892471,22.765049,75.912471,2022-03-19,sunny,high,...,INDO,19,3,saturday,1,15.0,11.0,morning,3.025149,short
1,BANGRES18DEL02,34.0,4.5,12.913041,77.683237,13.043041,77.813237,2022-03-25,stormy,jam,...,BANG,25,3,friday,0,5.0,19.0,evening,20.183530,very long
2,BANGRES19DEL01,23.0,4.4,12.914264,77.678400,12.924264,77.688400,2022-03-19,sandstorms,low,...,BANG,19,3,saturday,1,15.0,8.0,morning,1.552758,short
3,COIMBRES13DEL02,38.0,4.7,11.003669,76.976494,11.053669,77.026494,2022-04-05,sunny,medium,...,COIMB,5,4,tuesday,0,10.0,18.0,evening,7.790401,medium
4,CHENRES12DEL01,32.0,4.6,12.972793,80.249982,13.012793,80.289982,2022-03-26,cloudy,high,...,CHEN,26,3,saturday,1,15.0,13.0,afternoon,6.210138,medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45497,JAPRES04DEL01,30.0,4.8,26.902328,75.794257,26.912328,75.804257,2022-03-24,windy,high,...,JAP,24,3,thursday,0,10.0,11.0,morning,1.489846,short
45498,AGRRES16DEL01,21.0,4.6,,,,,2022-02-16,windy,jam,...,AGR,16,2,wednesday,0,15.0,19.0,evening,,
45499,CHENRES08DEL03,30.0,4.9,13.022394,80.242439,13.052394,80.272439,2022-03-11,cloudy,low,...,CHEN,11,3,friday,0,15.0,23.0,night,4.657195,short
45500,COIMBRES11DEL01,20.0,4.7,11.001753,76.986241,11.041753,77.026241,2022-03-07,cloudy,high,...,COIMB,7,3,monday,0,5.0,13.0,afternoon,6.232393,medium


In [10]:
swiggy['order_time_hour'].describe()

count    43862.000000
mean        17.423966
std          4.817856
min          0.000000
25%         15.000000
50%         19.000000
75%         21.000000
max         23.000000
Name: order_time_hour, dtype: float64

In [11]:
def bin_time(df:pd.DataFrame,num_col):
    time_labels=['after_midnight','morning','afternoon','evening','night']
    time_bins=[0,6,12,17,20,24]
    df['order_time_bins']=pd.cut(df[num_col],bins=time_bins,labels=time_labels,right=False)
    return df

In [12]:
bin_time(swiggy,'order_time_hour')

Unnamed: 0,rider_id,age,ratings,restaurant_latitude,restaurant_longitude,delivery_latitude,delivery_longitude,order_date,weather,traffic,...,order_day,order_month,order_day_of_week,is_weekend,pickup_time_minutes,order_time_hour,order_time_of_day,distance,distance_bins,order_time_bins
0,INDORES13DEL02,37.0,4.9,22.745049,75.892471,22.765049,75.912471,2022-03-19,sunny,high,...,19,3,saturday,1,15.0,11.0,morning,3.025149,short,morning
1,BANGRES18DEL02,34.0,4.5,12.913041,77.683237,13.043041,77.813237,2022-03-25,stormy,jam,...,25,3,friday,0,5.0,19.0,evening,20.183530,very long,evening
2,BANGRES19DEL01,23.0,4.4,12.914264,77.678400,12.924264,77.688400,2022-03-19,sandstorms,low,...,19,3,saturday,1,15.0,8.0,morning,1.552758,short,morning
3,COIMBRES13DEL02,38.0,4.7,11.003669,76.976494,11.053669,77.026494,2022-04-05,sunny,medium,...,5,4,tuesday,0,10.0,18.0,evening,7.790401,medium,evening
4,CHENRES12DEL01,32.0,4.6,12.972793,80.249982,13.012793,80.289982,2022-03-26,cloudy,high,...,26,3,saturday,1,15.0,13.0,afternoon,6.210138,medium,afternoon
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45497,JAPRES04DEL01,30.0,4.8,26.902328,75.794257,26.912328,75.804257,2022-03-24,windy,high,...,24,3,thursday,0,10.0,11.0,morning,1.489846,short,morning
45498,AGRRES16DEL01,21.0,4.6,,,,,2022-02-16,windy,jam,...,16,2,wednesday,0,15.0,19.0,evening,,,evening
45499,CHENRES08DEL03,30.0,4.9,13.022394,80.242439,13.052394,80.272439,2022-03-11,cloudy,low,...,11,3,friday,0,15.0,23.0,night,4.657195,short,night
45500,COIMBRES11DEL01,20.0,4.7,11.001753,76.986241,11.041753,77.026241,2022-03-07,cloudy,high,...,7,3,monday,0,5.0,13.0,afternoon,6.232393,medium,afternoon


In [13]:
df1=swiggy.copy().dropna()
df1

Unnamed: 0,rider_id,age,ratings,restaurant_latitude,restaurant_longitude,delivery_latitude,delivery_longitude,order_date,weather,traffic,...,order_day,order_month,order_day_of_week,is_weekend,pickup_time_minutes,order_time_hour,order_time_of_day,distance,distance_bins,order_time_bins
0,INDORES13DEL02,37.0,4.9,22.745049,75.892471,22.765049,75.912471,2022-03-19,sunny,high,...,19,3,saturday,1,15.0,11.0,morning,3.025149,short,morning
1,BANGRES18DEL02,34.0,4.5,12.913041,77.683237,13.043041,77.813237,2022-03-25,stormy,jam,...,25,3,friday,0,5.0,19.0,evening,20.183530,very long,evening
2,BANGRES19DEL01,23.0,4.4,12.914264,77.678400,12.924264,77.688400,2022-03-19,sandstorms,low,...,19,3,saturday,1,15.0,8.0,morning,1.552758,short,morning
3,COIMBRES13DEL02,38.0,4.7,11.003669,76.976494,11.053669,77.026494,2022-04-05,sunny,medium,...,5,4,tuesday,0,10.0,18.0,evening,7.790401,medium,evening
4,CHENRES12DEL01,32.0,4.6,12.972793,80.249982,13.012793,80.289982,2022-03-26,cloudy,high,...,26,3,saturday,1,15.0,13.0,afternoon,6.210138,medium,afternoon
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45496,RANCHIRES16DEL01,35.0,4.2,23.371292,85.327872,23.481292,85.437872,2022-03-08,windy,jam,...,8,3,tuesday,0,10.0,21.0,night,16.600272,long,night
45497,JAPRES04DEL01,30.0,4.8,26.902328,75.794257,26.912328,75.804257,2022-03-24,windy,high,...,24,3,thursday,0,10.0,11.0,morning,1.489846,short,morning
45499,CHENRES08DEL03,30.0,4.9,13.022394,80.242439,13.052394,80.272439,2022-03-11,cloudy,low,...,11,3,friday,0,15.0,23.0,night,4.657195,short,night
45500,COIMBRES11DEL01,20.0,4.7,11.001753,76.986241,11.041753,77.026241,2022-03-07,cloudy,high,...,7,3,monday,0,5.0,13.0,afternoon,6.232393,medium,afternoon


In [14]:
columns_to_drop=[
    'rider_id',
    'restaurant_latitude',
    'restaurant_longitude',
    'delivery_latitude',
    'delivery_longitude',
    'order_date',
    'order_time_of_day',
    "order_time_hour",
    "order_day",
    "city_name",
    "order_day_of_week",
    "order_month"
]

In [15]:
df1.drop(columns=columns_to_drop,inplace=True)

In [16]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38064 entries, 0 to 45501
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   age                  38064 non-null  float64 
 1   ratings              38064 non-null  float64 
 2   weather              38064 non-null  object  
 3   traffic              38064 non-null  object  
 4   vehicle_condition    38064 non-null  int64   
 5   type_of_order        38064 non-null  object  
 6   type_of_vehicle      38064 non-null  object  
 7   multiple_deliveries  38064 non-null  float64 
 8   festival             38064 non-null  object  
 9   city_type            38064 non-null  object  
 10  time_taken           38064 non-null  int64   
 11  is_weekend           38064 non-null  int64   
 12  pickup_time_minutes  38064 non-null  float64 
 13  distance             38064 non-null  float64 
 14  distance_bins        38064 non-null  category
 15  order_time_bins    

In [17]:
df1.isnull().mean()

age                    0.0
ratings                0.0
weather                0.0
traffic                0.0
vehicle_condition      0.0
type_of_order          0.0
type_of_vehicle        0.0
multiple_deliveries    0.0
festival               0.0
city_type              0.0
time_taken             0.0
is_weekend             0.0
pickup_time_minutes    0.0
distance               0.0
distance_bins          0.0
order_time_bins        0.0
dtype: float64

In [18]:
df1.duplicated().sum()

0

In [19]:
X=df1.drop(columns=['time_taken'])
X

Unnamed: 0,age,ratings,weather,traffic,vehicle_condition,type_of_order,type_of_vehicle,multiple_deliveries,festival,city_type,is_weekend,pickup_time_minutes,distance,distance_bins,order_time_bins
0,37.0,4.9,sunny,high,2,snack,motorcycle,0.0,no,urban,1,15.0,3.025149,short,morning
1,34.0,4.5,stormy,jam,2,snack,scooter,1.0,no,metropolitian,0,5.0,20.183530,very long,evening
2,23.0,4.4,sandstorms,low,0,drinks,motorcycle,1.0,no,urban,1,15.0,1.552758,short,morning
3,38.0,4.7,sunny,medium,0,buffet,motorcycle,1.0,no,metropolitian,0,10.0,7.790401,medium,evening
4,32.0,4.6,cloudy,high,1,snack,scooter,1.0,no,metropolitian,1,15.0,6.210138,medium,afternoon
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45496,35.0,4.2,windy,jam,2,drinks,motorcycle,1.0,no,metropolitian,0,10.0,16.600272,long,night
45497,30.0,4.8,windy,high,1,meal,motorcycle,0.0,no,metropolitian,0,10.0,1.489846,short,morning
45499,30.0,4.9,cloudy,low,1,drinks,scooter,0.0,no,metropolitian,0,15.0,4.657195,short,night
45500,20.0,4.7,cloudy,high,0,snack,motorcycle,1.0,no,metropolitian,0,5.0,6.232393,medium,afternoon


In [20]:
Y=df1['time_taken']
Y

0        24
1        33
2        26
3        21
4        30
         ..
45496    33
45497    32
45499    16
45500    26
45501    36
Name: time_taken, Length: 38064, dtype: int64

In [21]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=10)

In [22]:
print("The size of train data is",X_train.shape)
print("The shape of test data is",X_test.shape)

The size of train data is (30451, 15)
The shape of test data is (7613, 15)


In [23]:
numerical_col=[cols for cols in df1.columns if df1[cols].dtype in ['int64','float64'] and cols !='time_taken']
numerical_col

['age',
 'ratings',
 'vehicle_condition',
 'multiple_deliveries',
 'is_weekend',
 'pickup_time_minutes',
 'distance']

In [24]:
categorical_col=[cols for cols in df1.columns if df1[cols].dtype in ['object','category']]
categorical_col

['weather',
 'traffic',
 'type_of_order',
 'type_of_vehicle',
 'festival',
 'city_type',
 'distance_bins',
 'order_time_bins']

In [25]:
nominal_col=['weather','type_of_order','type_of_vehicle','festival','distance_bins','order_time_bins']
ordinal_col=['traffic','city_type']

In [26]:
len(numerical_col+nominal_col+ordinal_col)

15

In [27]:
df1['traffic'].unique()

array(['high', 'jam', 'low', 'medium'], dtype=object)

In [28]:
df1['city_type'].unique()

array(['urban', 'metropolitian', 'semi-urban'], dtype=object)

In [29]:
traffic_groups=['low','medium','high','jam']
city_groups=['semi-urban','urban','metropolitian']

In [30]:
preprocessor=ColumnTransformer(
    [
        ("scaler",StandardScaler(),numerical_col),
        ("ohe",OneHotEncoder(drop="first",sparse_output=False,handle_unknown="ignore"),nominal_col),
        ("ordinal",OrdinalEncoder(categories=[traffic_groups,city_groups]),ordinal_col)
    ],remainder='passthrough'
)
preprocessor.set_output(transform="pandas")

In [31]:
base_model=RandomForestRegressor()

In [32]:
pipeline1=Pipeline([
    ("preprocessor",preprocessor),
    ("base_model",base_model)
])

In [33]:
reg=TransformedTargetRegressor(
    regressor=pipeline1,
    transformer=PowerTransformer()
)

In [34]:
reg.fit(X_train,Y_train)



In [35]:
y_pred_test=reg.predict(X_test)
y_pred_train=reg.predict(X_train)

In [36]:
print(f"The train mean absolute error is:{round(mean_absolute_error(Y_train,y_pred_train),2)} min")
print(f"The test mean absolute error is:{round(mean_absolute_error(Y_test,y_pred_test),2)} min")

The train mean absolute error is:1.15 min
The test mean absolute error is:3.14 min


In [37]:
print(f"The train r2 score is:{round(r2_score(Y_train,y_pred_train),2)}")
print(f"The test r2 score is:{round(r2_score(Y_test,y_pred_test),2)}")

The train r2 score is:0.98
The test r2 score is:0.82


In [38]:
pipeline2=Pipeline([
    ("preprocessor",preprocessor),
])

In [39]:
X_train_trans=pipeline2.fit_transform(X_train)
X_test_trans=pipeline2.transform(X_test)

In [40]:
X_train_trans

Unnamed: 0,scaler__age,scaler__ratings,scaler__vehicle_condition,scaler__multiple_deliveries,scaler__is_weekend,scaler__pickup_time_minutes,scaler__distance,ohe__weather_fog,ohe__weather_sandstorms,ohe__weather_stormy,...,ohe__festival_yes,ohe__distance_bins_medium,ohe__distance_bins_short,ohe__distance_bins_very long,ohe__order_time_bins_afternoon,ohe__order_time_bins_evening,ohe__order_time_bins_morning,ohe__order_time_bins_night,ordinal__traffic,ordinal__city_type
42620,-1.319586,0.528127,0.002935,0.437621,-0.615678,0.006393,0.198212,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,2.0
6159,0.936057,-0.104873,-1.221503,0.437621,-0.615678,1.230791,-1.468722,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,2.0,2.0
18429,0.415524,0.844627,1.227374,-1.310051,1.624227,0.006393,-0.933730,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0,2.0
2206,-1.319586,0.211627,1.227374,0.437621,-0.615678,0.006393,-1.199370,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,2.0,2.0
11565,0.762546,-0.737873,0.002935,-1.310051,-0.615678,1.230791,-0.434219,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12203,-0.278520,0.211627,1.227374,0.437621,1.624227,0.006393,-0.633418,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0
11198,-0.452031,1.161127,0.002935,0.437621,-0.615678,1.230791,-0.078964,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,2.0
33459,1.109568,-2.636872,1.227374,2.185294,1.624227,-1.218005,1.861649,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,3.0,1.0
34862,-0.625542,0.844627,0.002935,-1.310051,1.624227,-1.218005,-1.210381,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


#### Stacking Regressor:

In [41]:
# Best parameter for lgbm:
best_param_lgbm={
    'learning_rate': 0.05597085845487942,
    'num_leaves': 85,
    'feature_fraction': 0.8475689756385575,
    'bagging_fraction': 0.845652828843328,
    'bagging_freq': 2,
    'max_depth': 36,
    'min_data_in_leaf': 62,
    'lambda_l1': 0.0038672702290316607,
    'lambda_l2': 0.6632645081461954
    }

# Best parameter for random forest:
best_param_rf={
    'n_estimators': 542,
    'max_depth': 17,
    'min_samples_split': 15,
    'min_samples_leaf': 10,
    'max_features': None,
    'bootstrap': True
    }
best_rf=RandomForestRegressor(**best_param_rf)
best_lgbm=LGBMRegressor(**best_param_lgbm)

In [44]:
def objective(trial):
    with mlflow.start_run():
        model_name = trial.suggest_categorical("model", ["LR", "KNN", "DT"])

        if model_name == "LR":
            fit_intercept = trial.suggest_categorical("fit_intercept", [True, False])
            base_model = LinearRegression(fit_intercept=fit_intercept)

        elif model_name == "KNN":
            n_neighbors = trial.suggest_int("n_neighbors", 3, 11)
            weights = trial.suggest_categorical("weights", ["uniform", "distance"])
            p = trial.suggest_int("p", 1, 2)
            base_model = KNeighborsRegressor(
                n_neighbors=n_neighbors,
                weights=weights,
                p=p
            )

        elif model_name == "DT":
            max_depth = trial.suggest_int("max_depth", 2, 10)
            min_samples_leaf = trial.suggest_int("min_samples_leaf", 2, 10)
            base_model = DecisionTreeRegressor(
                max_depth=max_depth,
                min_samples_leaf=min_samples_leaf,
                random_state=42
            )
        mlflow.log_param("meta_model_name",model_name)
        stacked_regressor=StackingRegressor(
            estimators=[("rf",best_rf),("lgbm",best_lgbm)],
            final_estimator=base_model,n_jobs=-1
        )

        model=TransformedTargetRegressor(
            regressor=stacked_regressor,
            transformer=PowerTransformer("yeo-johnson")
        )

        model.fit(X_train_trans,Y_train)
        y_pred=model.predict(X_test_trans)
        error=mean_absolute_error(Y_test,y_pred)
        mlflow.log_metric("MAE",error)

        return error

In [45]:
study=optuna.create_study(direction="minimize")
with mlflow.start_run():
  study.optimize(objective,n_trials=15,n_jobs=-1)
  mlflow.log_params(study.best_params)
  mlflow.log_metric("best_score",study.best_value)

[I 2025-09-22 13:08:19,521] A new study created in memory with name: no-name-b02e1591-6e51-4dbe-93d3-5b1c028546c0


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009898 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 24361, number of used features: 27
[LightGBM] [Info] Start training from score 0.004507
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007622 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 361
[LightGBM] [Info] Number of data points in the train set: 24360, number of used features: 27
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006796 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info]

[I 2025-09-22 13:12:46,510] Trial 1 finished with value: 3.17534301615592 and parameters: {'model': 'KNN', 'n_neighbors': 10, 'weights': 'distance', 'p': 2}. Best is trial 1 with value: 3.17534301615592.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002893 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 361
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010669 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 24361, number of used features: 27
[LightGBM] [Info] Start training from score 0.004507
[LightGBM] [Info] Number of data points in the train set: 24360, number of used features: 27
[LightGBM] [Info] Start training from score 0.000480
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006421 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough,



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008059 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 361
[LightGBM] [Info] Number of data points in the train set: 24360, number of used features: 27
[LightGBM] [Info] Start training from score 0.000480
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003332 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 24361, number of used features: 27
[LightGBM] [Info] Start training from score 0.004507
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010964 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins

[I 2025-09-22 13:20:28,594] Trial 0 finished with value: 3.0352705619850266 and parameters: {'model': 'LR', 'fit_intercept': False}. Best is trial 0 with value: 3.0352705619850266.


🏃 View run bedecked-mink-445 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/4f96a233117f429b83f6b7f6056aa503
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:20:34,623] Trial 2 finished with value: 3.0661471171486774 and parameters: {'model': 'DT', 'max_depth': 4, 'min_samples_leaf': 7}. Best is trial 0 with value: 3.0352705619850266.


🏃 View run mercurial-gnat-216 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/f77d1b7d15de447b9670f5299e244d2e
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:20:42,479] Trial 3 finished with value: 3.0358165993850394 and parameters: {'model': 'LR', 'fit_intercept': False}. Best is trial 0 with value: 3.0352705619850266.


🏃 View run colorful-lark-813 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/97fa8b791e77424fa76acdebf07c14a2
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:20:52,646] Trial 4 finished with value: 3.0361489161975865 and parameters: {'model': 'LR', 'fit_intercept': False}. Best is trial 0 with value: 3.0352705619850266.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002870 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 361
[LightGBM] [Info] Number of data points in the train set: 24360, number of used features: 27
[LightGBM] [Info] Start training from score 0.000480
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004137 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 24361, number of used features: 27
[LightGBM] [Info] Start training from score 0.006090
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005301 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006042 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 24361, number of used features: 27
[LightGBM] [Info] Start training from score 0.004507
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002856 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 361
[LightGBM] [Info] Number of data points in the train set: 24361, number of used features: 27
[LightGBM] [Info] Start training from score -0.008248
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009598 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough

[I 2025-09-22 13:28:36,257] Trial 5 finished with value: 3.4731285908185767 and parameters: {'model': 'KNN', 'n_neighbors': 3, 'weights': 'distance', 'p': 1}. Best is trial 0 with value: 3.0352705619850266.


🏃 View run gaudy-owl-338 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/7d7a9cf09ce247988676c1f587fe30b0
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:28:37,320] Trial 6 finished with value: 3.0352675695505846 and parameters: {'model': 'LR', 'fit_intercept': False}. Best is trial 6 with value: 3.0352675695505846.


🏃 View run kindly-snake-81 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/903176e15ea54c1da45566ab050708ab
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:28:47,128] Trial 7 finished with value: 3.0359348676594773 and parameters: {'model': 'LR', 'fit_intercept': False}. Best is trial 6 with value: 3.0352675695505846.


🏃 View run skittish-shad-136 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/8d67f1b0738144238bd017ba507067e5
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:28:56,130] Trial 8 finished with value: 3.228513305259485 and parameters: {'model': 'KNN', 'n_neighbors': 7, 'weights': 'distance', 'p': 2}. Best is trial 6 with value: 3.0352675695505846.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006484 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 24361, number of used features: 27
[LightGBM] [Info] Start training from score 0.004507
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010898 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 24361, number of used features: 27
[LightGBM] [Info] Start training from score 0.006090
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008778 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins

[I 2025-09-22 13:36:35,346] Trial 9 finished with value: 3.0354859617992096 and parameters: {'model': 'LR', 'fit_intercept': True}. Best is trial 6 with value: 3.0352675695505846.


🏃 View run fortunate-colt-712 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/78f7f0bd61fd4acf8fcb27869118ff61
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:36:42,370] Trial 10 finished with value: 3.3681795266171815 and parameters: {'model': 'KNN', 'n_neighbors': 4, 'weights': 'distance', 'p': 2}. Best is trial 6 with value: 3.0352675695505846.


🏃 View run secretive-pug-940 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/0329a43552cb43e79840c21ac4938460
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:36:46,876] Trial 11 finished with value: 3.4441931477212653 and parameters: {'model': 'KNN', 'n_neighbors': 3, 'weights': 'distance', 'p': 2}. Best is trial 6 with value: 3.0352675695505846.


🏃 View run receptive-smelt-804 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/13246e5393654f47ae4a312cbca174d1
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:36:54,238] Trial 12 finished with value: 3.036558369685623 and parameters: {'model': 'LR', 'fit_intercept': False}. Best is trial 6 with value: 3.0352675695505846.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 30451, number of used features: 27
[LightGBM] [Info] Start training from score 0.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044242 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 362
[LightGBM] [Info] Number of data points in the train set: 30451, number of used features: 27
[LightGBM] [Info] Start training from score 0.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014687 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins

[I 2025-09-22 13:39:51,117] Trial 13 finished with value: 3.1031855119555436 and parameters: {'model': 'DT', 'max_depth': 10, 'min_samples_leaf': 2}. Best is trial 6 with value: 3.0352675695505846.


🏃 View run silent-ant-663 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/644723341e2c44b889fbee295a2e47ee
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


[I 2025-09-22 13:39:58,248] Trial 14 finished with value: 3.0362201448715984 and parameters: {'model': 'LR', 'fit_intercept': False}. Best is trial 6 with value: 3.0352675695505846.


🏃 View run nebulous-hawk-147 at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3/runs/4c9cf9f5046e408e8ba0b6858918719a
🧪 View experiment at: https://dagshub.com/Arej02/swiggy_delivery_time_prediction.mlflow/#/experiments/3


In [46]:
study.best_value

3.0352675695505846

In [47]:
best_trial = study.best_trial
print("Best trial parameters:", best_trial.params)
print("Best trial accuracy:", best_trial.value)

Best trial parameters: {'model': 'LR', 'fit_intercept': False}
Best trial accuracy: 3.0352675695505846
