In [1]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer, KNNImputer, MissingIndicator
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder, MinMaxScaler, PowerTransformer, OrdinalEncoder
from sklearn.model_selection import train_test_split

In [2]:
%pip install mlflow dagshub

Collecting mlflow
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting dagshub
  Downloading dagshub-0.5.9-py3-none-any.whl.metadata (12 kB)
Collecting mlflow-skinny==2.22.0 (from mlflow)
  Downloading mlflow_skinny-2.22.0-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.22.0->mlflow)
  Downloading databricks_sdk-0.50.0-py3-none-any.whl.metadata (38 kB)
Collecting fastapi<1 (from mlflow-skinny==2.22.0->mlflow)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn<1 (from mlflow-skin

In [3]:
import mlflow

In [4]:
import dagshub
dagshub.init(repo_owner='speedyskill', repo_name='swiggy-delivery-time-prediction', mlflow=True)



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=6d5e5097-975e-496d-8719-8f3c97f7ee66&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=4270863549214ea69ee3fdacf6b15848e49885a758f869323d6c5f496f4351c0




Output()

In [5]:
mlflow.set_tracking_uri('https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow')

In [6]:
mlflow.set_experiment('Exp 2 - Model Selection')

<Experiment: artifact_location='mlflow-artifacts:/9b833494a0e441159667128053be3de6', creation_time=1745840364853, experiment_id='1', last_update_time=1745840364853, lifecycle_stage='active', name='Exp 2 - Model Selection', tags={}>

In [7]:
df=pd.read_csv('/content/cleaned_data.csv')
df.head()

Unnamed: 0,rider_id,age,ratings,restaurant_latitude,restaurant_longitude,delivery_latitude,delivery_longitude,order_date,weather,traffic,...,city_name,order_day,order_month,order_day_of_week,is_weekend,order_time_hour,order_time_of_day,pickup_time,distance,distance_type
0,INDORES13DEL02,37.0,4.9,22.745049,75.892471,22.765049,75.912471,2022-03-19,sunny,high,...,INDO,19,3,saturday,1,11.0,morning,15.0,3.025149,short
1,BANGRES18DEL02,34.0,4.5,12.913041,77.683237,13.043041,77.813237,2022-03-25,stormy,jam,...,BANG,25,3,friday,0,19.0,evening,5.0,20.18353,very_long
2,BANGRES19DEL01,23.0,4.4,12.914264,77.6784,12.924264,77.6884,2022-03-19,sandstorms,low,...,BANG,19,3,saturday,1,8.0,morning,15.0,1.552758,short
3,COIMBRES13DEL02,38.0,4.7,11.003669,76.976494,11.053669,77.026494,2022-04-05,sunny,medium,...,COIMB,5,4,tuesday,0,18.0,evening,10.0,7.790401,medium
4,CHENRES12DEL01,32.0,4.6,12.972793,80.249982,13.012793,80.289982,2022-03-26,cloudy,high,...,CHEN,26,3,saturday,1,13.0,afternoon,15.0,6.210138,medium


In [8]:
# drop columns not required for model input

columns_to_drop =  ['rider_id',
                    'restaurant_latitude',
                    'restaurant_longitude',
                    'delivery_latitude',
                    'delivery_longitude',
                    'order_date',
                    "order_time_hour",
                    "order_day",
                    "city_name",
                    "order_day_of_week",
                    "order_month"]

df.drop(columns=columns_to_drop, inplace=True)

df.head()

Unnamed: 0,age,ratings,weather,traffic,vehicle_condition,type_of_order,type_of_vehicle,multiple_deliveries,festival,city_type,time_taken,is_weekend,order_time_of_day,pickup_time,distance,distance_type
0,37.0,4.9,sunny,high,2,snack,motorcycle,0.0,no,urban,24,1,morning,15.0,3.025149,short
1,34.0,4.5,stormy,jam,2,snack,scooter,1.0,no,metropolitian,33,0,evening,5.0,20.18353,very_long
2,23.0,4.4,sandstorms,low,0,drinks,motorcycle,1.0,no,urban,26,1,morning,15.0,1.552758,short
3,38.0,4.7,sunny,medium,0,buffet,motorcycle,1.0,no,metropolitian,21,0,evening,10.0,7.790401,medium
4,32.0,4.6,cloudy,high,1,snack,scooter,1.0,no,metropolitian,30,1,afternoon,15.0,6.210138,medium


In [9]:
temp_df=df.dropna().copy()

In [10]:
temp_df.isna().sum().sum()

np.int64(0)

In [11]:
X=temp_df.drop(columns=['time_taken'])
y=temp_df['time_taken']

In [12]:
# train test split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [13]:
print("The size of train data is",X_train.shape)
print("The shape of test data is",X_test.shape)

The size of train data is (30156, 15)
The shape of test data is (7539, 15)


# Preprocessing Pipeline

In [14]:
num_cols=['age','ratings','pickup_time','distance']

nominal_cat_cols = ['weather',
                    'type_of_order',
                    'type_of_vehicle',
                    "festival",
                    "city_type",
                    "is_weekend",
                    "order_time_of_day"]

ordinal_cat_cols = ["traffic","distance_type"]

In [15]:
# generate order for ordinal encoding

traffic_order = ["low","medium","high","jam"]

distance_type_order = ["short","medium","long","very_long"]

In [16]:
preprocessor=ColumnTransformer(transformers=[
    ('scale',MinMaxScaler(),num_cols),
    ('nominal_encode',OneHotEncoder(drop='first',handle_unknown='ignore',sparse_output=False),nominal_cat_cols),
    ('ordinal_encode',OrdinalEncoder(categories=[traffic_order,distance_type_order]),ordinal_cat_cols)
],remainder='passthrough',n_jobs=-1,force_int_remainder_cols=False,verbose_feature_names_out=False)

In [17]:
preprocessor.set_output(transform='pandas')

In [18]:
X_train_trans = preprocessor.fit_transform(X_train)

X_test_trans = preprocessor.transform(X_test)

In [19]:
pt=PowerTransformer(method='yeo-johnson')
y_train_pt=pt.fit_transform(y_train.values.reshape(-1,1))
y_test_pt=pt.transform(y_test.values.reshape(-1,1))

In [20]:
%pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.3.0


In [21]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import optuna

In [22]:
from sklearn.metrics import r2_score, mean_absolute_error

In [23]:
def objective(trial):
    with mlflow.start_run(nested=True):
        model_name = trial.suggest_categorical("model",["SVM","RF","KNN","GB","XGB","LGBM"])

        if model_name == "SVM":
            kernel_svm = trial.suggest_categorical("kernel_svm",["linear","poly","rbf"])
            if kernel_svm == "linear":
                c_linear = trial.suggest_float("c_linear",0,10)
                model = SVR(C=c_linear,kernel="linear")

            elif kernel_svm == "poly":
                c_poly = trial.suggest_float("c_poly",0,10)
                degree_poly = trial.suggest_int("degree_poly",1,5)
                model = SVR(C=c_poly,degree=degree_poly,
                            kernel="poly")

            else:
                c_rbf = trial.suggest_float("c_rbf",0,100)
                gamma_rbf = trial.suggest_float("gamma_rbf",0,10)
                model = SVR(C=c_rbf,gamma=gamma_rbf,
                            kernel="rbf")

        elif model_name == "RF":
            n_estimators_rf = trial.suggest_int("n_estimators_rf",10,200)
            max_depth_rf = trial.suggest_int("max_depth_rf",2,20)
            model = RandomForestRegressor(n_estimators=n_estimators_rf,
                                        max_depth=max_depth_rf,
                                        random_state=42,
                                        n_jobs=-1)

        elif model_name == "GB":
            n_estimators_gb = trial.suggest_int("n_estimators_gb",10,200)
            learning_rate_gb = trial.suggest_float("learning_rate_gb",0,1)
            max_depth_gb = trial.suggest_int("max_depth_gb",2,20)
            model = GradientBoostingRegressor(n_estimators=n_estimators_gb,
                                                learning_rate=learning_rate_gb,
                                                max_depth=max_depth_gb,
                                                random_state=42)

        elif model_name == "KNN":
            n_neighbors_knn = trial.suggest_int("n_neighbors_knn",1,25)
            weights_knn = trial.suggest_categorical("weights_knn",["uniform","distance"])
            model = KNeighborsRegressor(n_neighbors=n_neighbors_knn,
                                        weights=weights_knn,n_jobs=-1)

        elif model_name == "XGB":
            n_estimators_xgb = trial.suggest_int("n_estimators_xgb",10,200)
            learning_rate_xgb = trial.suggest_float("learning_rate_xgb",0.1,0.5)
            max_depth_xgb = trial.suggest_int("max_depth_xgb",2,20)
            model = XGBRegressor(n_estimators=n_estimators_xgb,
                                    learning_rate=learning_rate_xgb,
                                    max_depth=max_depth_xgb,
                                    random_state=42,
                                    n_jobs=-1)

        elif model_name == "LGBM":
            n_estimators_lgbm = trial.suggest_int("n_estimators_lgbm",10,200)
            learning_rate_lgbm = trial.suggest_float("learning_rate_lgbm",0.1,0.5)
            max_depth_lgbm = trial.suggest_int("max_depth_lgbm",2,20)
            model = LGBMRegressor(n_estimators=n_estimators_lgbm,
                                    learning_rate=learning_rate_lgbm,
                                    max_depth=max_depth_lgbm,
                                    random_state=42)


        # train the model
        model.fit(X_train_trans,y_train_pt.ravel())

        # log model params
        mlflow.log_params(model.get_params())

        # get the predictions
        y_pred_train = model.predict(X_train_trans)
        y_pred_test = model.predict(X_test_trans)

        # get the actual predictions values
        y_pred_train_actual = pt.inverse_transform(y_pred_train.reshape(-1,1))
        y_pred_test_actual = pt.inverse_transform(y_pred_test.reshape(-1,1))

        # calculate the error
        error = mean_absolute_error(y_test,y_pred_test_actual)

        # log model_name
        mlflow.log_param("model",model_name)

        # log error
        mlflow.log_metric("MAE",error)

        return error

In [27]:
# create optuna study
study = optuna.create_study(direction="minimize",study_name="model_selection")

with mlflow.start_run(run_name="Best Model") as parent:
    # optimize the objective function
    study.optimize(objective,n_trials=25,n_jobs=-1)

    # log the best parameters
    mlflow.log_params(study.best_params)

    # log the best score
    mlflow.log_metric("best_score",study.best_value)

[I 2025-04-28 14:38:34,026] A new study created in memory with name: model_selection
[I 2025-04-28 14:38:35,420] Trial 0 finished with value: 3.8867995738983154 and parameters: {'model': 'XGB', 'n_estimators_xgb': 25, 'learning_rate_xgb': 0.15521042281615305, 'max_depth_xgb': 3}. Best is trial 0 with value: 3.8867995738983154.


🏃 View run funny-cub-516 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/b8a5aa74e2114d9baaf02c316e398dda
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:38:39,142] Trial 2 finished with value: 3.7009064589721943 and parameters: {'model': 'RF', 'n_estimators_rf': 33, 'max_depth_rf': 7}. Best is trial 2 with value: 3.7009064589721943.


🏃 View run delightful-lamb-832 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/8dd8068a52a441d4b452b3cd0c0f9b4e
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1
🏃 View run crawling-crow-427 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/ae704249f6bd44d4bf3d6dcf4224da56
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:38:43,171] Trial 3 finished with value: 3.1177341202557805 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 136, 'learning_rate_lgbm': 0.44120583899013177, 'max_depth_lgbm': 18}. Best is trial 3 with value: 3.1177341202557805.


🏃 View run gaudy-shad-121 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/7b557aa588b642c4bff9f150d5dcc8f0
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:38:49,218] Trial 4 finished with value: 3.061084054966845 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 96, 'learning_rate_lgbm': 0.3250393349196531, 'max_depth_lgbm': 14}. Best is trial 4 with value: 3.061084054966845.


🏃 View run classy-croc-495 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/c97d92989a504acc9a1c832950db0a7f
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:38:55,165] Trial 5 finished with value: 3.0222214956777944 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 123, 'learning_rate_lgbm': 0.14870778914988292, 'max_depth_lgbm': 20}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run learned-cat-726 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/7a008765f11f40ef940ac4d02dd92ae7
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:39:01,163] Trial 6 finished with value: 3.5396106243133545 and parameters: {'model': 'XGB', 'n_estimators_xgb': 38, 'learning_rate_xgb': 0.26201021365932886, 'max_depth_xgb': 3}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run rogue-snipe-159 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/5571cf27133c41e082c9b83075eff43d
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:39:07,180] Trial 7 finished with value: 3.0498989852022884 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 45, 'learning_rate_lgbm': 0.3479777056554947, 'max_depth_lgbm': 18}. Best is trial 5 with value: 3.0222214956777944.
[I 2025-04-28 14:39:35,324] Trial 8 finished with value: 3.3721351198141347 and parameters: {'model': 'GB', 'n_estimators_gb': 90, 'learning_rate_gb': 0.37152758646366046, 'max_depth_gb': 11}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run abrasive-cub-711 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/cc2c9fcd58d84b1fad119ca9dcd2d4b0
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:39:38,894] Trial 9 finished with value: 4.677143936702477 and parameters: {'model': 'RF', 'n_estimators_rf': 55, 'max_depth_rf': 4}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run dazzling-ray-367 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/cc7bb0cd7bc24af5b02cb0665e1c0ac4
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:39:41,185] Trial 10 finished with value: 3.1194831548179724 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 195, 'learning_rate_lgbm': 0.41971469271903816, 'max_depth_lgbm': 15}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run clumsy-cod-993 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/8a1980c99b144300ba1b865eda8286e6
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:43:02,959] Trial 1 finished with value: 4.675409126616301 and parameters: {'model': 'SVM', 'kernel_svm': 'linear', 'c_linear': 1.1727770605414811}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run likeable-rook-828 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/0cf41b1634ac42be91b0920d3ca680af
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:43:19,661] Trial 12 finished with value: 4.406993632803629 and parameters: {'model': 'KNN', 'n_neighbors_knn': 4, 'weights_knn': 'uniform'}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run silent-bear-244 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/237c0f8aa66e4c7ab61d333c9509c069
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:43:20,955] Trial 13 finished with value: 3.976233639779468 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 11, 'learning_rate_lgbm': 0.11692330110742263, 'max_depth_lgbm': 20}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run dapper-stork-752 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/ac0fec4bd2f54904828c20cac9da390a
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1
🏃 View run placid-squid-886 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/7c00b05c110448a9b28596c44e1f953a
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


[I 2025-04-28 14:43:24,197] Trial 14 finished with value: 3.0867804938930634 and parameters: {'model': 'LGBM', 'n_estimators_lgbm': 62, 'learning_rate_lgbm': 0.13089915850038816, 'max_depth_lgbm': 6}. Best is trial 5 with value: 3.0222214956777944.
[I 2025-04-28 14:43:35,191] Trial 15 finished with value: 4.121121532993932 and parameters: {'model': 'GB', 'n_estimators_gb': 188, 'learning_rate_gb': 0.9941614980872604, 'max_depth_gb': 20}. Best is trial 5 with value: 3.0222214956777944.


🏃 View run traveling-cod-214 at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/0527cb34de57434f816f474e7052245f
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1
🏃 View run Best Model at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1/runs/3cb5d4da0e964fe1b58cf56cb6b1b805
🧪 View experiment at: https://dagshub.com/speedyskill/swiggy-delivery-time-prediction.mlflow/#/experiments/1


KeyboardInterrupt: 

In [28]:
study.best_params

{'model': 'LGBM',
 'n_estimators_lgbm': 123,
 'learning_rate_lgbm': 0.14870778914988292,
 'max_depth_lgbm': 20}

In [29]:
study.best_value

3.0222214956777944

In [30]:
lgbm_param={
    'n_estimators' : 123,
    'learning_rate' : 0.14870778914988292,
    'max_depth' : 20

}

In [34]:
# train the model on best parameters

lgbm=LGBMRegressor(**lgbm_param)
lgbm.fit(X_train_trans,y_train_pt)



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044056 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 352
[LightGBM] [Info] Number of data points in the train set: 30156, number of used features: 25
[LightGBM] [Info] Start training from score -0.000000


In [35]:
# get the predictions
y_pred_train = lgbm.predict(X_train_trans)
y_pred_test = lgbm.predict(X_test_trans)

In [36]:
# get the actual predictions values

y_pred_train_actual = pt.inverse_transform(y_pred_train.reshape(-1,1))
y_pred_test_actual = pt.inverse_transform(y_pred_test.reshape(-1,1))

In [38]:
from sklearn.metrics import mean_absolute_error, r2_score

print(f"The train error is {mean_absolute_error(y_train,y_pred_train_actual):.2f} minutes")
print(f"The test error is {mean_absolute_error(y_test,y_pred_test_actual):.2f} minutes")

The train error is 2.86 minutes
The test error is 3.02 minutes


In [39]:
print(f"The train r2 score is {r2_score(y_train,y_pred_train_actual):.2f}")
print(f"The test r2 score is {r2_score(y_test,y_pred_test_actual):.2f}")

The train r2 score is 0.86
The test r2 score is 0.84


In [41]:
study.trials_dataframe()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_c_linear,params_c_poly,params_degree_poly,params_kernel_svm,params_learning_rate_gb,...,params_max_depth_rf,params_max_depth_xgb,params_model,params_n_estimators_gb,params_n_estimators_lgbm,params_n_estimators_rf,params_n_estimators_xgb,params_n_neighbors_knn,params_weights_knn,state
0,0,3.8868,2025-04-28 14:38:34.181011,2025-04-28 14:38:35.420537,0 days 00:00:01.239526,,,,,,...,,3.0,XGB,,,,25.0,,,COMPLETE
1,1,4.675409,2025-04-28 14:38:34.186943,2025-04-28 14:43:02.959440,0 days 00:04:28.772497,1.172777,,,linear,,...,,,SVM,,,,,,,COMPLETE
2,2,3.700906,2025-04-28 14:38:35.424210,2025-04-28 14:38:39.141969,0 days 00:00:03.717759,,,,,,...,7.0,,RF,,,33.0,,,,COMPLETE
3,3,3.117734,2025-04-28 14:38:39.144073,2025-04-28 14:38:43.171187,0 days 00:00:04.027114,,,,,,...,,,LGBM,,136.0,,,,,COMPLETE
4,4,3.061084,2025-04-28 14:38:43.173660,2025-04-28 14:38:49.218276,0 days 00:00:06.044616,,,,,,...,,,LGBM,,96.0,,,,,COMPLETE
5,5,3.022221,2025-04-28 14:38:49.220855,2025-04-28 14:38:55.165286,0 days 00:00:05.944431,,,,,,...,,,LGBM,,123.0,,,,,COMPLETE
6,6,3.539611,2025-04-28 14:38:55.169162,2025-04-28 14:39:01.163092,0 days 00:00:05.993930,,,,,,...,,3.0,XGB,,,,38.0,,,COMPLETE
7,7,3.049899,2025-04-28 14:39:01.168263,2025-04-28 14:39:07.180601,0 days 00:00:06.012338,,,,,,...,,,LGBM,,45.0,,,,,COMPLETE
8,8,3.372135,2025-04-28 14:39:07.185053,2025-04-28 14:39:35.323900,0 days 00:00:28.138847,,,,,0.371528,...,,,GB,90.0,,,,,,COMPLETE
9,9,4.677144,2025-04-28 14:39:35.327887,2025-04-28 14:39:38.894316,0 days 00:00:03.566429,,,,,,...,4.0,,RF,,,55.0,,,,COMPLETE


In [42]:
study.trials_dataframe()['params_model'].value_counts()

Unnamed: 0_level_0,count
params_model,Unnamed: 1_level_1
LGBM,7
SVM,3
XGB,2
RF,2
GB,2
KNN,1


In [43]:
from sklearn.model_selection import cross_val_score
from sklearn.compose import TransformedTargetRegressor

model=TransformedTargetRegressor(regressor=lgbm,transformer=pt)

In [44]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(model,
                         X_train_trans,
                         y_train,
                         scoring="neg_mean_absolute_error",
                         cv=5,n_jobs=-1)

scores

array([-3.08756255, -3.04730346, -3.0613757 , -3.06638251, -3.0471044 ])

In [45]:
# mean score

- scores.mean()

np.float64(3.0619457231096985)

In [46]:
# optimization history plot

optuna.visualization.plot_optimization_history(study)

In [47]:
optuna.visualization.plot_parallel_coordinate(study,params=["model"])