In [6]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv('Bengaluru_House_Data.csv')
df.sample(5)

Unnamed: 0,area_type,availability,location,size,society,total_sqft,bath,balcony,price
11387,Super built-up Area,Ready To Move,Kambipura,2 BHK,Prrtht,883,2.0,1.0,45.0
12695,Super built-up Area,Ready To Move,7th Phase JP Nagar,2 BHK,Prhna S,1128,2.0,1.0,60.0
7869,Plot Area,Ready To Move,Vaishnavi Layout,4 Bedroom,Prsisge,4800,5.0,2.0,420.0
1697,Plot Area,Ready To Move,Ramagondanahalli,1 Bedroom,,540,1.0,0.0,30.0
4476,Super built-up Area,Ready To Move,7th Phase JP Nagar,2 BHK,BriumMi,1215,2.0,2.0,90.0


In [3]:
df.isnull().mean()*100

area_type        0.000000
availability     0.000000
location         0.007508
size             0.120120
society         41.306306
total_sqft       0.000000
bath             0.548048
balcony          4.572072
price            0.000000
dtype: float64

In [4]:
df.dtypes

area_type        object
availability     object
location         object
size             object
society          object
total_sqft       object
bath            float64
balcony         float64
price           float64
dtype: object

In [11]:
df['balcony']=df['balcony'].fillna(np.mean(df['balcony']))
df['bath']=df['bath'].fillna(np.mean(df['bath']))
df['total_sqft'].unique()

array(['1056', '2600', '1440', ..., '1133 - 1384', '774', '4689'],
      shape=(2117,), dtype=object)

In [12]:
def convert_sqft(x):
    try:
        if '-' in x:
            parts = x.split('-')
            return (float(parts[0]) + float(parts[1])) / 2
        else:
            return float(x)
    except:
        return np.nan  # In case of unexpected bad data

# Apply function
df['total_sqft'] = df['total_sqft'].apply(convert_sqft)

In [17]:
df['total_sqft']=df['total_sqft'].fillna(np.mean(df['total_sqft']))
df.isnull().mean()*100

area_type        0.000000
availability     0.000000
location         0.007508
size             0.120120
society         41.306306
total_sqft       0.000000
bath             0.000000
balcony          0.000000
price            0.000000
dtype: float64

In [20]:
df.sample(5)

Unnamed: 0,area_type,availability,location,size,society,total_sqft,bath,balcony,price
10130,Built-up Area,Ready To Move,Jigani,3 BHK,DLhtsen,1252.0,3.0,2.0,59.0
4290,Super built-up Area,Ready To Move,Hormavu,2 BHK,NStusD,1153.0,2.0,1.0,65.0
4822,Plot Area,Ready To Move,Upkar Layout,7 Bedroom,,1200.0,7.0,3.0,169.0
9185,Plot Area,Ready To Move,Mico Layout,4 Bedroom,,3600.0,4.0,1.584376,330.0
10303,Super built-up Area,18-May,Electronic City Phase II,2 BHK,MJtroty,972.0,2.0,1.0,40.0


In [21]:
df.drop(columns=['size','society'],inplace=True)

In [25]:
df.describe()

Unnamed: 0,total_sqft,bath,balcony,price
count,13320.0,13320.0,13320.0,13320.0
mean,1559.626694,2.69261,1.584376,112.565627
std,1236.26486,1.337777,0.79836,148.971674
min,1.0,1.0,0.0,8.0
25%,1100.0,2.0,1.0,50.0
50%,1279.5,2.0,2.0,72.0
75%,1678.0,3.0,2.0,120.0
max,52272.0,40.0,3.0,3600.0


In [38]:
df=df[df['bath']<15]
df=df[df['total_sqft']<30000]
df=df[df['price']<2300]

In [39]:
df.describe()

Unnamed: 0,total_sqft,bath,balcony,price
count,13300.0,13300.0,13300.0,13300.0
mean,1538.205982,2.680944,1.584195,110.527229
std,938.232016,1.253614,0.798172,131.434317
min,1.0,1.0,0.0,8.0
25%,1100.0,2.0,1.0,50.0
50%,1278.0,2.0,2.0,72.0
75%,1674.0,3.0,2.0,120.0
max,26136.0,14.0,3.0,2250.0


In [None]:
df['price'] = np.log1p(df['price'])


## Model Training

In [44]:
x=df.drop(columns='price')
y=df['price']
df.dtypes

area_type        object
availability     object
location         object
total_sqft      float64
bath            float64
balcony         float64
price           float64
dtype: object

In [46]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import optuna
import xgboost as xgb
from sklearn.metrics import r2_score

In [None]:
categorical_cols = ['area_type', 'availability', 'location']
numerical_cols = ['total_sqft', 'bath', 'balcony']

preprocessor = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ('num', StandardScaler(), numerical_cols)
])


In [49]:
def objective(trial):
    model_name = trial.suggest_categorical('model', ['LinearRegression', 'RandomForest', 'XGBoost'])
    test_size = trial.suggest_float('test_size', 0.1, 0.3)
    random_state = trial.suggest_int('random_state', 1, 100)

    X_train, X_valid, y_train, y_valid = train_test_split(x, y, test_size=test_size, random_state=random_state)

    if model_name == 'LinearRegression':
        model = LinearRegression()
        
    elif model_name == 'RandomForest':
        n_estimators = trial.suggest_int('rf_n_estimators', 100, 500)
        max_depth = trial.suggest_int('rf_max_depth', 3, 20)
        min_samples_split = trial.suggest_int('rf_min_samples_split', 2, 10)
        min_samples_leaf = trial.suggest_int('rf_min_samples_leaf', 1, 5)
        model = RandomForestRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            random_state=random_state,
            n_jobs=-1
        )

    elif model_name == 'XGBoost':
        n_estimators = trial.suggest_int('xgb_n_estimators', 100, 500)
        max_depth = trial.suggest_int('xgb_max_depth', 3, 15)
        learning_rate = trial.suggest_float('xgb_learning_rate', 0.01, 0.3)
        subsample = trial.suggest_float('xgb_subsample', 0.5, 1.0)
        colsample_bytree = trial.suggest_float('xgb_colsample_bytree', 0.5, 1.0)
        
        model = xgb.XGBRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            random_state=random_state,
            n_jobs=-1
        )

    pipe = Pipeline([
        ('preprocessor', preprocessor),
        ('model', model)
    ])

    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_valid)
    score=r2_score(y_valid,y_pred)
    return score

In [50]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100,show_progress_bar=True)

[I 2025-07-10 18:07:30,337] A new study created in memory with name: no-name-12f2b9fb-2dac-4a32-8c22-978c4122e6d5
Best trial: 0. Best value: 0.760051:   1%|          | 1/100 [00:06<09:58,  6.04s/it]

[I 2025-07-10 18:07:36,379] Trial 0 finished with value: 0.760051154918895 and parameters: {'model': 'RandomForest', 'test_size': 0.24519675941096514, 'random_state': 53, 'rf_n_estimators': 500, 'rf_max_depth': 12, 'rf_min_samples_split': 5, 'rf_min_samples_leaf': 3}. Best is trial 0 with value: 0.760051154918895.


Best trial: 1. Best value: 0.78139:   3%|▎         | 3/100 [00:12<05:28,  3.39s/it] 

[I 2025-07-10 18:07:42,583] Trial 1 finished with value: 0.7813901305188049 and parameters: {'model': 'RandomForest', 'test_size': 0.14082104874021403, 'random_state': 20, 'rf_n_estimators': 390, 'rf_max_depth': 14, 'rf_min_samples_split': 9, 'rf_min_samples_leaf': 4}. Best is trial 1 with value: 0.7813901305188049.
[I 2025-07-10 18:07:42,699] Trial 2 finished with value: 0.7415600045030983 and parameters: {'model': 'LinearRegression', 'test_size': 0.22350813059052074, 'random_state': 19}. Best is trial 1 with value: 0.7813901305188049.


Best trial: 3. Best value: 0.832156:   6%|▌         | 6/100 [00:13<01:46,  1.13s/it]

[I 2025-07-10 18:07:43,324] Trial 3 finished with value: 0.8321559155971131 and parameters: {'model': 'XGBoost', 'test_size': 0.16194088139124296, 'random_state': 55, 'xgb_n_estimators': 286, 'xgb_max_depth': 9, 'xgb_learning_rate': 0.27382980583738653, 'xgb_subsample': 0.639673080420843, 'xgb_colsample_bytree': 0.7481005033685857}. Best is trial 3 with value: 0.8321559155971131.
[I 2025-07-10 18:07:43,418] Trial 4 finished with value: 0.6656836850789121 and parameters: {'model': 'LinearRegression', 'test_size': 0.27063162593533735, 'random_state': 61}. Best is trial 3 with value: 0.8321559155971131.
[I 2025-07-10 18:07:43,522] Trial 5 finished with value: 0.7677800315407113 and parameters: {'model': 'LinearRegression', 'test_size': 0.10904343365130226, 'random_state': 25}. Best is trial 3 with value: 0.8321559155971131.


Best trial: 3. Best value: 0.832156:   8%|▊         | 8/100 [00:15<01:37,  1.06s/it]

[I 2025-07-10 18:07:45,726] Trial 6 finished with value: 0.8216170297587255 and parameters: {'model': 'XGBoost', 'test_size': 0.1857452733196637, 'random_state': 42, 'xgb_n_estimators': 464, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.03211828177402382, 'xgb_subsample': 0.9649240828764993, 'xgb_colsample_bytree': 0.5498904494734638}. Best is trial 3 with value: 0.8321559155971131.
[I 2025-07-10 18:07:45,846] Trial 7 finished with value: 0.7491279255083849 and parameters: {'model': 'LinearRegression', 'test_size': 0.15916280359581272, 'random_state': 54}. Best is trial 3 with value: 0.8321559155971131.


Best trial: 3. Best value: 0.832156:   9%|▉         | 9/100 [00:18<02:31,  1.67s/it]

[I 2025-07-10 18:07:49,036] Trial 8 finished with value: 0.7664895244393001 and parameters: {'model': 'RandomForest', 'test_size': 0.19137814642469872, 'random_state': 74, 'rf_n_estimators': 209, 'rf_max_depth': 14, 'rf_min_samples_split': 7, 'rf_min_samples_leaf': 2}. Best is trial 3 with value: 0.8321559155971131.


Best trial: 3. Best value: 0.832156:  10%|█         | 10/100 [00:22<03:23,  2.26s/it]

[I 2025-07-10 18:07:52,744] Trial 9 finished with value: 0.7465758851525188 and parameters: {'model': 'RandomForest', 'test_size': 0.15607801169273702, 'random_state': 38, 'rf_n_estimators': 236, 'rf_max_depth': 14, 'rf_min_samples_split': 3, 'rf_min_samples_leaf': 3}. Best is trial 3 with value: 0.8321559155971131.


Best trial: 3. Best value: 0.832156:  11%|█         | 11/100 [00:22<02:30,  1.69s/it]

[I 2025-07-10 18:07:53,052] Trial 10 finished with value: 0.8103030050895459 and parameters: {'model': 'XGBoost', 'test_size': 0.2961868925378458, 'random_state': 93, 'xgb_n_estimators': 171, 'xgb_max_depth': 5, 'xgb_learning_rate': 0.29117361376116324, 'xgb_subsample': 0.5387758658444355, 'xgb_colsample_bytree': 0.8837640595233373}. Best is trial 3 with value: 0.8321559155971131.


Best trial: 3. Best value: 0.832156:  12%|█▏        | 12/100 [00:25<02:56,  2.00s/it]

[I 2025-07-10 18:07:55,808] Trial 11 finished with value: 0.8066527808572865 and parameters: {'model': 'XGBoost', 'test_size': 0.1932429819276475, 'random_state': 37, 'xgb_n_estimators': 481, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.023300320337760496, 'xgb_subsample': 0.9734731482304712, 'xgb_colsample_bytree': 0.5009892379418948}. Best is trial 3 with value: 0.8321559155971131.


Best trial: 12. Best value: 0.834024:  13%|█▎        | 13/100 [00:26<02:27,  1.70s/it]

[I 2025-07-10 18:07:56,775] Trial 12 finished with value: 0.8340241660691973 and parameters: {'model': 'XGBoost', 'test_size': 0.1748910166922801, 'random_state': 1, 'xgb_n_estimators': 342, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.20841903504609047, 'xgb_subsample': 0.6865472635363414, 'xgb_colsample_bytree': 0.609363488577416}. Best is trial 12 with value: 0.8340241660691973.


Best trial: 13. Best value: 0.834815:  14%|█▍        | 14/100 [00:27<01:59,  1.39s/it]

[I 2025-07-10 18:07:57,447] Trial 13 finished with value: 0.8348149573489659 and parameters: {'model': 'XGBoost', 'test_size': 0.12093898829150075, 'random_state': 8, 'xgb_n_estimators': 295, 'xgb_max_depth': 9, 'xgb_learning_rate': 0.2482678597108457, 'xgb_subsample': 0.6615746064400199, 'xgb_colsample_bytree': 0.7082861612509845}. Best is trial 13 with value: 0.8348149573489659.


Best trial: 14. Best value: 0.839356:  15%|█▌        | 15/100 [00:27<01:43,  1.22s/it]

[I 2025-07-10 18:07:58,263] Trial 14 finished with value: 0.8393559833129755 and parameters: {'model': 'XGBoost', 'test_size': 0.105858236945135, 'random_state': 4, 'xgb_n_estimators': 326, 'xgb_max_depth': 10, 'xgb_learning_rate': 0.1869594340630223, 'xgb_subsample': 0.7492915335115112, 'xgb_colsample_bytree': 0.6960725350094426}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 14. Best value: 0.839356:  16%|█▌        | 16/100 [00:28<01:28,  1.06s/it]

[I 2025-07-10 18:07:58,937] Trial 15 finished with value: 0.8286687029283243 and parameters: {'model': 'XGBoost', 'test_size': 0.10175140872377585, 'random_state': 4, 'xgb_n_estimators': 302, 'xgb_max_depth': 8, 'xgb_learning_rate': 0.16044681858319493, 'xgb_subsample': 0.7969425843147975, 'xgb_colsample_bytree': 0.7257141831837839}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 14. Best value: 0.839356:  17%|█▋        | 17/100 [00:29<01:16,  1.08it/s]

[I 2025-07-10 18:07:59,554] Trial 16 finished with value: 0.8279134472118959 and parameters: {'model': 'XGBoost', 'test_size': 0.1265909836007366, 'random_state': 11, 'xgb_n_estimators': 236, 'xgb_max_depth': 10, 'xgb_learning_rate': 0.2021445143735109, 'xgb_subsample': 0.8162814135985126, 'xgb_colsample_bytree': 0.6815465638050865}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 14. Best value: 0.839356:  18%|█▊        | 18/100 [00:29<01:05,  1.25it/s]

[I 2025-07-10 18:08:00,066] Trial 17 finished with value: 0.819244814230317 and parameters: {'model': 'XGBoost', 'test_size': 0.1246653511606153, 'random_state': 27, 'xgb_n_estimators': 387, 'xgb_max_depth': 5, 'xgb_learning_rate': 0.22583910427830406, 'xgb_subsample': 0.7000897422381608, 'xgb_colsample_bytree': 0.8346376916371054}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 14. Best value: 0.839356:  19%|█▉        | 19/100 [00:30<00:53,  1.52it/s]

[I 2025-07-10 18:08:00,393] Trial 18 finished with value: 0.8025050515971682 and parameters: {'model': 'XGBoost', 'test_size': 0.1317818320584879, 'random_state': 12, 'xgb_n_estimators': 117, 'xgb_max_depth': 7, 'xgb_learning_rate': 0.10669811178100522, 'xgb_subsample': 0.5846427275619854, 'xgb_colsample_bytree': 0.9995870032377859}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 14. Best value: 0.839356:  20%|██        | 20/100 [00:31<01:07,  1.18it/s]

[I 2025-07-10 18:08:01,668] Trial 19 finished with value: 0.8306337310270995 and parameters: {'model': 'XGBoost', 'test_size': 0.22550204267568827, 'random_state': 77, 'xgb_n_estimators': 391, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.12293347029322695, 'xgb_subsample': 0.8388936971369181, 'xgb_colsample_bytree': 0.6467949082173973}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 14. Best value: 0.839356:  21%|██        | 21/100 [00:32<01:05,  1.20it/s]

[I 2025-07-10 18:08:02,469] Trial 20 finished with value: 0.8199737294875855 and parameters: {'model': 'XGBoost', 'test_size': 0.1040562582823286, 'random_state': 11, 'xgb_n_estimators': 228, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.2502590265319665, 'xgb_subsample': 0.7378529956518888, 'xgb_colsample_bytree': 0.8198351197801699}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 14. Best value: 0.839356:  22%|██▏       | 22/100 [00:33<01:05,  1.19it/s]

[I 2025-07-10 18:08:03,338] Trial 21 finished with value: 0.8387851669387404 and parameters: {'model': 'XGBoost', 'test_size': 0.14224469426066533, 'random_state': 1, 'xgb_n_estimators': 343, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.1730662118217477, 'xgb_subsample': 0.6685907396868596, 'xgb_colsample_bytree': 0.6217305526176958}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 14. Best value: 0.839356:  23%|██▎       | 23/100 [00:33<01:01,  1.26it/s]

[I 2025-07-10 18:08:04,016] Trial 22 finished with value: 0.8205188691415712 and parameters: {'model': 'XGBoost', 'test_size': 0.14004292955659478, 'random_state': 3, 'xgb_n_estimators': 363, 'xgb_max_depth': 7, 'xgb_learning_rate': 0.17279728260227062, 'xgb_subsample': 0.6349720689738927, 'xgb_colsample_bytree': 0.6052443277896886}. Best is trial 14 with value: 0.8393559833129755.


Best trial: 23. Best value: 0.848627:  24%|██▍       | 24/100 [00:34<01:01,  1.24it/s]

[I 2025-07-10 18:08:04,846] Trial 23 finished with value: 0.8486272601257181 and parameters: {'model': 'XGBoost', 'test_size': 0.12195242919488507, 'random_state': 31, 'xgb_n_estimators': 284, 'xgb_max_depth': 10, 'xgb_learning_rate': 0.18299928369141574, 'xgb_subsample': 0.640256533194376, 'xgb_colsample_bytree': 0.6837741490541319}. Best is trial 23 with value: 0.8486272601257181.


Best trial: 23. Best value: 0.848627:  25%|██▌       | 25/100 [00:35<01:02,  1.20it/s]

[I 2025-07-10 18:08:05,735] Trial 24 finished with value: 0.8431937869747086 and parameters: {'model': 'XGBoost', 'test_size': 0.1450495087591512, 'random_state': 31, 'xgb_n_estimators': 249, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.10444293563555346, 'xgb_subsample': 0.7560017775907633, 'xgb_colsample_bytree': 0.6323059229264987}. Best is trial 23 with value: 0.8486272601257181.


Best trial: 25. Best value: 0.854209:  26%|██▌       | 26/100 [00:36<01:12,  1.03it/s]

[I 2025-07-10 18:08:07,048] Trial 25 finished with value: 0.8542093645634888 and parameters: {'model': 'XGBoost', 'test_size': 0.11852221282861483, 'random_state': 30, 'xgb_n_estimators': 238, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.0969608120284654, 'xgb_subsample': 0.7648265603125547, 'xgb_colsample_bytree': 0.7901851929461279}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  27%|██▋       | 27/100 [00:38<01:17,  1.07s/it]

[I 2025-07-10 18:08:08,335] Trial 26 finished with value: 0.8441919285488233 and parameters: {'model': 'XGBoost', 'test_size': 0.15158953822015772, 'random_state': 30, 'xgb_n_estimators': 234, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.07447425750632318, 'xgb_subsample': 0.8950718956857038, 'xgb_colsample_bytree': 0.8008052630694469}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  29%|██▉       | 29/100 [00:39<00:59,  1.20it/s]

[I 2025-07-10 18:08:09,613] Trial 27 finished with value: 0.8117815223021346 and parameters: {'model': 'XGBoost', 'test_size': 0.17073471236980584, 'random_state': 45, 'xgb_n_estimators': 185, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.06752120840137452, 'xgb_subsample': 0.8807197326700129, 'xgb_colsample_bytree': 0.8082279932775781}. Best is trial 25 with value: 0.8542093645634888.
[I 2025-07-10 18:08:09,750] Trial 28 finished with value: 0.752089199364399 and parameters: {'model': 'LinearRegression', 'test_size': 0.11767129001619585, 'random_state': 31}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  30%|███       | 30/100 [00:39<00:48,  1.44it/s]

[I 2025-07-10 18:08:10,113] Trial 29 finished with value: 0.68598720909515 and parameters: {'model': 'RandomForest', 'test_size': 0.21053470060087767, 'random_state': 18, 'rf_n_estimators': 108, 'rf_max_depth': 3, 'rf_min_samples_split': 2, 'rf_min_samples_leaf': 5}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  31%|███       | 31/100 [00:48<03:33,  3.09s/it]

[I 2025-07-10 18:08:18,793] Trial 30 finished with value: 0.7803803933190717 and parameters: {'model': 'RandomForest', 'test_size': 0.15258693581928134, 'random_state': 50, 'rf_n_estimators': 370, 'rf_max_depth': 20, 'rf_min_samples_split': 10, 'rf_min_samples_leaf': 1}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  32%|███▏      | 32/100 [00:49<02:50,  2.51s/it]

[I 2025-07-10 18:08:19,951] Trial 31 finished with value: 0.8427739340922136 and parameters: {'model': 'XGBoost', 'test_size': 0.1415666473612691, 'random_state': 31, 'xgb_n_estimators': 242, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.10353639269231356, 'xgb_subsample': 0.8737087574363572, 'xgb_colsample_bytree': 0.7801114784842904}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  33%|███▎      | 33/100 [00:51<02:31,  2.26s/it]

[I 2025-07-10 18:08:21,630] Trial 32 finished with value: 0.8354748017838075 and parameters: {'model': 'XGBoost', 'test_size': 0.13319345364073468, 'random_state': 33, 'xgb_n_estimators': 262, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.07083581865080274, 'xgb_subsample': 0.9097812508967011, 'xgb_colsample_bytree': 0.8867731622625089}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  34%|███▍      | 34/100 [00:52<02:02,  1.85s/it]

[I 2025-07-10 18:08:22,526] Trial 33 finished with value: 0.8022337422508679 and parameters: {'model': 'XGBoost', 'test_size': 0.14864374897427146, 'random_state': 24, 'xgb_n_estimators': 190, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.12732427597514606, 'xgb_subsample': 0.7910881927506812, 'xgb_colsample_bytree': 0.7809979683575187}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  35%|███▌      | 35/100 [00:53<01:54,  1.76s/it]

[I 2025-07-10 18:08:24,078] Trial 34 finished with value: 0.8093948091875027 and parameters: {'model': 'XGBoost', 'test_size': 0.17106512585798955, 'random_state': 19, 'xgb_n_estimators': 212, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.07331901451159772, 'xgb_subsample': 0.7271918788227332, 'xgb_colsample_bytree': 0.8759028009716006}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  37%|███▋      | 37/100 [00:54<01:05,  1.04s/it]

[I 2025-07-10 18:08:24,723] Trial 35 finished with value: 0.8234764611863694 and parameters: {'model': 'XGBoost', 'test_size': 0.1172168032650408, 'random_state': 47, 'xgb_n_estimators': 145, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.13697317271587345, 'xgb_subsample': 0.5918368101430183, 'xgb_colsample_bytree': 0.6633772045001228}. Best is trial 25 with value: 0.8542093645634888.
[I 2025-07-10 18:08:24,863] Trial 36 finished with value: 0.718466111225351 and parameters: {'model': 'LinearRegression', 'test_size': 0.11475847699593152, 'random_state': 64}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  38%|███▊      | 38/100 [00:55<00:59,  1.03it/s]

[I 2025-07-10 18:08:25,658] Trial 37 finished with value: 0.8060514272858414 and parameters: {'model': 'XGBoost', 'test_size': 0.25024670997109427, 'random_state': 38, 'xgb_n_estimators': 271, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.088807682422157, 'xgb_subsample': 0.5006522820246726, 'xgb_colsample_bytree': 0.7572293209860268}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  40%|████      | 40/100 [00:55<00:36,  1.63it/s]

[I 2025-07-10 18:08:26,097] Trial 38 finished with value: 0.706959024815009 and parameters: {'model': 'RandomForest', 'test_size': 0.1787392269136126, 'random_state': 23, 'rf_n_estimators': 106, 'rf_max_depth': 4, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 1}. Best is trial 25 with value: 0.8542093645634888.
[I 2025-07-10 18:08:26,257] Trial 39 finished with value: 0.7296658306704288 and parameters: {'model': 'LinearRegression', 'test_size': 0.16145922423243148, 'random_state': 17}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  41%|████      | 41/100 [00:56<00:29,  1.98it/s]

[I 2025-07-10 18:08:26,507] Trial 40 finished with value: 0.7605672865187363 and parameters: {'model': 'XGBoost', 'test_size': 0.13374044045124953, 'random_state': 42, 'xgb_n_estimators': 209, 'xgb_max_depth': 3, 'xgb_learning_rate': 0.044556907046996524, 'xgb_subsample': 0.7731666766768449, 'xgb_colsample_bytree': 0.575178922164558}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  42%|████▏     | 42/100 [00:57<00:46,  1.24it/s]

[I 2025-07-10 18:08:28,014] Trial 41 finished with value: 0.835937025188232 and parameters: {'model': 'XGBoost', 'test_size': 0.14257023624186477, 'random_state': 32, 'xgb_n_estimators': 259, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.10682069176008635, 'xgb_subsample': 0.8739963864432903, 'xgb_colsample_bytree': 0.7843178877558035}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  43%|████▎     | 43/100 [00:59<00:57,  1.01s/it]

[I 2025-07-10 18:08:29,484] Trial 42 finished with value: 0.8369505206333592 and parameters: {'model': 'XGBoost', 'test_size': 0.14929806803154755, 'random_state': 28, 'xgb_n_estimators': 248, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.09069268919628594, 'xgb_subsample': 0.9271028396019261, 'xgb_colsample_bytree': 0.7383801001465715}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  44%|████▍     | 44/100 [01:00<00:56,  1.01s/it]

[I 2025-07-10 18:08:30,497] Trial 43 finished with value: 0.8099762819753121 and parameters: {'model': 'XGBoost', 'test_size': 0.16526351908200235, 'random_state': 35, 'xgb_n_estimators': 228, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.14884194979988974, 'xgb_subsample': 0.8441303248860321, 'xgb_colsample_bytree': 0.8433429864938577}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  45%|████▌     | 45/100 [01:01<01:07,  1.22s/it]

[I 2025-07-10 18:08:32,216] Trial 44 finished with value: 0.8128720201618881 and parameters: {'model': 'XGBoost', 'test_size': 0.11227228761858786, 'random_state': 42, 'xgb_n_estimators': 306, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.052793435256993274, 'xgb_subsample': 0.92866323219495, 'xgb_colsample_bytree': 0.7820112974501415}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  47%|████▋     | 47/100 [01:03<00:49,  1.07it/s]

[I 2025-07-10 18:08:33,581] Trial 45 finished with value: 0.8252880881145495 and parameters: {'model': 'XGBoost', 'test_size': 0.20789336978437334, 'random_state': 60, 'xgb_n_estimators': 276, 'xgb_max_depth': 10, 'xgb_learning_rate': 0.1026798666807645, 'xgb_subsample': 0.8739300167529569, 'xgb_colsample_bytree': 0.7483902467463364}. Best is trial 25 with value: 0.8542093645634888.
[I 2025-07-10 18:08:33,746] Trial 46 finished with value: 0.7314449257916575 and parameters: {'model': 'LinearRegression', 'test_size': 0.12760433206944094, 'random_state': 29}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  48%|████▊     | 48/100 [01:07<01:37,  1.87s/it]

[I 2025-07-10 18:08:37,803] Trial 47 finished with value: 0.7762502539909357 and parameters: {'model': 'RandomForest', 'test_size': 0.13789636207526543, 'random_state': 22, 'rf_n_estimators': 480, 'rf_max_depth': 8, 'rf_min_samples_split': 4, 'rf_min_samples_leaf': 5}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  49%|████▉     | 49/100 [01:08<01:25,  1.68s/it]

[I 2025-07-10 18:08:39,028] Trial 48 finished with value: 0.8373857188429272 and parameters: {'model': 'XGBoost', 'test_size': 0.182979441380863, 'random_state': 15, 'xgb_n_estimators': 205, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.11902574867710944, 'xgb_subsample': 0.7112073029497454, 'xgb_colsample_bytree': 0.9197911373017705}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  50%|█████     | 50/100 [01:09<01:07,  1.35s/it]

[I 2025-07-10 18:08:39,611] Trial 49 finished with value: 0.8147192310154794 and parameters: {'model': 'XGBoost', 'test_size': 0.1560678646650222, 'random_state': 39, 'xgb_n_estimators': 163, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.14514696717356096, 'xgb_subsample': 0.7653371782540608, 'xgb_colsample_bytree': 0.6546183130722005}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  51%|█████     | 51/100 [01:10<01:04,  1.31s/it]

[I 2025-07-10 18:08:40,844] Trial 50 finished with value: 0.825272491467995 and parameters: {'model': 'XGBoost', 'test_size': 0.10042003410388808, 'random_state': 98, 'xgb_n_estimators': 242, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.08770177266618591, 'xgb_subsample': 0.9949621640271378, 'xgb_colsample_bytree': 0.7621257787867817}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  52%|█████▏    | 52/100 [01:11<00:55,  1.16s/it]

[I 2025-07-10 18:08:41,648] Trial 51 finished with value: 0.8259025601967032 and parameters: {'model': 'XGBoost', 'test_size': 0.1232325551893874, 'random_state': 7, 'xgb_n_estimators': 318, 'xgb_max_depth': 10, 'xgb_learning_rate': 0.18201638982018495, 'xgb_subsample': 0.8435672198329041, 'xgb_colsample_bytree': 0.6993837994521296}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  53%|█████▎    | 53/100 [01:12<00:50,  1.08s/it]

[I 2025-07-10 18:08:42,531] Trial 52 finished with value: 0.8256185008603745 and parameters: {'model': 'XGBoost', 'test_size': 0.10846961078501202, 'random_state': 27, 'xgb_n_estimators': 335, 'xgb_max_depth': 10, 'xgb_learning_rate': 0.19123676838296466, 'xgb_subsample': 0.7548276852015855, 'xgb_colsample_bytree': 0.716506145338148}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  54%|█████▍    | 54/100 [01:14<01:02,  1.36s/it]

[I 2025-07-10 18:08:44,563] Trial 53 finished with value: 0.7837872845158977 and parameters: {'model': 'XGBoost', 'test_size': 0.1082443376369931, 'random_state': 50, 'xgb_n_estimators': 286, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.013501499934754663, 'xgb_subsample': 0.6137965083441798, 'xgb_colsample_bytree': 0.694839448784518}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  55%|█████▌    | 55/100 [01:14<00:50,  1.13s/it]

[I 2025-07-10 18:08:45,159] Trial 54 finished with value: 0.8207472737015928 and parameters: {'model': 'XGBoost', 'test_size': 0.14415935181579762, 'random_state': 34, 'xgb_n_estimators': 279, 'xgb_max_depth': 8, 'xgb_learning_rate': 0.21869706413614098, 'xgb_subsample': 0.8095149318964513, 'xgb_colsample_bytree': 0.6315821238808627}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  56%|█████▌    | 56/100 [01:15<00:45,  1.03s/it]

[I 2025-07-10 18:08:45,957] Trial 55 finished with value: 0.8161465197210962 and parameters: {'model': 'XGBoost', 'test_size': 0.12501114030160285, 'random_state': 56, 'xgb_n_estimators': 257, 'xgb_max_depth': 9, 'xgb_learning_rate': 0.046946181195270595, 'xgb_subsample': 0.9556191795907818, 'xgb_colsample_bytree': 0.6747561758776414}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  57%|█████▋    | 57/100 [01:16<00:45,  1.07s/it]

[I 2025-07-10 18:08:47,110] Trial 56 finished with value: 0.8463577607845478 and parameters: {'model': 'XGBoost', 'test_size': 0.13115079829937892, 'random_state': 21, 'xgb_n_estimators': 318, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.15952862973901005, 'xgb_subsample': 0.9030660139828093, 'xgb_colsample_bytree': 0.5828970683514713}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  58%|█████▊    | 58/100 [01:18<00:50,  1.21s/it]

[I 2025-07-10 18:08:48,631] Trial 57 finished with value: 0.8377668596412557 and parameters: {'model': 'XGBoost', 'test_size': 0.13380211840653786, 'random_state': 26, 'xgb_n_estimators': 440, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.1540032050436272, 'xgb_subsample': 0.9098338497974807, 'xgb_colsample_bytree': 0.5096724098499377}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  59%|█████▉    | 59/100 [01:19<00:47,  1.16s/it]

[I 2025-07-10 18:08:49,700] Trial 58 finished with value: 0.8377617888394351 and parameters: {'model': 'XGBoost', 'test_size': 0.16464135924549392, 'random_state': 20, 'xgb_n_estimators': 298, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.16069050714413405, 'xgb_subsample': 0.888129052509502, 'xgb_colsample_bytree': 0.573046526488567}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  60%|██████    | 60/100 [01:20<00:45,  1.14s/it]

[I 2025-07-10 18:08:50,788] Trial 59 finished with value: 0.8215314307823557 and parameters: {'model': 'XGBoost', 'test_size': 0.14879771842048933, 'random_state': 13, 'xgb_n_estimators': 224, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.13347864708611606, 'xgb_subsample': 0.9497307822089148, 'xgb_colsample_bytree': 0.5913014817729983}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  61%|██████    | 61/100 [01:26<01:38,  2.53s/it]

[I 2025-07-10 18:08:56,561] Trial 60 finished with value: 0.7765257725082703 and parameters: {'model': 'RandomForest', 'test_size': 0.2841063465399474, 'random_state': 29, 'rf_n_estimators': 301, 'rf_max_depth': 20, 'rf_min_samples_split': 8, 'rf_min_samples_leaf': 2}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  62%|██████▏   | 62/100 [01:27<01:19,  2.10s/it]

[I 2025-07-10 18:08:57,641] Trial 61 finished with value: 0.8334260181568354 and parameters: {'model': 'XGBoost', 'test_size': 0.11852770106380014, 'random_state': 8, 'xgb_n_estimators': 308, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.1955069923188394, 'xgb_subsample': 0.6803359036772865, 'xgb_colsample_bytree': 0.5421924009756229}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  63%|██████▎   | 63/100 [01:28<01:09,  1.88s/it]

[I 2025-07-10 18:08:59,002] Trial 62 finished with value: 0.8071665061652331 and parameters: {'model': 'XGBoost', 'test_size': 0.129002448214253, 'random_state': 36, 'xgb_n_estimators': 323, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.11146383768678163, 'xgb_subsample': 0.823488981093291, 'xgb_colsample_bytree': 0.8002978597599463}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  64%|██████▍   | 64/100 [01:30<01:02,  1.72s/it]

[I 2025-07-10 18:09:00,374] Trial 63 finished with value: 0.8476462492991294 and parameters: {'model': 'XGBoost', 'test_size': 0.10748358352075793, 'random_state': 22, 'xgb_n_estimators': 360, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.17108066800322042, 'xgb_subsample': 0.7163732755912472, 'xgb_colsample_bytree': 0.7192797684952074}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  65%|██████▌   | 65/100 [01:31<00:57,  1.63s/it]

[I 2025-07-10 18:09:01,797] Trial 64 finished with value: 0.8146494282923435 and parameters: {'model': 'XGBoost', 'test_size': 0.13715423654865555, 'random_state': 23, 'xgb_n_estimators': 388, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.1652845383942647, 'xgb_subsample': 0.7139099447484623, 'xgb_colsample_bytree': 0.7276628758842966}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  67%|██████▋   | 67/100 [01:33<00:37,  1.14s/it]

[I 2025-07-10 18:09:03,175] Trial 65 finished with value: 0.8504976971996799 and parameters: {'model': 'XGBoost', 'test_size': 0.1124007634175267, 'random_state': 31, 'xgb_n_estimators': 370, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.09568142806808713, 'xgb_subsample': 0.7866425921803493, 'xgb_colsample_bytree': 0.846117878770061}. Best is trial 25 with value: 0.8542093645634888.
[I 2025-07-10 18:09:03,346] Trial 66 finished with value: 0.7492447807647982 and parameters: {'model': 'LinearRegression', 'test_size': 0.11042832995206313, 'random_state': 40}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  68%|██████▊   | 68/100 [01:34<00:38,  1.21s/it]

[I 2025-07-10 18:09:04,705] Trial 67 finished with value: 0.8151008578183547 and parameters: {'model': 'XGBoost', 'test_size': 0.12181687730435953, 'random_state': 45, 'xgb_n_estimators': 371, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.0847157657589816, 'xgb_subsample': 0.7776470241527133, 'xgb_colsample_bytree': 0.8303193421036763}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  69%|██████▉   | 69/100 [01:35<00:38,  1.23s/it]

[I 2025-07-10 18:09:05,983] Trial 68 finished with value: 0.8462699780452685 and parameters: {'model': 'XGBoost', 'test_size': 0.100685452752814, 'random_state': 22, 'xgb_n_estimators': 416, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.06199773132129012, 'xgb_subsample': 0.6454185829077319, 'xgb_colsample_bytree': 0.6367360881047961}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  70%|███████   | 70/100 [01:37<00:41,  1.38s/it]

[I 2025-07-10 18:09:07,727] Trial 69 finished with value: 0.8216355315724302 and parameters: {'model': 'XGBoost', 'test_size': 0.10139156870867501, 'random_state': 19, 'xgb_n_estimators': 448, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.060697895048752774, 'xgb_subsample': 0.6429412082645886, 'xgb_colsample_bytree': 0.8445302733774603}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  71%|███████   | 71/100 [01:38<00:38,  1.31s/it]

[I 2025-07-10 18:09:08,876] Trial 70 finished with value: 0.8228282497356352 and parameters: {'model': 'XGBoost', 'test_size': 0.11454002058318125, 'random_state': 15, 'xgb_n_estimators': 355, 'xgb_max_depth': 9, 'xgb_learning_rate': 0.034251544775372886, 'xgb_subsample': 0.5751273511524525, 'xgb_colsample_bytree': 0.869824783973447}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  72%|███████▏  | 72/100 [01:39<00:35,  1.27s/it]

[I 2025-07-10 18:09:10,058] Trial 71 finished with value: 0.8318339671037343 and parameters: {'model': 'XGBoost', 'test_size': 0.10542940528689636, 'random_state': 25, 'xgb_n_estimators': 413, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.17799177871221405, 'xgb_subsample': 0.6492329254553438, 'xgb_colsample_bytree': 0.6295748075502906}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  73%|███████▎  | 73/100 [01:40<00:32,  1.21s/it]

[I 2025-07-10 18:09:11,107] Trial 72 finished with value: 0.8367157238678886 and parameters: {'model': 'XGBoost', 'test_size': 0.11889082795591131, 'random_state': 21, 'xgb_n_estimators': 374, 'xgb_max_depth': 11, 'xgb_learning_rate': 0.07798872022321249, 'xgb_subsample': 0.6891046767853289, 'xgb_colsample_bytree': 0.6129596122150958}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  74%|███████▍  | 74/100 [01:41<00:31,  1.20s/it]

[I 2025-07-10 18:09:12,306] Trial 73 finished with value: 0.8459751068746981 and parameters: {'model': 'XGBoost', 'test_size': 0.12768370233092266, 'random_state': 31, 'xgb_n_estimators': 338, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.09620506257664188, 'xgb_subsample': 0.7320894915184192, 'xgb_colsample_bytree': 0.6759976142012307}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  75%|███████▌  | 75/100 [01:43<00:33,  1.35s/it]

[I 2025-07-10 18:09:13,981] Trial 74 finished with value: 0.8525350513026142 and parameters: {'model': 'XGBoost', 'test_size': 0.11137762075916456, 'random_state': 30, 'xgb_n_estimators': 410, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.06098814858938168, 'xgb_subsample': 0.6136739448852868, 'xgb_colsample_bytree': 0.6815985662107107}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  76%|███████▌  | 76/100 [01:45<00:37,  1.57s/it]

[I 2025-07-10 18:09:16,071] Trial 75 finished with value: 0.8349864324509485 and parameters: {'model': 'XGBoost', 'test_size': 0.11224981185562569, 'random_state': 33, 'xgb_n_estimators': 403, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.03288213720569241, 'xgb_subsample': 0.6185014745518569, 'xgb_colsample_bytree': 0.682595110107413}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  77%|███████▋  | 77/100 [01:46<00:32,  1.41s/it]

[I 2025-07-10 18:09:17,106] Trial 76 finished with value: 0.8203092209649175 and parameters: {'model': 'XGBoost', 'test_size': 0.10599933318244596, 'random_state': 16, 'xgb_n_estimators': 421, 'xgb_max_depth': 10, 'xgb_learning_rate': 0.2890814254769814, 'xgb_subsample': 0.7341394963567748, 'xgb_colsample_bytree': 0.7069745963882708}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  78%|███████▊  | 78/100 [01:47<00:28,  1.29s/it]

[I 2025-07-10 18:09:18,123] Trial 77 finished with value: 0.8315139958775554 and parameters: {'model': 'XGBoost', 'test_size': 0.12835234835676612, 'random_state': 25, 'xgb_n_estimators': 348, 'xgb_max_depth': 12, 'xgb_learning_rate': 0.21244008750493892, 'xgb_subsample': 0.6635135237740794, 'xgb_colsample_bytree': 0.6503366835361661}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  79%|███████▉  | 79/100 [01:49<00:30,  1.45s/it]

[I 2025-07-10 18:09:19,936] Trial 78 finished with value: 0.7783278106142123 and parameters: {'model': 'RandomForest', 'test_size': 0.10073469526193193, 'random_state': 78, 'rf_n_estimators': 217, 'rf_max_depth': 8, 'rf_min_samples_split': 10, 'rf_min_samples_leaf': 4}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 25. Best value: 0.854209:  81%|████████  | 81/100 [01:51<00:21,  1.13s/it]

[I 2025-07-10 18:09:21,788] Trial 79 finished with value: 0.8182417723438042 and parameters: {'model': 'XGBoost', 'test_size': 0.12204790187115502, 'random_state': 36, 'xgb_n_estimators': 473, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.06333564349204082, 'xgb_subsample': 0.6225319511521933, 'xgb_colsample_bytree': 0.6733355558041144}. Best is trial 25 with value: 0.8542093645634888.
[I 2025-07-10 18:09:21,903] Trial 80 finished with value: 0.6950992332015746 and parameters: {'model': 'LinearRegression', 'test_size': 0.24796599685414616, 'random_state': 29}. Best is trial 25 with value: 0.8542093645634888.


Best trial: 81. Best value: 0.857045:  82%|████████▏ | 82/100 [01:53<00:22,  1.25s/it]

[I 2025-07-10 18:09:23,429] Trial 81 finished with value: 0.8570447971967452 and parameters: {'model': 'XGBoost', 'test_size': 0.11295952066486527, 'random_state': 31, 'xgb_n_estimators': 432, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.09481139283396632, 'xgb_subsample': 0.5543474912122496, 'xgb_colsample_bytree': 0.8026017979368394}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  83%|████████▎ | 83/100 [01:54<00:22,  1.35s/it]

[I 2025-07-10 18:09:25,001] Trial 82 finished with value: 0.8476214634955936 and parameters: {'model': 'XGBoost', 'test_size': 0.11465970188625903, 'random_state': 32, 'xgb_n_estimators': 435, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.05605224810046925, 'xgb_subsample': 0.5551697668577857, 'xgb_colsample_bytree': 0.6908063148279385}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  84%|████████▍ | 84/100 [01:56<00:23,  1.44s/it]

[I 2025-07-10 18:09:26,671] Trial 83 finished with value: 0.8391751620238166 and parameters: {'model': 'XGBoost', 'test_size': 0.11039101974092368, 'random_state': 22, 'xgb_n_estimators': 437, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.054425913628088546, 'xgb_subsample': 0.5584499305849899, 'xgb_colsample_bytree': 0.7255471451269883}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  85%|████████▌ | 85/100 [01:58<00:26,  1.76s/it]

[I 2025-07-10 18:09:29,152] Trial 84 finished with value: 0.8187830576860731 and parameters: {'model': 'XGBoost', 'test_size': 0.11539717840165696, 'random_state': 27, 'xgb_n_estimators': 461, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.041114037364282194, 'xgb_subsample': 0.5157554602010179, 'xgb_colsample_bytree': 0.765191396279533}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  86%|████████▌ | 86/100 [02:00<00:22,  1.61s/it]

[I 2025-07-10 18:09:30,439] Trial 85 finished with value: 0.8503369616645609 and parameters: {'model': 'XGBoost', 'test_size': 0.10656001777005832, 'random_state': 33, 'xgb_n_estimators': 401, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.1693011602627436, 'xgb_subsample': 0.5704057387627277, 'xgb_colsample_bytree': 0.6931045315030144}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  87%|████████▋ | 87/100 [02:01<00:21,  1.62s/it]

[I 2025-07-10 18:09:32,062] Trial 86 finished with value: 0.838337878242016 and parameters: {'model': 'XGBoost', 'test_size': 0.1178943397531386, 'random_state': 40, 'xgb_n_estimators': 428, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.20147932987938078, 'xgb_subsample': 0.5575196544536677, 'xgb_colsample_bytree': 0.7374112696966595}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  88%|████████▊ | 88/100 [02:03<00:19,  1.60s/it]

[I 2025-07-10 18:09:33,608] Trial 87 finished with value: 0.845700364452602 and parameters: {'model': 'XGBoost', 'test_size': 0.10668478505116478, 'random_state': 33, 'xgb_n_estimators': 489, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.16612746638922243, 'xgb_subsample': 0.5369448953424572, 'xgb_colsample_bytree': 0.695091573179721}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  89%|████████▉ | 89/100 [02:04<00:17,  1.60s/it]

[I 2025-07-10 18:09:35,210] Trial 88 finished with value: 0.8091887664514392 and parameters: {'model': 'XGBoost', 'test_size': 0.11247756374726203, 'random_state': 37, 'xgb_n_estimators': 399, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.11681427638229591, 'xgb_subsample': 0.5852304726129928, 'xgb_colsample_bytree': 0.8948250905417563}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  90%|█████████ | 90/100 [02:06<00:15,  1.60s/it]

[I 2025-07-10 18:09:36,807] Trial 89 finished with value: 0.824246741623394 and parameters: {'model': 'XGBoost', 'test_size': 0.12207479579177584, 'random_state': 43, 'xgb_n_estimators': 374, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.1708918996561143, 'xgb_subsample': 0.5981865365831452, 'xgb_colsample_bytree': 0.8595281739600227}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  91%|█████████ | 91/100 [02:07<00:13,  1.54s/it]

[I 2025-07-10 18:09:38,203] Trial 90 finished with value: 0.8121609838657314 and parameters: {'model': 'XGBoost', 'test_size': 0.24049334283173654, 'random_state': 35, 'xgb_n_estimators': 397, 'xgb_max_depth': 13, 'xgb_learning_rate': 0.1364941219271078, 'xgb_subsample': 0.5334189983153766, 'xgb_colsample_bytree': 0.817320335182347}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 81. Best value: 0.857045:  92%|█████████▏| 92/100 [02:08<00:10,  1.32s/it]

[I 2025-07-10 18:09:39,003] Trial 91 finished with value: 0.8166806059312374 and parameters: {'model': 'XGBoost', 'test_size': 0.10356408700502186, 'random_state': 24, 'xgb_n_estimators': 418, 'xgb_max_depth': 8, 'xgb_learning_rate': 0.07972985552137565, 'xgb_subsample': 0.5654972907086261, 'xgb_colsample_bytree': 0.6436482588987305}. Best is trial 81 with value: 0.8570447971967452.


Best trial: 92. Best value: 0.857394:  93%|█████████▎| 93/100 [02:10<00:09,  1.37s/it]

[I 2025-07-10 18:09:40,493] Trial 92 finished with value: 0.8573939494214106 and parameters: {'model': 'XGBoost', 'test_size': 0.11490513199515942, 'random_state': 30, 'xgb_n_estimators': 409, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.1850285117856906, 'xgb_subsample': 0.6047183579319938, 'xgb_colsample_bytree': 0.6867081248247048}. Best is trial 92 with value: 0.8573939494214106.


Best trial: 93. Best value: 0.861382:  94%|█████████▍| 94/100 [02:11<00:08,  1.43s/it]

[I 2025-07-10 18:09:42,054] Trial 93 finished with value: 0.8613817402823094 and parameters: {'model': 'XGBoost', 'test_size': 0.1347662709076835, 'random_state': 30, 'xgb_n_estimators': 455, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.18579052942455954, 'xgb_subsample': 0.5467891860641703, 'xgb_colsample_bytree': 0.7159168039483006}. Best is trial 93 with value: 0.8613817402823094.


Best trial: 93. Best value: 0.861382:  95%|█████████▌| 95/100 [02:13<00:07,  1.56s/it]

[I 2025-07-10 18:09:43,913] Trial 94 finished with value: 0.8592775488563121 and parameters: {'model': 'XGBoost', 'test_size': 0.1149244524057341, 'random_state': 31, 'xgb_n_estimators': 465, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.18795553846820076, 'xgb_subsample': 0.5965499228004891, 'xgb_colsample_bytree': 0.6887317401678943}. Best is trial 93 with value: 0.8613817402823094.


Best trial: 93. Best value: 0.861382:  96%|█████████▌| 96/100 [02:15<00:06,  1.65s/it]

[I 2025-07-10 18:09:45,770] Trial 95 finished with value: 0.8463208940474303 and parameters: {'model': 'XGBoost', 'test_size': 0.13620730430938138, 'random_state': 28, 'xgb_n_estimators': 457, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.18532069187193867, 'xgb_subsample': 0.6044994638068256, 'xgb_colsample_bytree': 0.7166837691061817}. Best is trial 93 with value: 0.8613817402823094.


Best trial: 93. Best value: 0.861382:  97%|█████████▋| 97/100 [02:24<00:11,  3.88s/it]

[I 2025-07-10 18:09:54,846] Trial 96 finished with value: 0.7988984761440631 and parameters: {'model': 'RandomForest', 'test_size': 0.12526865813707894, 'random_state': 30, 'rf_n_estimators': 422, 'rf_max_depth': 17, 'rf_min_samples_split': 2, 'rf_min_samples_leaf': 2}. Best is trial 93 with value: 0.8613817402823094.


Best trial: 93. Best value: 0.861382:  99%|█████████▉| 99/100 [02:26<00:02,  2.32s/it]

[I 2025-07-10 18:09:56,657] Trial 97 finished with value: 0.84443747509957 and parameters: {'model': 'XGBoost', 'test_size': 0.10893125769194115, 'random_state': 26, 'xgb_n_estimators': 473, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.24283547933802024, 'xgb_subsample': 0.5756776231807568, 'xgb_colsample_bytree': 0.6637110643261503}. Best is trial 93 with value: 0.8613817402823094.
[I 2025-07-10 18:09:56,797] Trial 98 finished with value: 0.7473626922222221 and parameters: {'model': 'LinearRegression', 'test_size': 0.11859924861774454, 'random_state': 35}. Best is trial 93 with value: 0.8613817402823094.


Best trial: 93. Best value: 0.861382: 100%|██████████| 100/100 [02:28<00:00,  1.49s/it]

[I 2025-07-10 18:09:58,973] Trial 99 finished with value: 0.8198371854197608 and parameters: {'model': 'XGBoost', 'test_size': 0.13202470157109253, 'random_state': 39, 'xgb_n_estimators': 495, 'xgb_max_depth': 15, 'xgb_learning_rate': 0.19936666316322083, 'xgb_subsample': 0.6308693362517743, 'xgb_colsample_bytree': 0.7104328322922785}. Best is trial 93 with value: 0.8613817402823094.





In [51]:
best_params = study.best_trial.params
print("Best Hyperparameters:")
print(best_params)
print(f"Best R² Score: {study.best_value:.4f}")

Best Hyperparameters:
{'model': 'XGBoost', 'test_size': 0.1347662709076835, 'random_state': 30, 'xgb_n_estimators': 455, 'xgb_max_depth': 14, 'xgb_learning_rate': 0.18579052942455954, 'xgb_subsample': 0.5467891860641703, 'xgb_colsample_bytree': 0.7159168039483006}
Best R² Score: 0.8614


## Store the Model


In [57]:
import pickle

In [52]:
best_model_name = best_params['model']
best_random_state = best_params['random_state']
best_test_size = best_params['test_size']

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=best_test_size, random_state=best_random_state)


if best_model_name == 'LinearRegression':
    best_model = LinearRegression()

elif best_model_name == 'RandomForest':
    best_model = RandomForestRegressor(
        n_estimators=best_params['rf_n_estimators'],
        max_depth=best_params['rf_max_depth'],
        min_samples_split=best_params['rf_min_samples_split'],
        min_samples_leaf=best_params['rf_min_samples_leaf'],
        random_state=best_params['random_state'],
        n_jobs=-1
    )

elif best_model_name == 'XGBoost':
    best_model = xgb.XGBRegressor(
        n_estimators=best_params['xgb_n_estimators'],
        max_depth=best_params['xgb_max_depth'],
        learning_rate=best_params['xgb_learning_rate'],
        subsample=best_params['xgb_subsample'],
        colsample_bytree=best_params['xgb_colsample_bytree'],
        random_state=best_params['random_state'],
        n_jobs=-1
    )

final_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', best_model)
])

final_pipeline.fit(X_train, y_train)

0,1,2
,steps,"[('preprocessor', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('cat', ...), ('num', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.7159168039483006
,device,
,early_stopping_rounds,
,enable_categorical,False


In [54]:
y_pred = final_pipeline.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"Test R² Score: {r2:.4f}")


Test R² Score: 0.8614


In [58]:
with open('best_model_pipeline.pkl', 'wb') as f:
    pickle.dump(final_pipeline, f)