In [44]:
from sklearn.metrics import mean_squared_error
import optuna
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

import pandas as pd
import numpy as np

from xgb_params import xgb_params

from xgboost import XGBRegressor


In [51]:
train_path = 'processed/processed_train_2.csv'
test_path = 'processed/processed_test_2.csv'
test_id_path = 'processed/processed_test_2.csv'

train_df = pd.read_csv(train_path)
y = train_df['price_doc']
X = train_df.drop(['price_doc'], axis=1)

In [46]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [47]:
def objective(trial):
    params = {
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "n_estimators":  trial.suggest_int("n_estimators", 100, 500),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "min_child_weight": trial.suggest_int('mind_child_weight', 0, 12),
        "gamma": trial.suggest_float("gamma", 0, 1),
    }
    
    scores = []
    # for fold_idx, (train_idx, valid_idx) in enumerate(fold.split(range(len(dataset)))):
    regressor =  XGBRegressor(
                        device=xgb_params['device'],
                        objective=xgb_params['objective'],
                        eval_metric=xgb_params['eval_metric'],
                        enable_categorical=xgb_params['enable_categorical'],
                        early_stopping_rounds=xgb_params['early_stopping_rounds'],
                        n_jobs=xgb_params['n_jobs'],
                        **params
                )
    
    regressor.fit(X_train, y_train, eval_set=[(X_val, y_val)])
    y_pred = regressor.predict(X_val)
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    return rmse

In [48]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

[I 2023-10-31 17:20:42,963] A new study created in memory with name: no-name-748a8830-5e53-4331-b2a3-e928d654bb04
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62086
[1]	validation_0-rmsle:0.61686
[2]	validation_0-rmsle:0.61301
[3]	validation_0-rmsle:0.60923
[4]	validation_0-rmsle:0.60607
[5]	validation_0-rmsle:0.60287
[6]	validation_0-rmsle:0.59942
[7]	validation_0-rmsle:0.59647
[8]	validation_0-rmsle:0.59323
[9]	validation_0-rmsle:0.59009
[10]	validation_0-rmsle:0.58829
[11]	validation_0-rmsle:0.58539
[12]	validation_0-rmsle:0.58246
[13]	validation_0-rmsle:0.57999
[14]	validation_0-rmsle:0.57781
[15]	validation_0-rmsle:0.57575
[16]	validation_0-rmsle:0.57346
[17]	validation_0-rmsle:0.57129
[18]	validation_0-rmsle:0.56880
[19]	validation_0-rmsle:0.56664
[20]	validation_0-rmsle:0.56519
[21]	validation_0-rmsle:0.56365
[22]	validation_0-rmsle:0.56130
[23]	validation_0-rmsle:0.55911
[24]	validation_0-rmsle:0.55763
[25]	validation_0-rmsle:0.55550
[26]	validation_0-rmsle:0.55338
[27]	validation_0-rmsle:0.55161
[28]	validation_0-rmsle:0.54964
[29]	validation_0-rmsle:0.54771
[30]	validation_0-rmsle:0.54581
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:21:06,820] Trial 0 finished with value: 2634932.044782884 and parameters: {'learning_rate': 0.014875723119694003, 'n_estimators': 178, 'colsample_bytree': 0.5827956195112588, 'max_depth': 10, 'mind_child_weight': 3, 'gamma': 0.6525755257010504}. Best is trial 0 with value: 2634932.044782884.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62329
[1]	validation_0-rmsle:0.62156
[2]	validation_0-rmsle:0.61985


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[3]	validation_0-rmsle:0.61818
[4]	validation_0-rmsle:0.61706
[5]	validation_0-rmsle:0.61581
[6]	validation_0-rmsle:0.61422
[7]	validation_0-rmsle:0.61299
[8]	validation_0-rmsle:0.61144
[9]	validation_0-rmsle:0.60991
[10]	validation_0-rmsle:0.60912
[11]	validation_0-rmsle:0.60765
[12]	validation_0-rmsle:0.60618
[13]	validation_0-rmsle:0.60507
[14]	validation_0-rmsle:0.60411
[15]	validation_0-rmsle:0.60317
[16]	validation_0-rmsle:0.60208
[17]	validation_0-rmsle:0.60106
[18]	validation_0-rmsle:0.59970
[19]	validation_0-rmsle:0.59869
[20]	validation_0-rmsle:0.59804
[21]	validation_0-rmsle:0.59737
[22]	validation_0-rmsle:0.59634
[23]	validation_0-rmsle:0.59504
[24]	validation_0-rmsle:0.59437
[25]	validation_0-rmsle:0.59312
[26]	validation_0-rmsle:0.59220
[27]	validation_0-rmsle:0.59129
[28]	validation_0-rmsle:0.59009
[29]	validation_0-rmsle:0.58889
[30]	validation_0-rmsle:0.58773
[31]	validation_0-rmsle:0.58659
[32]	validation_0-rmsle:0.58546
[33]	validation_0-rmsle:0.58431
[34]	validation

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:21:19,522] Trial 1 finished with value: 2767656.0371388607 and parameters: {'learning_rate': 0.007183857727758387, 'n_estimators': 394, 'colsample_bytree': 0.5446507996960555, 'max_depth': 4, 'mind_child_weight': 10, 'gamma': 0.6748157316847088}. Best is trial 0 with value: 2634932.044782884.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62338


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[1]	validation_0-rmsle:0.62177
[2]	validation_0-rmsle:0.62013
[3]	validation_0-rmsle:0.61855
[4]	validation_0-rmsle:0.61733
[5]	validation_0-rmsle:0.61601
[6]	validation_0-rmsle:0.61447
[7]	validation_0-rmsle:0.61320
[8]	validation_0-rmsle:0.61170
[9]	validation_0-rmsle:0.61022
[10]	validation_0-rmsle:0.60941
[11]	validation_0-rmsle:0.60799
[12]	validation_0-rmsle:0.60657
[13]	validation_0-rmsle:0.60519
[14]	validation_0-rmsle:0.60420
[15]	validation_0-rmsle:0.60325
[16]	validation_0-rmsle:0.60214
[17]	validation_0-rmsle:0.60105
[18]	validation_0-rmsle:0.59973
[19]	validation_0-rmsle:0.59864
[20]	validation_0-rmsle:0.59735
[21]	validation_0-rmsle:0.59659
[22]	validation_0-rmsle:0.59531
[23]	validation_0-rmsle:0.59408
[24]	validation_0-rmsle:0.59337
[25]	validation_0-rmsle:0.59215
[26]	validation_0-rmsle:0.59097
[27]	validation_0-rmsle:0.58998
[28]	validation_0-rmsle:0.58882
[29]	validation_0-rmsle:0.58766
[30]	validation_0-rmsle:0.58650
[31]	validation_0-rmsle:0.58537
[32]	validation_0

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:21:31,359] Trial 2 finished with value: 2827055.512839314 and parameters: {'learning_rate': 0.006218709272296433, 'n_estimators': 272, 'colsample_bytree': 0.6333759367320153, 'max_depth': 6, 'mind_child_weight': 12, 'gamma': 0.16625868702562052}. Best is trial 0 with value: 2634932.044782884.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62114
[1]	validation_0-rmsle:0.61733


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[2]	validation_0-rmsle:0.61374
[3]	validation_0-rmsle:0.61037
[4]	validation_0-rmsle:0.60696
[5]	validation_0-rmsle:0.60377
[6]	validation_0-rmsle:0.60066
[7]	validation_0-rmsle:0.59761
[8]	validation_0-rmsle:0.59473
[9]	validation_0-rmsle:0.59190
[10]	validation_0-rmsle:0.58913
[11]	validation_0-rmsle:0.58651
[12]	validation_0-rmsle:0.58393
[13]	validation_0-rmsle:0.58143
[14]	validation_0-rmsle:0.57902
[15]	validation_0-rmsle:0.57667
[16]	validation_0-rmsle:0.57442
[17]	validation_0-rmsle:0.57219
[18]	validation_0-rmsle:0.57000
[19]	validation_0-rmsle:0.56791
[20]	validation_0-rmsle:0.56581
[21]	validation_0-rmsle:0.56384
[22]	validation_0-rmsle:0.56193
[23]	validation_0-rmsle:0.55999
[24]	validation_0-rmsle:0.55812
[25]	validation_0-rmsle:0.55631
[26]	validation_0-rmsle:0.55451
[27]	validation_0-rmsle:0.55279
[28]	validation_0-rmsle:0.55108
[29]	validation_0-rmsle:0.54945
[30]	validation_0-rmsle:0.54782
[31]	validation_0-rmsle:0.54622
[32]	validation_0-rmsle:0.54468
[33]	validation_

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:21:38,387] Trial 3 finished with value: 2709611.0763909006 and parameters: {'learning_rate': 0.014583393334515133, 'n_estimators': 130, 'colsample_bytree': 0.9862275205170049, 'max_depth': 6, 'mind_child_weight': 9, 'gamma': 0.34689866972012007}. Best is trial 0 with value: 2634932.044782884.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.61622
[1]	validation_0-rmsle:0.60557
[2]	validation_0-rmsle:0.59681
[3]	validation_0-rmsle:0.58632
[4]	validation_0-rmsle:0.58102
[5]	validation_0-rmsle:0.57397
[6]	validation_0-rmsle:0.56538
[7]	validation_0-rmsle:0.56146
[8]	validation_0-rmsle:0.55391
[9]	validation_0-rmsle:0.54812
[10]	validation_0-rmsle:0.54484
[11]	validation_0-rmsle:0.53879
[12]	validation_0-rmsle:0.53593
[13]	validation_0-rmsle:0.53359
[14]	validation_0-rmsle:0.53122
[15]	validation_0-rmsle:0.52791
[16]	validation_0-rmsle:0.52607
[17]	validation_0-rmsle:0.52423
[18]	validation_0-rmsle:0.51973
[19]	validation_0-rmsle:0.51636
[20]	validation_0-rmsle:0.51464
[21]	validation_0-rmsle:0.51333
[22]	validation_0-rmsle:0.51214
[23]	validation_0-rmsle:0.50858
[24]	validation_0-rmsle:0.50717
[25]	validation_0-rmsle:0.50394
[26]	validation_0-rmsle:0.50155
[27]	validation_0-rmsle:0.50033
[28]	validation_0-rmsle:0.49945
[29]	validation_0-rmsle:0.49782
[30]	validation_0-rmsle:0.49710
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:21:57,188] Trial 4 finished with value: 2639122.9581945683 and parameters: {'learning_rate': 0.04980128180713974, 'n_estimators': 432, 'colsample_bytree': 0.22580869506196688, 'max_depth': 10, 'mind_child_weight': 2, 'gamma': 0.9653478631077574}. Best is trial 0 with value: 2634932.044782884.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.61375
[1]	validation_0-rmsle:0.60012


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[2]	validation_0-rmsle:0.58840
[3]	validation_0-rmsle:0.57756
[4]	validation_0-rmsle:0.57043
[5]	validation_0-rmsle:0.56369
[6]	validation_0-rmsle:0.55545
[7]	validation_0-rmsle:0.54994
[8]	validation_0-rmsle:0.54286
[9]	validation_0-rmsle:0.53650
[10]	validation_0-rmsle:0.53329
[11]	validation_0-rmsle:0.52785
[12]	validation_0-rmsle:0.52281
[13]	validation_0-rmsle:0.51971
[14]	validation_0-rmsle:0.51689
[15]	validation_0-rmsle:0.51414
[16]	validation_0-rmsle:0.51106
[17]	validation_0-rmsle:0.50877
[18]	validation_0-rmsle:0.50545
[19]	validation_0-rmsle:0.50306
[20]	validation_0-rmsle:0.50153
[21]	validation_0-rmsle:0.49998
[22]	validation_0-rmsle:0.49795
[23]	validation_0-rmsle:0.49557
[24]	validation_0-rmsle:0.49420
[25]	validation_0-rmsle:0.49201
[26]	validation_0-rmsle:0.49051
[27]	validation_0-rmsle:0.48907
[28]	validation_0-rmsle:0.48734
[29]	validation_0-rmsle:0.48576
[30]	validation_0-rmsle:0.48446
[31]	validation_0-rmsle:0.48304
[32]	validation_0-rmsle:0.48185
[33]	validation_

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:22:06,986] Trial 5 finished with value: 2518595.916451796 and parameters: {'learning_rate': 0.05759996677763831, 'n_estimators': 263, 'colsample_bytree': 0.48198129748503077, 'max_depth': 7, 'mind_child_weight': 8, 'gamma': 0.35060970385365253}. Best is trial 5 with value: 2518595.916451796.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62181
[1]	validation_0-rmsle:0.61868
[2]	validation_0-rmsle:0.61567
[3]	validation_0-rmsle:0.61273
[4]	validation_0-rmsle:0.60981
[5]	validation_0-rmsle:0.60697
[6]	validation_0-rmsle:0.60421
[7]	validation_0-rmsle:0.60172
[8]	validation_0-rmsle:0.59912
[9]	validation_0-rmsle:0.59662
[10]	validation_0-rmsle:0.59413
[11]	validation_0-rmsle:0.59171
[12]	validation_0-rmsle:0.58931
[13]	validation_0-rmsle:0.58703
[14]	validation_0-rmsle:0.58481
[15]	validation_0-rmsle:0.58258
[16]	validation_0-rmsle:0.58042
[17]	validation_0-rmsle:0.57833
[18]	validation_0-rmsle:0.57630
[19]	validation_0-rmsle:0.57430
[20]	validation_0-rmsle:0.57240
[21]	validation_0-rmsle:0.57046
[22]	validation_0-rmsle:0.56855
[23]	validation_0-rmsle:0.56672
[24]	validation_0-rmsle:0.56485
[25]	validation_0-rmsle:0.56313
[26]	validation_0-rmsle:0.56142
[27]	validation_0-rmsle:0.55971
[28]	validation_0-rmsle:0.55801
[29]	validation_0-rmsle:0.55637
[30]	validation_0-rmsle:0.55479
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:22:36,714] Trial 6 finished with value: 2570148.2630401747 and parameters: {'learning_rate': 0.011457741483081494, 'n_estimators': 259, 'colsample_bytree': 0.9537624631881897, 'max_depth': 10, 'mind_child_weight': 9, 'gamma': 0.903901184345972}. Best is trial 5 with value: 2518595.916451796.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62207
[1]	validation_0-rmsle:0.61828
[2]	validation_0-rmsle:0.61496
[3]	validation_0-rmsle:0.61086
[4]	validation_0-rmsle:0.60856
[5]	validation_0-rmsle:0.60541
[6]	validation_0-rmsle:0.60156
[7]	validation_0-rmsle:0.59949
[8]	validation_0-rmsle:0.59588
[9]	validation_0-rmsle:0.59282
[10]	validation_0-rmsle:0.59092
[11]	validation_0-rmsle:0.58762
[12]	validation_0-rmsle:0.58586
[13]	validation_0-rmsle:0.58428
[14]	validation_0-rmsle:0.58267
[15]	validation_0-rmsle:0.58043
[16]	validation_0-rmsle:0.57895
[17]	validation_0-rmsle:0.57734
[18]	validation_0-rmsle:0.57441
[19]	validation_0-rmsle:0.57185
[20]	validation_0-rmsle:0.57039
[21]	validation_0-rmsle:0.56906
[22]	validation_0-rmsle:0.56778
[23]	validation_0-rmsle:0.56519
[24]	validation_0-rmsle:0.56360
[25]	validation_0-rmsle:0.56106
[26]	validation_0-rmsle:0.55891
[27]	validation_0-rmsle:0.55752
[28]	validation_0-rmsle:0.55642
[29]	validation_0-rmsle:0.55478
[30]	validation_0-rmsle:0.55362
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:22:52,756] Trial 7 finished with value: 2770054.9608076587 and parameters: {'learning_rate': 0.016318354908540146, 'n_estimators': 178, 'colsample_bytree': 0.2321068966367153, 'max_depth': 10, 'mind_child_weight': 6, 'gamma': 0.3088053752672122}. Best is trial 5 with value: 2518595.916451796.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62232
[1]	validation_0-rmsle:0.61748
[2]	validation_0-rmsle:0.61499


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[3]	validation_0-rmsle:0.61255
[4]	validation_0-rmsle:0.61043
[5]	validation_0-rmsle:0.60667
[6]	validation_0-rmsle:0.60423
[7]	validation_0-rmsle:0.60249
[8]	validation_0-rmsle:0.59691
[9]	validation_0-rmsle:0.59364
[10]	validation_0-rmsle:0.59194
[11]	validation_0-rmsle:0.59031
[12]	validation_0-rmsle:0.58846
[13]	validation_0-rmsle:0.58683
[14]	validation_0-rmsle:0.58541
[15]	validation_0-rmsle:0.58252
[16]	validation_0-rmsle:0.58121
[17]	validation_0-rmsle:0.58009
[18]	validation_0-rmsle:0.57561
[19]	validation_0-rmsle:0.57425
[20]	validation_0-rmsle:0.57288
[21]	validation_0-rmsle:0.57184
[22]	validation_0-rmsle:0.57087
[23]	validation_0-rmsle:0.56994
[24]	validation_0-rmsle:0.56901
[25]	validation_0-rmsle:0.56817
[26]	validation_0-rmsle:0.56553
[27]	validation_0-rmsle:0.56461
[28]	validation_0-rmsle:0.56357
[29]	validation_0-rmsle:0.56149
[30]	validation_0-rmsle:0.56064
[31]	validation_0-rmsle:0.55991
[32]	validation_0-rmsle:0.55617
[33]	validation_0-rmsle:0.55553
[34]	validation

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:23:04,206] Trial 8 finished with value: 2712477.333439247 and parameters: {'learning_rate': 0.028346402727785828, 'n_estimators': 407, 'colsample_bytree': 0.11372660310569079, 'max_depth': 4, 'mind_child_weight': 8, 'gamma': 0.8639894659270476}. Best is trial 5 with value: 2518595.916451796.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62396


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[1]	validation_0-rmsle:0.62265
[2]	validation_0-rmsle:0.62157
[3]	validation_0-rmsle:0.62006
[4]	validation_0-rmsle:0.61904
[5]	validation_0-rmsle:0.61796
[6]	validation_0-rmsle:0.61650
[7]	validation_0-rmsle:0.61547
[8]	validation_0-rmsle:0.61406
[9]	validation_0-rmsle:0.61267
[10]	validation_0-rmsle:0.61196
[11]	validation_0-rmsle:0.61061
[12]	validation_0-rmsle:0.60926
[13]	validation_0-rmsle:0.60860
[14]	validation_0-rmsle:0.60768
[15]	validation_0-rmsle:0.60680
[16]	validation_0-rmsle:0.60575
[17]	validation_0-rmsle:0.60513
[18]	validation_0-rmsle:0.60389
[19]	validation_0-rmsle:0.60287
[20]	validation_0-rmsle:0.60222
[21]	validation_0-rmsle:0.60156
[22]	validation_0-rmsle:0.60097
[23]	validation_0-rmsle:0.59975
[24]	validation_0-rmsle:0.59904
[25]	validation_0-rmsle:0.59781
[26]	validation_0-rmsle:0.59684
[27]	validation_0-rmsle:0.59587
[28]	validation_0-rmsle:0.59471
[29]	validation_0-rmsle:0.59357
[30]	validation_0-rmsle:0.59243
[31]	validation_0-rmsle:0.59130
[32]	validation_0

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:23:10,579] Trial 9 finished with value: 3404502.976310848 and parameters: {'learning_rate': 0.005715685168612022, 'n_estimators': 138, 'colsample_bytree': 0.431828925642093, 'max_depth': 6, 'mind_child_weight': 1, 'gamma': 0.18220591873505765}. Best is trial 5 with value: 2518595.916451796.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60193
[1]	validation_0-rmsle:0.58380
[2]	validation_0-rmsle:0.56835
[3]	validation_0-rmsle:0.55499
[4]	validation_0-rmsle:0.54373
[5]	validation_0-rmsle:0.53485
[6]	validation_0-rmsle:0.52630
[7]	validation_0-rmsle:0.51925
[8]	validation_0-rmsle:0.51273
[9]	validation_0-rmsle:0.50683
[10]	validation_0-rmsle:0.50258
[11]	validation_0-rmsle:0.49818
[12]	validation_0-rmsle:0.49430
[13]	validation_0-rmsle:0.49079
[14]	validation_0-rmsle:0.48778
[15]	validation_0-rmsle:0.48551
[16]	validation_0-rmsle:0.48339
[17]	validation_0-rmsle:0.48139
[18]	validation_0-rmsle:0.47963
[19]	validation_0-rmsle:0.47800
[20]	validation_0-rmsle:0.47675
[21]	validation_0-rmsle:0.47546
[22]	validation_0-rmsle:0.47438
[23]	validation_0-rmsle:0.47355
[24]	validation_0-rmsle:0.47281
[25]	validation_0-rmsle:0.47182
[26]	validation_0-rmsle:0.47117
[27]	validation_0-rmsle:0.47060
[28]	validation_0-rmsle:0.47002
[29]	validation_0-rmsle:0.46945
[30]	validation_0-rmsle:0.46900
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:23:19,807] Trial 10 finished with value: 2497402.646393871 and parameters: {'learning_rate': 0.08792814755559897, 'n_estimators': 329, 'colsample_bytree': 0.7908511501106481, 'max_depth': 8, 'mind_child_weight': 5, 'gamma': 0.018323246422096207}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60001
[1]	validation_0-rmsle:0.58063
[2]	validation_0-rmsle:0.56421
[3]	validation_0-rmsle:0.55033
[4]	validation_0-rmsle:0.53897
[5]	validation_0-rmsle:0.52965
[6]	validation_0-rmsle:0.52072
[7]	validation_0-rmsle:0.51412
[8]	validation_0-rmsle:0.50765
[9]	validation_0-rmsle:0.50205
[10]	validation_0-rmsle:0.49819
[11]	validation_0-rmsle:0.49409
[12]	validation_0-rmsle:0.49047
[13]	validation_0-rmsle:0.48748
[14]	validation_0-rmsle:0.48474
[15]	validation_0-rmsle:0.48268
[16]	validation_0-rmsle:0.48067
[17]	validation_0-rmsle:0.47891
[18]	validation_0-rmsle:0.47751
[19]	validation_0-rmsle:0.47620
[20]	validation_0-rmsle:0.47492
[21]	validation_0-rmsle:0.47372
[22]	validation_0-rmsle:0.47279
[23]	validation_0-rmsle:0.47198
[24]	validation_0-rmsle:0.47132
[25]	validation_0-rmsle:0.47085
[26]	validation_0-rmsle:0.47038
[27]	validation_0-rmsle:0.46976
[28]	validation_0-rmsle:0.46934
[29]	validation_0-rmsle:0.46910
[30]	validation_0-rmsle:0.46865
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:23:26,816] Trial 11 finished with value: 2523952.8152976735 and parameters: {'learning_rate': 0.09603090911943933, 'n_estimators': 334, 'colsample_bytree': 0.781404343281881, 'max_depth': 8, 'mind_child_weight': 5, 'gamma': 0.07196967829151414}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60018
[1]	validation_0-rmsle:0.58067
[2]	validation_0-rmsle:0.56433
[3]	validation_0-rmsle:0.55029
[4]	validation_0-rmsle:0.53862
[5]	validation_0-rmsle:0.52955
[6]	validation_0-rmsle:0.52096
[7]	validation_0-rmsle:0.51424
[8]	validation_0-rmsle:0.50766
[9]	validation_0-rmsle:0.50232
[10]	validation_0-rmsle:0.49846
[11]	validation_0-rmsle:0.49443
[12]	validation_0-rmsle:0.49085
[13]	validation_0-rmsle:0.48760
[14]	validation_0-rmsle:0.48503
[15]	validation_0-rmsle:0.48297
[16]	validation_0-rmsle:0.48106
[17]	validation_0-rmsle:0.47928
[18]	validation_0-rmsle:0.47752
[19]	validation_0-rmsle:0.47600
[20]	validation_0-rmsle:0.47474
[21]	validation_0-rmsle:0.47366
[22]	validation_0-rmsle:0.47276
[23]	validation_0-rmsle:0.47212
[24]	validation_0-rmsle:0.47134
[25]	validation_0-rmsle:0.47064
[26]	validation_0-rmsle:0.47003
[27]	validation_0-rmsle:0.46967
[28]	validation_0-rmsle:0.46926
[29]	validation_0-rmsle:0.46887
[30]	validation_0-rmsle:0.46847
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:23:34,798] Trial 12 finished with value: 2521489.050987576 and parameters: {'learning_rate': 0.09646180717167457, 'n_estimators': 334, 'colsample_bytree': 0.7397102521962408, 'max_depth': 8, 'mind_child_weight': 4, 'gamma': 0.041258148843066}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.61623
[1]	validation_0-rmsle:0.60716
[2]	validation_0-rmsle:0.59775
[3]	validation_0-rmsle:0.58880
[4]	validation_0-rmsle:0.58300
[5]	validation_0-rmsle:0.57705
[6]	validation_0-rmsle:0.56997
[7]	validation_0-rmsle:0.56492
[8]	validation_0-rmsle:0.55875
[9]	validation_0-rmsle:0.55287
[10]	validation_0-rmsle:0.54991
[11]	validation_0-rmsle:0.54472
[12]	validation_0-rmsle:0.53983
[13]	validation_0-rmsle:0.53719
[14]	validation_0-rmsle:0.53431
[15]	validation_0-rmsle:0.53131
[16]	validation_0-rmsle:0.52807
[17]	validation_0-rmsle:0.52623
[18]	validation_0-rmsle:0.52251
[19]	validation_0-rmsle:0.51958
[20]	validation_0-rmsle:0.51793
[21]	validation_0-rmsle:0.51618
[22]	validation_0-rmsle:0.51483
[23]	validation_0-rmsle:0.51182
[24]	validation_0-rmsle:0.51017
[25]	validation_0-rmsle:0.50762
[26]	validation_0-rmsle:0.50554
[27]	validation_0-rmsle:0.50374
[28]	validation_0-rmsle:0.50146
[29]	validation_0-rmsle:0.49954
[30]	validation_0-rmsle:0.49748
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:23:47,894] Trial 13 finished with value: 2529540.7033675127 and parameters: {'learning_rate': 0.04138202501043667, 'n_estimators': 231, 'colsample_bytree': 0.4573588270935012, 'max_depth': 8, 'mind_child_weight': 7, 'gamma': 0.3876747068172507}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.62446


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[1]	validation_0-rmsle:0.62385
[2]	validation_0-rmsle:0.62325
[3]	validation_0-rmsle:0.62265
[4]	validation_0-rmsle:0.62205
[5]	validation_0-rmsle:0.62152
[6]	validation_0-rmsle:0.62093
[7]	validation_0-rmsle:0.62042
[8]	validation_0-rmsle:0.61982
[9]	validation_0-rmsle:0.61923
[10]	validation_0-rmsle:0.61875
[11]	validation_0-rmsle:0.61817
[12]	validation_0-rmsle:0.61759
[13]	validation_0-rmsle:0.61702
[14]	validation_0-rmsle:0.61645
[15]	validation_0-rmsle:0.61589
[16]	validation_0-rmsle:0.61534
[17]	validation_0-rmsle:0.61478
[18]	validation_0-rmsle:0.61423
[19]	validation_0-rmsle:0.61368
[20]	validation_0-rmsle:0.61313
[21]	validation_0-rmsle:0.61260
[22]	validation_0-rmsle:0.61207
[23]	validation_0-rmsle:0.61153
[24]	validation_0-rmsle:0.61107
[25]	validation_0-rmsle:0.61056
[26]	validation_0-rmsle:0.61003
[27]	validation_0-rmsle:0.60951
[28]	validation_0-rmsle:0.60898
[29]	validation_0-rmsle:0.60847
[30]	validation_0-rmsle:0.60795
[31]	validation_0-rmsle:0.60744
[32]	validation_0

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:24:15,611] Trial 14 finished with value: 3024648.559973481 and parameters: {'learning_rate': 0.0022083470445269844, 'n_estimators': 480, 'colsample_bytree': 0.8108495357562854, 'max_depth': 7, 'mind_child_weight': 6, 'gamma': 0.03319658231331236}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.61585


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[1]	validation_0-rmsle:0.60388
[2]	validation_0-rmsle:0.59514
[3]	validation_0-rmsle:0.58375
[4]	validation_0-rmsle:0.57679
[5]	validation_0-rmsle:0.56980
[6]	validation_0-rmsle:0.56087
[7]	validation_0-rmsle:0.55540
[8]	validation_0-rmsle:0.54812
[9]	validation_0-rmsle:0.54252
[10]	validation_0-rmsle:0.53946
[11]	validation_0-rmsle:0.53367
[12]	validation_0-rmsle:0.52821
[13]	validation_0-rmsle:0.52588
[14]	validation_0-rmsle:0.52307
[15]	validation_0-rmsle:0.52033
[16]	validation_0-rmsle:0.51713
[17]	validation_0-rmsle:0.51546
[18]	validation_0-rmsle:0.51151
[19]	validation_0-rmsle:0.50882
[20]	validation_0-rmsle:0.50734
[21]	validation_0-rmsle:0.50584
[22]	validation_0-rmsle:0.50481
[23]	validation_0-rmsle:0.50184
[24]	validation_0-rmsle:0.50043
[25]	validation_0-rmsle:0.49777
[26]	validation_0-rmsle:0.49598
[27]	validation_0-rmsle:0.49432
[28]	validation_0-rmsle:0.49316
[29]	validation_0-rmsle:0.49109
[30]	validation_0-rmsle:0.48919
[31]	validation_0-rmsle:0.48751
[32]	validation_0

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:24:26,764] Trial 15 finished with value: 2522119.714864219 and parameters: {'learning_rate': 0.05641208939702462, 'n_estimators': 325, 'colsample_bytree': 0.37429643913452065, 'max_depth': 7, 'mind_child_weight': 11, 'gamma': 0.5062032990648522}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.61779
[1]	validation_0-rmsle:0.61104


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[2]	validation_0-rmsle:0.60471
[3]	validation_0-rmsle:0.59889
[4]	validation_0-rmsle:0.59348
[5]	validation_0-rmsle:0.58909
[6]	validation_0-rmsle:0.58415
[7]	validation_0-rmsle:0.58019
[8]	validation_0-rmsle:0.57567
[9]	validation_0-rmsle:0.57149
[10]	validation_0-rmsle:0.56813
[11]	validation_0-rmsle:0.56432
[12]	validation_0-rmsle:0.56077
[13]	validation_0-rmsle:0.55737
[14]	validation_0-rmsle:0.55418
[15]	validation_0-rmsle:0.55182
[16]	validation_0-rmsle:0.54927
[17]	validation_0-rmsle:0.54673
[18]	validation_0-rmsle:0.54393
[19]	validation_0-rmsle:0.54167
[20]	validation_0-rmsle:0.53912
[21]	validation_0-rmsle:0.53660
[22]	validation_0-rmsle:0.53417
[23]	validation_0-rmsle:0.53192
[24]	validation_0-rmsle:0.53023
[25]	validation_0-rmsle:0.52816
[26]	validation_0-rmsle:0.52621
[27]	validation_0-rmsle:0.52461
[28]	validation_0-rmsle:0.52277
[29]	validation_0-rmsle:0.52104
[30]	validation_0-rmsle:0.51932
[31]	validation_0-rmsle:0.51769
[32]	validation_0-rmsle:0.51608
[33]	validation_

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:24:35,186] Trial 16 finished with value: 2541124.205637563 and parameters: {'learning_rate': 0.028546798220416707, 'n_estimators': 224, 'colsample_bytree': 0.6868095603767156, 'max_depth': 5, 'mind_child_weight': 0, 'gamma': 0.2535442156375711}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60122
[1]	validation_0-rmsle:0.58102
[2]	validation_0-rmsle:0.56465
[3]	validation_0-rmsle:0.55072
[4]	validation_0-rmsle:0.54043
[5]	validation_0-rmsle:0.53116
[6]	validation_0-rmsle:0.52213
[7]	validation_0-rmsle:0.51562
[8]	validation_0-rmsle:0.50872
[9]	validation_0-rmsle:0.50290
[10]	validation_0-rmsle:0.49969
[11]	validation_0-rmsle:0.49517
[12]	validation_0-rmsle:0.49138
[13]	validation_0-rmsle:0.48849
[14]	validation_0-rmsle:0.48645
[15]	validation_0-rmsle:0.48449
[16]	validation_0-rmsle:0.48245
[17]	validation_0-rmsle:0.48060
[18]	validation_0-rmsle:0.47864
[19]	validation_0-rmsle:0.47719
[20]	validation_0-rmsle:0.47618
[21]	validation_0-rmsle:0.47541
[22]	validation_0-rmsle:0.47437
[23]	validation_0-rmsle:0.47324
[24]	validation_0-rmsle:0.47253
[25]	validation_0-rmsle:0.47167
[26]	validation_0-rmsle:0.47101
[27]	validation_0-rmsle:0.47042
[28]	validation_0-rmsle:0.46973
[29]	validation_0-rmsle:0.46900
[30]	validation_0-rmsle:0.46839
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:24:45,692] Trial 17 finished with value: 2559042.090588935 and parameters: {'learning_rate': 0.09443838731740678, 'n_estimators': 357, 'colsample_bytree': 0.5522349515639541, 'max_depth': 9, 'mind_child_weight': 4, 'gamma': 0.1725624064764223}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.61212
[1]	validation_0-rmsle:0.60157
[2]	validation_0-rmsle:0.59239


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[3]	validation_0-rmsle:0.58380
[4]	validation_0-rmsle:0.57657
[5]	validation_0-rmsle:0.57106
[6]	validation_0-rmsle:0.56507
[7]	validation_0-rmsle:0.56009
[8]	validation_0-rmsle:0.55510
[9]	validation_0-rmsle:0.55019
[10]	validation_0-rmsle:0.54615
[11]	validation_0-rmsle:0.54205
[12]	validation_0-rmsle:0.53823
[13]	validation_0-rmsle:0.53522
[14]	validation_0-rmsle:0.53245
[15]	validation_0-rmsle:0.52960
[16]	validation_0-rmsle:0.52654
[17]	validation_0-rmsle:0.52393
[18]	validation_0-rmsle:0.52174
[19]	validation_0-rmsle:0.51957
[20]	validation_0-rmsle:0.51758
[21]	validation_0-rmsle:0.51537
[22]	validation_0-rmsle:0.51349
[23]	validation_0-rmsle:0.51179
[24]	validation_0-rmsle:0.50999
[25]	validation_0-rmsle:0.50820
[26]	validation_0-rmsle:0.50666
[27]	validation_0-rmsle:0.50562
[28]	validation_0-rmsle:0.50402
[29]	validation_0-rmsle:0.50302
[30]	validation_0-rmsle:0.50147
[31]	validation_0-rmsle:0.50069
[32]	validation_0-rmsle:0.49976
[33]	validation_0-rmsle:0.49859
[34]	validation

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:24:54,073] Trial 18 finished with value: 2541834.188554559 and parameters: {'learning_rate': 0.06105078982940538, 'n_estimators': 293, 'colsample_bytree': 0.8309018406221209, 'max_depth': 3, 'mind_child_weight': 7, 'gamma': 0.4380977575505434}. Best is trial 10 with value: 2497402.646393871.
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.61491
[1]	validation_0-rmsle:0.60587
[2]	validation_0-rmsle:0.59725
[3]	validation_0-rmsle:0.58922
[4]	validation_0-rmsle:0.58203
[5]	validation_0-rmsle:0.57576
[6]	validation_0-rmsle:0.56948
[7]	validation_0-rmsle:0.56413
[8]	validation_0-rmsle:0.55847
[9]	validation_0-rmsle:0.55329
[10]	validation_0-rmsle:0.55008
[11]	validation_0-rmsle:0.54537
[12]	validation_0-rmsle:0.54088
[13]	validation_0-rmsle:0.53669
[14]	validation_0-rmsle:0.53355
[15]	validation_0-rmsle:0.53056
[16]	validation_0-rmsle:0.52743
[17]	validation_0-rmsle:0.52438
[18]	validation_0-rmsle:0.52112
[19]	validation_0-rmsle:0.51833
[20]	validation_0-rmsle:0.51537
[21]	validation_0-rmsle:0.51256
[22]	validation_0-rmsle:0.50991
[23]	validation_0-rmsle:0.50760
[24]	validation_0-rmsle:0.50595
[25]	validation_0-rmsle:0.50364
[26]	validation_0-rmsle:0.50148
[27]	validation_0-rmsle:0.49975
[28]	validation_0-rmsle:0.49791
[29]	validation_0-rmsle:0.49609
[30]	validation_0-rmsle:0.49439
[31]	validation_0-

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
[I 2023-10-31 17:25:09,233] Trial 19 finished with value: 2514679.60391564 and parameters: {'learning_rate': 0.03710256763343106, 'n_estimators': 497, 'colsample_bytree': 0.6650679946678821, 'max_depth': 9, 'mind_child_weight': 8, 'gamma': 0.25589456925418197}. Best is trial 10 with value: 2497402.646393871.


In [49]:
best_params = study.best_params
best_params

{'learning_rate': 0.08792814755559897,
 'n_estimators': 329,
 'colsample_bytree': 0.7908511501106481,
 'max_depth': 8,
 'mind_child_weight': 5,
 'gamma': 0.018323246422096207}

In [58]:
best_params['max_depth'] = 5

In [59]:
test = pd.read_csv(test_path)
test_id = pd.read_csv(test_id_path)

In [60]:
def train(X, y, evalset=None):    
    model = XGBRegressor(
                        device=xgb_params['device'],
                        objective=xgb_params['objective'],
                        eval_metric=xgb_params['eval_metric'],
                        enable_categorical=xgb_params['enable_categorical'],
                        early_stopping_rounds=xgb_params['early_stopping_rounds'],
                        n_jobs=xgb_params['n_jobs'],
                        **best_params,
                )
    if not evalset:
        model.fit(X, y, verbose=True)
    else:
        model.fit(X, y, eval_set=evalset, verbose=True)

    return model

def find_best_model(processed_df):
    # Return best model from kfolds
    best_model = None
    min_loss = float('inf')
    cv = KFold(n_splits=10, shuffle=True, random_state=42)
    for fold, (train_idx, test_idx) in enumerate(cv.split(processed_df)):
        X_train = processed_df.iloc[train_idx]
        y_train = X_train["price_doc"]
        X_train.drop(["price_doc"], axis=1, inplace=True)

        X_val = processed_df.iloc[test_idx]
        y_val = X_val["price_doc"]
        X_val.drop(["price_doc"], axis=1, inplace=True)

        evalset = [(X_val, y_val)]
        model = train(X_train, y_train, evalset)

        pred = model.predict(X_val)
        loss = mean_squared_error(y_val, pred, squared=False)

        if loss < min_loss:
            min_loss = loss
            best_model = model
    
    return best_model, min_loss

In [61]:
model, loss = find_best_model(train_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.61386
[1]	validation_0-rmsle:0.59620
[2]	validation_0-rmsle:0.58214


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[3]	validation_0-rmsle:0.57009
[4]	validation_0-rmsle:0.56025
[5]	validation_0-rmsle:0.55295
[6]	validation_0-rmsle:0.54498
[7]	validation_0-rmsle:0.53902
[8]	validation_0-rmsle:0.53308
[9]	validation_0-rmsle:0.52804
[10]	validation_0-rmsle:0.52432
[11]	validation_0-rmsle:0.52017
[12]	validation_0-rmsle:0.51639
[13]	validation_0-rmsle:0.51325
[14]	validation_0-rmsle:0.51056
[15]	validation_0-rmsle:0.50843
[16]	validation_0-rmsle:0.50635
[17]	validation_0-rmsle:0.50431
[18]	validation_0-rmsle:0.50238
[19]	validation_0-rmsle:0.50083
[20]	validation_0-rmsle:0.49925
[21]	validation_0-rmsle:0.49788
[22]	validation_0-rmsle:0.49651
[23]	validation_0-rmsle:0.49545
[24]	validation_0-rmsle:0.49449
[25]	validation_0-rmsle:0.49361
[26]	validation_0-rmsle:0.49281
[27]	validation_0-rmsle:0.49178
[28]	validation_0-rmsle:0.49104
[29]	validation_0-rmsle:0.49004
[30]	validation_0-rmsle:0.48940
[31]	validation_0-rmsle:0.48872
[32]	validation_0-rmsle:0.48834
[33]	validation_0-rmsle:0.48769
[34]	validation

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.59429
[1]	validation_0-rmsle:0.57737
[2]	validation_0-rmsle:0.56304
[3]	validation_0-rmsle:0.55063


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[4]	validation_0-rmsle:0.54072
[5]	validation_0-rmsle:0.53247
[6]	validation_0-rmsle:0.52487
[7]	validation_0-rmsle:0.51839
[8]	validation_0-rmsle:0.51242
[9]	validation_0-rmsle:0.50695
[10]	validation_0-rmsle:0.50307
[11]	validation_0-rmsle:0.49879
[12]	validation_0-rmsle:0.49507
[13]	validation_0-rmsle:0.49176
[14]	validation_0-rmsle:0.48869
[15]	validation_0-rmsle:0.48621
[16]	validation_0-rmsle:0.48405
[17]	validation_0-rmsle:0.48175
[18]	validation_0-rmsle:0.47997
[19]	validation_0-rmsle:0.47849
[20]	validation_0-rmsle:0.47727
[21]	validation_0-rmsle:0.47618
[22]	validation_0-rmsle:0.47483
[23]	validation_0-rmsle:0.47355
[24]	validation_0-rmsle:0.47253
[25]	validation_0-rmsle:0.47154
[26]	validation_0-rmsle:0.47068
[27]	validation_0-rmsle:0.47012
[28]	validation_0-rmsle:0.46947
[29]	validation_0-rmsle:0.46865
[30]	validation_0-rmsle:0.46798
[31]	validation_0-rmsle:0.46726
[32]	validation_0-rmsle:0.46670
[33]	validation_0-rmsle:0.46618
[34]	validation_0-rmsle:0.46579
[35]	validatio

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.58967
[1]	validation_0-rmsle:0.57265
[2]	validation_0-rmsle:0.55854


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[3]	validation_0-rmsle:0.54645
[4]	validation_0-rmsle:0.53650
[5]	validation_0-rmsle:0.52848
[6]	validation_0-rmsle:0.52155
[7]	validation_0-rmsle:0.51535
[8]	validation_0-rmsle:0.50989
[9]	validation_0-rmsle:0.50456
[10]	validation_0-rmsle:0.50061
[11]	validation_0-rmsle:0.49657
[12]	validation_0-rmsle:0.49314
[13]	validation_0-rmsle:0.48980
[14]	validation_0-rmsle:0.48698
[15]	validation_0-rmsle:0.48489
[16]	validation_0-rmsle:0.48280
[17]	validation_0-rmsle:0.48108
[18]	validation_0-rmsle:0.47945
[19]	validation_0-rmsle:0.47791
[20]	validation_0-rmsle:0.47669
[21]	validation_0-rmsle:0.47549
[22]	validation_0-rmsle:0.47435
[23]	validation_0-rmsle:0.47336
[24]	validation_0-rmsle:0.47250
[25]	validation_0-rmsle:0.47171
[26]	validation_0-rmsle:0.47098
[27]	validation_0-rmsle:0.47035
[28]	validation_0-rmsle:0.46952
[29]	validation_0-rmsle:0.46894
[30]	validation_0-rmsle:0.46825
[31]	validation_0-rmsle:0.46775
[32]	validation_0-rmsle:0.46752
[33]	validation_0-rmsle:0.46702
[34]	validation

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60264
[1]	validation_0-rmsle:0.58591
[2]	validation_0-rmsle:0.57278
[3]	validation_0-rmsle:0.56129


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[4]	validation_0-rmsle:0.55124
[5]	validation_0-rmsle:0.54355
[6]	validation_0-rmsle:0.53626
[7]	validation_0-rmsle:0.53052
[8]	validation_0-rmsle:0.52508
[9]	validation_0-rmsle:0.52012
[10]	validation_0-rmsle:0.51670
[11]	validation_0-rmsle:0.51265
[12]	validation_0-rmsle:0.50930
[13]	validation_0-rmsle:0.50639
[14]	validation_0-rmsle:0.50361
[15]	validation_0-rmsle:0.50131
[16]	validation_0-rmsle:0.49948
[17]	validation_0-rmsle:0.49729
[18]	validation_0-rmsle:0.49571
[19]	validation_0-rmsle:0.49411
[20]	validation_0-rmsle:0.49270
[21]	validation_0-rmsle:0.49164
[22]	validation_0-rmsle:0.49059
[23]	validation_0-rmsle:0.48957
[24]	validation_0-rmsle:0.48866
[25]	validation_0-rmsle:0.48787
[26]	validation_0-rmsle:0.48694
[27]	validation_0-rmsle:0.48629
[28]	validation_0-rmsle:0.48572
[29]	validation_0-rmsle:0.48504
[30]	validation_0-rmsle:0.48457
[31]	validation_0-rmsle:0.48401
[32]	validation_0-rmsle:0.48346
[33]	validation_0-rmsle:0.48293
[34]	validation_0-rmsle:0.48258
[35]	validatio

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60594
[1]	validation_0-rmsle:0.58773
[2]	validation_0-rmsle:0.57270
[3]	validation_0-rmsle:0.56017
[4]	validation_0-rmsle:0.54938


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[5]	validation_0-rmsle:0.54092
[6]	validation_0-rmsle:0.53291
[7]	validation_0-rmsle:0.52625
[8]	validation_0-rmsle:0.51938
[9]	validation_0-rmsle:0.51391
[10]	validation_0-rmsle:0.50958
[11]	validation_0-rmsle:0.50525
[12]	validation_0-rmsle:0.50116
[13]	validation_0-rmsle:0.49759
[14]	validation_0-rmsle:0.49453
[15]	validation_0-rmsle:0.49242
[16]	validation_0-rmsle:0.49030
[17]	validation_0-rmsle:0.48786
[18]	validation_0-rmsle:0.48578
[19]	validation_0-rmsle:0.48420
[20]	validation_0-rmsle:0.48257
[21]	validation_0-rmsle:0.48134
[22]	validation_0-rmsle:0.47995
[23]	validation_0-rmsle:0.47866
[24]	validation_0-rmsle:0.47764
[25]	validation_0-rmsle:0.47646
[26]	validation_0-rmsle:0.47543
[27]	validation_0-rmsle:0.47433
[28]	validation_0-rmsle:0.47334
[29]	validation_0-rmsle:0.47258
[30]	validation_0-rmsle:0.47187
[31]	validation_0-rmsle:0.47138
[32]	validation_0-rmsle:0.47078
[33]	validation_0-rmsle:0.47021
[34]	validation_0-rmsle:0.46969
[35]	validation_0-rmsle:0.46937
[36]	validati

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60903
[1]	validation_0-rmsle:0.59167
[2]	validation_0-rmsle:0.57696
[3]	validation_0-rmsle:0.56422


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[4]	validation_0-rmsle:0.55377
[5]	validation_0-rmsle:0.54531
[6]	validation_0-rmsle:0.53782
[7]	validation_0-rmsle:0.53122
[8]	validation_0-rmsle:0.52511
[9]	validation_0-rmsle:0.51992
[10]	validation_0-rmsle:0.51575
[11]	validation_0-rmsle:0.51154
[12]	validation_0-rmsle:0.50780
[13]	validation_0-rmsle:0.50425
[14]	validation_0-rmsle:0.50149
[15]	validation_0-rmsle:0.49908
[16]	validation_0-rmsle:0.49691
[17]	validation_0-rmsle:0.49499
[18]	validation_0-rmsle:0.49282
[19]	validation_0-rmsle:0.49097
[20]	validation_0-rmsle:0.48949
[21]	validation_0-rmsle:0.48810
[22]	validation_0-rmsle:0.48669
[23]	validation_0-rmsle:0.48558
[24]	validation_0-rmsle:0.48459
[25]	validation_0-rmsle:0.48353
[26]	validation_0-rmsle:0.48256
[27]	validation_0-rmsle:0.48155
[28]	validation_0-rmsle:0.48075
[29]	validation_0-rmsle:0.48007
[30]	validation_0-rmsle:0.47952
[31]	validation_0-rmsle:0.47901
[32]	validation_0-rmsle:0.47852
[33]	validation_0-rmsle:0.47808
[34]	validation_0-rmsle:0.47762
[35]	validatio

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60816
[1]	validation_0-rmsle:0.59297
[2]	validation_0-rmsle:0.58038


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[3]	validation_0-rmsle:0.57000
[4]	validation_0-rmsle:0.56141
[5]	validation_0-rmsle:0.55406
[6]	validation_0-rmsle:0.54782
[7]	validation_0-rmsle:0.54249
[8]	validation_0-rmsle:0.53750
[9]	validation_0-rmsle:0.53316
[10]	validation_0-rmsle:0.53002
[11]	validation_0-rmsle:0.52659
[12]	validation_0-rmsle:0.52372
[13]	validation_0-rmsle:0.52095
[14]	validation_0-rmsle:0.51883
[15]	validation_0-rmsle:0.51682
[16]	validation_0-rmsle:0.51502
[17]	validation_0-rmsle:0.51354
[18]	validation_0-rmsle:0.51204
[19]	validation_0-rmsle:0.51071
[20]	validation_0-rmsle:0.50941
[21]	validation_0-rmsle:0.50844
[22]	validation_0-rmsle:0.50751
[23]	validation_0-rmsle:0.50689
[24]	validation_0-rmsle:0.50591
[25]	validation_0-rmsle:0.50512
[26]	validation_0-rmsle:0.50440
[27]	validation_0-rmsle:0.50386
[28]	validation_0-rmsle:0.50321
[29]	validation_0-rmsle:0.50268
[30]	validation_0-rmsle:0.50233
[31]	validation_0-rmsle:0.50166
[32]	validation_0-rmsle:0.50132
[33]	validation_0-rmsle:0.50085
[34]	validation

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60798
[1]	validation_0-rmsle:0.59269
[2]	validation_0-rmsle:0.57984
[3]	validation_0-rmsle:0.56928


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[4]	validation_0-rmsle:0.56063
[5]	validation_0-rmsle:0.55367
[6]	validation_0-rmsle:0.54684
[7]	validation_0-rmsle:0.54185
[8]	validation_0-rmsle:0.53707
[9]	validation_0-rmsle:0.53268
[10]	validation_0-rmsle:0.52911
[11]	validation_0-rmsle:0.52540
[12]	validation_0-rmsle:0.52265
[13]	validation_0-rmsle:0.51975
[14]	validation_0-rmsle:0.51715
[15]	validation_0-rmsle:0.51509
[16]	validation_0-rmsle:0.51301
[17]	validation_0-rmsle:0.51136
[18]	validation_0-rmsle:0.50986
[19]	validation_0-rmsle:0.50824
[20]	validation_0-rmsle:0.50714
[21]	validation_0-rmsle:0.50588
[22]	validation_0-rmsle:0.50479
[23]	validation_0-rmsle:0.50370
[24]	validation_0-rmsle:0.50276
[25]	validation_0-rmsle:0.50214
[26]	validation_0-rmsle:0.50152
[27]	validation_0-rmsle:0.50094
[28]	validation_0-rmsle:0.50026
[29]	validation_0-rmsle:0.49967
[30]	validation_0-rmsle:0.49917
[31]	validation_0-rmsle:0.49872
[32]	validation_0-rmsle:0.49823
[33]	validation_0-rmsle:0.49773
[34]	validation_0-rmsle:0.49745
[35]	validatio

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.63113
[1]	validation_0-rmsle:0.61348


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[2]	validation_0-rmsle:0.59890
[3]	validation_0-rmsle:0.58676
[4]	validation_0-rmsle:0.57683
[5]	validation_0-rmsle:0.56895
[6]	validation_0-rmsle:0.56099
[7]	validation_0-rmsle:0.55500
[8]	validation_0-rmsle:0.54876
[9]	validation_0-rmsle:0.54354
[10]	validation_0-rmsle:0.53936
[11]	validation_0-rmsle:0.53531
[12]	validation_0-rmsle:0.53176
[13]	validation_0-rmsle:0.52849
[14]	validation_0-rmsle:0.52587
[15]	validation_0-rmsle:0.52323
[16]	validation_0-rmsle:0.52111
[17]	validation_0-rmsle:0.51928
[18]	validation_0-rmsle:0.51755
[19]	validation_0-rmsle:0.51607
[20]	validation_0-rmsle:0.51453
[21]	validation_0-rmsle:0.51336
[22]	validation_0-rmsle:0.51215
[23]	validation_0-rmsle:0.51111
[24]	validation_0-rmsle:0.51010
[25]	validation_0-rmsle:0.50932
[26]	validation_0-rmsle:0.50858
[27]	validation_0-rmsle:0.50792
[28]	validation_0-rmsle:0.50738
[29]	validation_0-rmsle:0.50689
[30]	validation_0-rmsle:0.50635
[31]	validation_0-rmsle:0.50582
[32]	validation_0-rmsle:0.50524
[33]	validation_

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.drop(["price_doc"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val.drop(["price_doc"], axis=1, inplace=True)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


[0]	validation_0-rmsle:0.60356
[1]	validation_0-rmsle:0.58674
[2]	validation_0-rmsle:0.57290
[3]	validation_0-rmsle:0.56099


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
Parameters: { "mind_child_weight" } are not used.



[4]	validation_0-rmsle:0.55085
[5]	validation_0-rmsle:0.54281
[6]	validation_0-rmsle:0.53497
[7]	validation_0-rmsle:0.52893
[8]	validation_0-rmsle:0.52299
[9]	validation_0-rmsle:0.51745
[10]	validation_0-rmsle:0.51356
[11]	validation_0-rmsle:0.50943
[12]	validation_0-rmsle:0.50543
[13]	validation_0-rmsle:0.50205
[14]	validation_0-rmsle:0.49935
[15]	validation_0-rmsle:0.49714
[16]	validation_0-rmsle:0.49452
[17]	validation_0-rmsle:0.49239
[18]	validation_0-rmsle:0.49071
[19]	validation_0-rmsle:0.48908
[20]	validation_0-rmsle:0.48771
[21]	validation_0-rmsle:0.48630
[22]	validation_0-rmsle:0.48532
[23]	validation_0-rmsle:0.48420
[24]	validation_0-rmsle:0.48335
[25]	validation_0-rmsle:0.48256
[26]	validation_0-rmsle:0.48149
[27]	validation_0-rmsle:0.48085
[28]	validation_0-rmsle:0.48015
[29]	validation_0-rmsle:0.47939
[30]	validation_0-rmsle:0.47883
[31]	validation_0-rmsle:0.47842
[32]	validation_0-rmsle:0.47789
[33]	validation_0-rmsle:0.47741
[34]	validation_0-rmsle:0.47689
[35]	validatio

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


In [62]:
print("Loss: ", loss)
pred = model.predict(test.drop(["id"], axis=1))

Loss:  2324506.2016246263


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


In [63]:
# Save predictions
prediction_df = pd.DataFrame({
    'id': test_id['id'],
    'price_doc': pred
})

prediction_df.to_csv('./output/xgb_pred.csv', index=False)