In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error, r2_score, mean_squared_error

import lightgbm as lgb

import sys
sys.path.append('../lightgbm')

In [2]:
from sklearn.metrics import mean_squared_error
import optuna

def preprocess(data):
    #add relative floor
    data['rel_floor'] = data['floor']/data['max_floor']
    #add relative kitch_sq
    data['relative_kitch_sq'] = data['kitch_sq']/data['full_sq']
    #add room size
    data['room_size'] = data['life_sq']/data['num_room']
    # add month and day of week
    data['month'] = data.timestamp.dt.month.astype(int)
    data['day_of_week'] = data.timestamp.dt.dayofweek.astype(int)
    
    data['bought_minus_built'] = data.timestamp.dt.year.astype(int) - data['build_year']
    
    data.loc[data['full_sq']==data['life_sq'],'life_sq'] = np.nan

    return data
def fact_binary(data):
    data = data.applymap(lambda x: x if x!='yes' else 1)
    data = data.applymap(lambda x: x if x!='no' else 0)
    data = data.applymap(lambda x: x if x!='OwnerOccupier' else 0)
    data = data.applymap(lambda x: x if x!='Investment' else 1)
    return data

class sep_estimator:
    def __init__(self,owner_params,investment_params):
        self.est1 = lgb.LGBMRegressor(**owner_params)
        self.est2 = lgb.LGBMRegressor(**investment_params)
        
    def preprocess_owner(self,data,mode):
        assert (data['product_type'].values==0).all()
        if mode=='predict':
            data.loc[data['full_sq'].isnull(),'full_sq'] = 50
        data = data.drop('timestamp',axis=1)
        return data
    
    def preprocess_investment(self,data,mode):
        if mode=='predict':
            assert (data['product_type'].values==1).all()
            data.loc[data['full_sq'].isnull(),'full_sq'] = 50
        data = data.drop('timestamp',axis=1)
        return data
    
    def tuneHyperParams(self,X,y,size):
        def objective(trial):
            if (size == 'small'):
                params = {
                    "objective": "regression",
                    "metric": "rmse",
                    "verbosity": -1,
                    "n_estimators": trial.suggest_int("n_estimators", 100, 500),
    #                 "boosting": trial.suggest_categorical("boosting", ["gbdt", "rf", "dart"]),
                    "lambda_l2": trial.suggest_float("lambda_l2", 0, 10),
                    "bagging_fraction": trial.suggest_float("bagging_fraction", 0.5, 1),
                    "bagging_freq": 1,
                    "num_leaves": trial.suggest_int("num_leaves", 2, 2**8),
                    "feature_fraction": trial.suggest_float("feature_fraction", 0.5, 1),
                    "max_depth": trial.suggest_int("max_depth", 5, 10),
                    "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
                    "subsample": trial.suggest_float("subsample", 0.01, 1.0),
                    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.01, 1.0),
                    "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 10, 50),
                    "max_bin": trial.suggest_int("max_bin", 128, 512),
                }
            else:
                params = {
                    "objective": "regression",
                    "metric": "rmse",
                    "verbosity": -1,
                    "n_estimators": trial.suggest_int("n_estimators", 600, 1000),
    #                 "boosting": trial.suggest_categorical("boosting", ["gbdt", "rf", "dart"]),
                    "lambda_l2": trial.suggest_float("lambda_l2", 0, 10),
                    "bagging_fraction": trial.suggest_float("bagging_fraction", 0.5, 1),
                    "bagging_freq": 1,
                    "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
                    "feature_fraction": trial.suggest_float("feature_fraction", 0.5, 1),
                    "max_depth": trial.suggest_int("max_depth", 5, 15),
                    "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
                    "subsample": trial.suggest_float("subsample", 0.01, 1.0),
                    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.01, 1.0),
                    "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 10, 100),
                    "max_bin": trial.suggest_int("max_bin", 128, 512),
                }

            model = lgb.LGBMRegressor(**params)
            model.fit(X_train, y_train, verbose=False)
            predictions = model.predict(X_val)
            rmse = mean_squared_error(y_val, predictions, squared=False)
            return rmse
        
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
        
        study = optuna.create_study(direction='minimize')
        study.optimize(objective, n_trials=50)
        
        print('Best hyperparameters:', study.best_params)
        print('Best RMSE:', study.best_value)
        return study.best_params
    
    def fit(self,X,y):
        X1 = X[X['product_type']==0]
        X2 = X
        X1 = self.preprocess_owner(X1,'train')
        y1 = y.loc[X1.index.values]/X1['full_sq']
        
        new_owner_params = self.tuneHyperParams(X1,y1, 'small')
        self.est1 = lgb.LGBMRegressor(**new_owner_params)
        
        X2 = self.preprocess_investment(X2,'train')
        y2 = y
        y2 = y2/X2['full_sq']
        
        new_investment_params = self.tuneHyperParams(X2,y2, 'large')
        self.est2 = lgb.LGBMRegressor(**new_investment_params)
        
        if len(X1)>0:
            self.est1.fit(X1,y1)
        if len(X2)>0:
            self.est2.fit(X2,y2)
    
    def predict(self,X):
        X1 = X[X['product_type']==0]
        X2 = X[X['product_type']==1]
        owner_index = X1.index.values
        investment_index = X.index.drop(owner_index).values
        X1 = self.preprocess_owner(X1,'predict')
        X2 = self.preprocess_investment(X2,'predict')
        res = pd.DataFrame(index=X.index)
        if len(X1)>0:
            pred1 = self.est1.predict(X1)
            res.loc[owner_index,0] = pred1*X1['full_sq']
        if len(X2)>0:
            pred2 = self.est2.predict(X2)
            res.loc[investment_index,0] = pred2*X2['full_sq']     
        return res[0].values.flatten()

In [3]:
data = pd.read_csv('./clean data/train_clean.csv',index_col='id',parse_dates=['timestamp'])
data = fact_binary(data)
data = preprocess(data)
data = data.drop(['sub_area','ecology'],axis=1)
data = data.loc[~data['full_sq'].isnull(),:]
train = data

  data = data.applymap(lambda x: x if x!='yes' else 1)
  data = data.applymap(lambda x: x if x!='no' else 0)
  data = data.applymap(lambda x: x if x!='OwnerOccupier' else 0)
  data = data.applymap(lambda x: x if x!='Investment' else 1)


In [4]:
#    https://www.kaggle.com/aharless/exercising-the-exorcism
#
#

rate_2015_q2 = 1
rate_2015_q1 = rate_2015_q2 / 0.9932
rate_2014_q4 = rate_2015_q1 / 1.0112
rate_2014_q3 = rate_2014_q4 / 1.0169
rate_2014_q2 = rate_2014_q3 / 1.0086
rate_2014_q1 = rate_2014_q2 / 1.0126
rate_2013_q4 = rate_2014_q1 / 0.9902
rate_2013_q3 = rate_2013_q4 / 1.0041
rate_2013_q2 = rate_2013_q3 / 1.0044
rate_2013_q1 = rate_2013_q2 / 1.0104  # This is 1.002 (relative to mult), close to 1:
rate_2012_q4 = rate_2013_q1 / 0.9832  #     maybe use 2013q1 as a base quarter and get rid of mult?
rate_2012_q3 = rate_2012_q4 / 1.0277
rate_2012_q2 = rate_2012_q3 / 1.0279
rate_2012_q1 = rate_2012_q2 / 1.0279
rate_2011_q4 = rate_2012_q1 / 1.076
rate_2011_q3 = rate_2011_q4 / 1.0236
rate_2011_q2 = rate_2011_q3 / 1
rate_2011_q1 = rate_2011_q2 / 1.011


# train 2015
train['average_q_price'] = 1

train_2015_q2_index = train.loc[train['timestamp'].dt.year == 2015].loc[train['timestamp'].dt.month >= 4].loc[train['timestamp'].dt.month < 7].index
train.loc[train_2015_q2_index, 'average_q_price'] = rate_2015_q2

train_2015_q1_index = train.loc[train['timestamp'].dt.year == 2015].loc[train['timestamp'].dt.month >= 1].loc[train['timestamp'].dt.month < 4].index
train.loc[train_2015_q1_index, 'average_q_price'] = rate_2015_q1


# train 2014
train_2014_q4_index = train.loc[train['timestamp'].dt.year == 2014].loc[train['timestamp'].dt.month >= 10].loc[train['timestamp'].dt.month <= 12].index
train.loc[train_2014_q4_index, 'average_q_price'] = rate_2014_q4

train_2014_q3_index = train.loc[train['timestamp'].dt.year == 2014].loc[train['timestamp'].dt.month >= 7].loc[train['timestamp'].dt.month < 10].index
train.loc[train_2014_q3_index, 'average_q_price'] = rate_2014_q3

train_2014_q2_index = train.loc[train['timestamp'].dt.year == 2014].loc[train['timestamp'].dt.month >= 4].loc[train['timestamp'].dt.month < 7].index
train.loc[train_2014_q2_index, 'average_q_price'] = rate_2014_q2

train_2014_q1_index = train.loc[train['timestamp'].dt.year == 2014].loc[train['timestamp'].dt.month >= 1].loc[train['timestamp'].dt.month < 4].index
train.loc[train_2014_q1_index, 'average_q_price'] = rate_2014_q1


# train 2013
train_2013_q4_index = train.loc[train['timestamp'].dt.year == 2013].loc[train['timestamp'].dt.month >= 10].loc[train['timestamp'].dt.month <= 12].index
train.loc[train_2013_q4_index, 'average_q_price'] = rate_2013_q4

train_2013_q3_index = train.loc[train['timestamp'].dt.year == 2013].loc[train['timestamp'].dt.month >= 7].loc[train['timestamp'].dt.month < 10].index
train.loc[train_2013_q3_index, 'average_q_price'] = rate_2013_q3

train_2013_q2_index = train.loc[train['timestamp'].dt.year == 2013].loc[train['timestamp'].dt.month >= 4].loc[train['timestamp'].dt.month < 7].index
train.loc[train_2013_q2_index, 'average_q_price'] = rate_2013_q2

train_2013_q1_index = train.loc[train['timestamp'].dt.year == 2013].loc[train['timestamp'].dt.month >= 1].loc[train['timestamp'].dt.month < 4].index
train.loc[train_2013_q1_index, 'average_q_price'] = rate_2013_q1


# train 2012
train_2012_q4_index = train.loc[train['timestamp'].dt.year == 2012].loc[train['timestamp'].dt.month >= 10].loc[train['timestamp'].dt.month <= 12].index
train.loc[train_2012_q4_index, 'average_q_price'] = rate_2012_q4

train_2012_q3_index = train.loc[train['timestamp'].dt.year == 2012].loc[train['timestamp'].dt.month >= 7].loc[train['timestamp'].dt.month < 10].index
train.loc[train_2012_q3_index, 'average_q_price'] = rate_2012_q3

train_2012_q2_index = train.loc[train['timestamp'].dt.year == 2012].loc[train['timestamp'].dt.month >= 4].loc[train['timestamp'].dt.month < 7].index
train.loc[train_2012_q2_index, 'average_q_price'] = rate_2012_q2

train_2012_q1_index = train.loc[train['timestamp'].dt.year == 2012].loc[train['timestamp'].dt.month >= 1].loc[train['timestamp'].dt.month < 4].index
train.loc[train_2012_q1_index, 'average_q_price'] = rate_2012_q1


# train 2011
train_2011_q4_index = train.loc[train['timestamp'].dt.year == 2011].loc[train['timestamp'].dt.month >= 10].loc[train['timestamp'].dt.month <= 12].index
train.loc[train_2011_q4_index, 'average_q_price'] = rate_2011_q4

train_2011_q3_index = train.loc[train['timestamp'].dt.year == 2011].loc[train['timestamp'].dt.month >= 7].loc[train['timestamp'].dt.month < 10].index
train.loc[train_2011_q3_index, 'average_q_price'] = rate_2011_q3

train_2011_q2_index = train.loc[train['timestamp'].dt.year == 2011].loc[train['timestamp'].dt.month >= 4].loc[train['timestamp'].dt.month < 7].index
train.loc[train_2011_q2_index, 'average_q_price'] = rate_2011_q2

train_2011_q1_index = train.loc[train['timestamp'].dt.year == 2011].loc[train['timestamp'].dt.month >= 1].loc[train['timestamp'].dt.month < 4].index
train.loc[train_2011_q1_index, 'average_q_price'] = rate_2011_q1

train['price_doc'] = train['price_doc'] * train['average_q_price']


#########################################################################################################


X = train.drop(["price_doc", "average_q_price"],axis=1)
y = train['price_doc']

  train.loc[train_2015_q1_index, 'average_q_price'] = rate_2015_q1


In [5]:
kaggle_test = pd.read_csv('./clean data/test_clean.csv', parse_dates=['timestamp'],index_col='id')
test1 = kaggle_test.copy()
test1 = preprocess(test1)
test1 = fact_binary(test1)
#test1 = test1.merge(macro,on='timestamp')
test1 = test1.drop(['ecology','sub_area'],axis=1)
test1.loc[test1['full_sq'].isnull(),'full_sq'] = 50
test1.loc[test1['product_type'].isnull(),'product_type'] = 1

  data = data.applymap(lambda x: x if x!='yes' else 1)
  data = data.applymap(lambda x: x if x!='no' else 0)
  data = data.applymap(lambda x: x if x!='OwnerOccupier' else 0)
  data = data.applymap(lambda x: x if x!='Investment' else 1)


In [6]:
# 0.31010 params
owner_params = {
    'n_estimators': 388, 
    'lambda_l2': 5.8771302319480325, 
    'bagging_fraction': 0.8660359026008017, 
    'num_leaves': 202, 
    'feature_fraction': 0.7359742854201246, 
    'max_depth': 9, 
    'learning_rate': 0.08215092367951148, 
    'subsample': 0.41686599402329344, 
    'colsample_bytree': 0.1944406869188812, 
    'min_data_in_leaf': 11, 
    'max_bin': 392
}

investment_params = {
    'n_estimators': 640, 
    'lambda_l2': 6.421076660114255, 
    'bagging_fraction': 0.8195474653271653, 
    'num_leaves': 474, 
    'feature_fraction': 0.5006530112297926, 
    'max_depth': 10, 
    'learning_rate': 0.010757243865305521, 
    'subsample': 0.24435014285782658, 
    'colsample_bytree': 0.2998744009955994, 
    'min_data_in_leaf': 36, 
    'max_bin': 274
}

est = sep_estimator(owner_params=owner_params,investment_params=investment_params)
est.fit(X,y)

[I 2023-11-14 07:31:11,087] A new study created in memory with name: no-name-9cc4c708-611d-4275-81a5-3f7075677272




[I 2023-11-14 07:31:12,741] Trial 0 finished with value: 26663.787991699537 and parameters: {'n_estimators': 303, 'lambda_l2': 1.5451618013220791, 'bagging_fraction': 0.8983772472892508, 'num_leaves': 205, 'feature_fraction': 0.7671819568910898, 'max_depth': 7, 'learning_rate': 0.0027599198418127506, 'subsample': 0.7422927956260987, 'colsample_bytree': 0.4422032921125471, 'min_data_in_leaf': 27, 'max_bin': 173}. Best is trial 0 with value: 26663.787991699537.




[I 2023-11-14 07:31:13,722] Trial 1 finished with value: 21100.629482350112 and parameters: {'n_estimators': 480, 'lambda_l2': 4.111772170819834, 'bagging_fraction': 0.6734266637171888, 'num_leaves': 49, 'feature_fraction': 0.6269384779588572, 'max_depth': 6, 'learning_rate': 0.02267871066816075, 'subsample': 0.7136527901381782, 'colsample_bytree': 0.515820060771841, 'min_data_in_leaf': 31, 'max_bin': 359}. Best is trial 1 with value: 21100.629482350112.




[I 2023-11-14 07:31:16,538] Trial 2 finished with value: 24596.885676172136 and parameters: {'n_estimators': 397, 'lambda_l2': 1.41916550747201, 'bagging_fraction': 0.9096726203978404, 'num_leaves': 153, 'feature_fraction': 0.9802202908829667, 'max_depth': 7, 'learning_rate': 0.0028175511871413646, 'subsample': 0.7397384052805466, 'colsample_bytree': 0.971889746131273, 'min_data_in_leaf': 14, 'max_bin': 487}. Best is trial 1 with value: 21100.629482350112.




[I 2023-11-14 07:31:17,249] Trial 3 finished with value: 20952.47254083309 and parameters: {'n_estimators': 252, 'lambda_l2': 7.7731494604131814, 'bagging_fraction': 0.8684172040714152, 'num_leaves': 109, 'feature_fraction': 0.6877839898109102, 'max_depth': 7, 'learning_rate': 0.05598549690332671, 'subsample': 0.8522907880003978, 'colsample_bytree': 0.4622552117017084, 'min_data_in_leaf': 43, 'max_bin': 280}. Best is trial 3 with value: 20952.47254083309.




[I 2023-11-14 07:31:18,672] Trial 4 finished with value: 20914.80543672159 and parameters: {'n_estimators': 457, 'lambda_l2': 5.393850274016706, 'bagging_fraction': 0.9551692115057053, 'num_leaves': 242, 'feature_fraction': 0.969474121721946, 'max_depth': 8, 'learning_rate': 0.04914916917692899, 'subsample': 0.08042944091659375, 'colsample_bytree': 0.4094858458898397, 'min_data_in_leaf': 49, 'max_bin': 446}. Best is trial 4 with value: 20914.80543672159.




[I 2023-11-14 07:31:19,071] Trial 5 finished with value: 23226.535735496793 and parameters: {'n_estimators': 159, 'lambda_l2': 2.8283325786522164, 'bagging_fraction': 0.849958475297611, 'num_leaves': 8, 'feature_fraction': 0.6098987541695954, 'max_depth': 10, 'learning_rate': 0.02170604412005459, 'subsample': 0.3034467915794841, 'colsample_bytree': 0.06769165916615896, 'min_data_in_leaf': 42, 'max_bin': 250}. Best is trial 4 with value: 20914.80543672159.




[I 2023-11-14 07:31:20,934] Trial 6 finished with value: 22195.044259285532 and parameters: {'n_estimators': 474, 'lambda_l2': 8.457843655543693, 'bagging_fraction': 0.7278912787115518, 'num_leaves': 190, 'feature_fraction': 0.8267171845421419, 'max_depth': 7, 'learning_rate': 0.004557224570015624, 'subsample': 0.4007675862592253, 'colsample_bytree': 0.9467544478762777, 'min_data_in_leaf': 26, 'max_bin': 182}. Best is trial 4 with value: 20914.80543672159.




[I 2023-11-14 07:31:23,626] Trial 7 finished with value: 21450.008048422584 and parameters: {'n_estimators': 467, 'lambda_l2': 7.961564851219222, 'bagging_fraction': 0.9635907608273047, 'num_leaves': 232, 'feature_fraction': 0.8782109336672677, 'max_depth': 7, 'learning_rate': 0.0059002612515205955, 'subsample': 0.02623911166095841, 'colsample_bytree': 0.22427152044479018, 'min_data_in_leaf': 11, 'max_bin': 210}. Best is trial 4 with value: 20914.80543672159.




[I 2023-11-14 07:31:26,534] Trial 8 finished with value: 20994.966334798857 and parameters: {'n_estimators': 284, 'lambda_l2': 2.9799454804711054, 'bagging_fraction': 0.9292611913272842, 'num_leaves': 224, 'feature_fraction': 0.797225451144093, 'max_depth': 9, 'learning_rate': 0.011881373404845186, 'subsample': 0.9241747562158175, 'colsample_bytree': 0.22402751170225746, 'min_data_in_leaf': 19, 'max_bin': 332}. Best is trial 4 with value: 20914.80543672159.




[I 2023-11-14 07:31:28,437] Trial 9 finished with value: 21366.08291739017 and parameters: {'n_estimators': 368, 'lambda_l2': 8.752393407951585, 'bagging_fraction': 0.7590431728719297, 'num_leaves': 87, 'feature_fraction': 0.5911366891104554, 'max_depth': 8, 'learning_rate': 0.010324586028325618, 'subsample': 0.2291122996851163, 'colsample_bytree': 0.40239595149592383, 'min_data_in_leaf': 31, 'max_bin': 366}. Best is trial 4 with value: 20914.80543672159.




[I 2023-11-14 07:31:28,960] Trial 10 finished with value: 21558.934701211136 and parameters: {'n_estimators': 103, 'lambda_l2': 5.897554717142198, 'bagging_fraction': 0.5892413121828695, 'num_leaves': 156, 'feature_fraction': 0.9970979656297432, 'max_depth': 5, 'learning_rate': 0.09979979603905127, 'subsample': 0.02881770749589302, 'colsample_bytree': 0.6840326439019395, 'min_data_in_leaf': 48, 'max_bin': 488}. Best is trial 4 with value: 20914.80543672159.




[I 2023-11-14 07:31:29,897] Trial 11 finished with value: 20871.43543121492 and parameters: {'n_estimators': 221, 'lambda_l2': 6.671838775319134, 'bagging_fraction': 0.9956157122947142, 'num_leaves': 105, 'feature_fraction': 0.7026783248829033, 'max_depth': 9, 'learning_rate': 0.06457590214419719, 'subsample': 0.5495740629460044, 'colsample_bytree': 0.6195391748153267, 'min_data_in_leaf': 42, 'max_bin': 415}. Best is trial 11 with value: 20871.43543121492.




[I 2023-11-14 07:31:30,884] Trial 12 finished with value: 20905.33552040556 and parameters: {'n_estimators': 205, 'lambda_l2': 6.038761304091276, 'bagging_fraction': 0.9985692370482243, 'num_leaves': 128, 'feature_fraction': 0.7035273985289132, 'max_depth': 9, 'learning_rate': 0.05357865304990458, 'subsample': 0.5434741383486551, 'colsample_bytree': 0.6733779873810524, 'min_data_in_leaf': 50, 'max_bin': 418}. Best is trial 11 with value: 20871.43543121492.




[I 2023-11-14 07:31:31,889] Trial 13 finished with value: 20932.164475841248 and parameters: {'n_estimators': 206, 'lambda_l2': 6.300608833287307, 'bagging_fraction': 0.9846293664575423, 'num_leaves': 78, 'feature_fraction': 0.5105096003208225, 'max_depth': 10, 'learning_rate': 0.08258080157941146, 'subsample': 0.5364108147457667, 'colsample_bytree': 0.7009284777351048, 'min_data_in_leaf': 38, 'max_bin': 407}. Best is trial 11 with value: 20871.43543121492.




[I 2023-11-14 07:31:33,008] Trial 14 finished with value: 21150.233086250708 and parameters: {'n_estimators': 198, 'lambda_l2': 6.897703817280083, 'bagging_fraction': 0.9969823269382486, 'num_leaves': 144, 'feature_fraction': 0.70821012580574, 'max_depth': 9, 'learning_rate': 0.03547466171976314, 'subsample': 0.5596759138958434, 'colsample_bytree': 0.700957900852397, 'min_data_in_leaf': 40, 'max_bin': 419}. Best is trial 11 with value: 20871.43543121492.




[I 2023-11-14 07:31:33,901] Trial 15 finished with value: 36502.145630827144 and parameters: {'n_estimators': 128, 'lambda_l2': 9.524915557082823, 'bagging_fraction': 0.8553148307468639, 'num_leaves': 114, 'feature_fraction': 0.7057873439400775, 'max_depth': 9, 'learning_rate': 0.0013107207102822712, 'subsample': 0.46793517502448023, 'colsample_bytree': 0.833415208751524, 'min_data_in_leaf': 50, 'max_bin': 396}. Best is trial 11 with value: 20871.43543121492.




[I 2023-11-14 07:31:35,305] Trial 16 finished with value: 21151.9012690977 and parameters: {'n_estimators': 234, 'lambda_l2': 4.74626894436681, 'bagging_fraction': 0.9935848797542215, 'num_leaves': 51, 'feature_fraction': 0.7477103890322454, 'max_depth': 9, 'learning_rate': 0.030947546445702558, 'subsample': 0.612138620432479, 'colsample_bytree': 0.593330386146664, 'min_data_in_leaf': 35, 'max_bin': 456}. Best is trial 11 with value: 20871.43543121492.




[I 2023-11-14 07:31:36,246] Trial 17 finished with value: 20950.124125015976 and parameters: {'n_estimators': 168, 'lambda_l2': 6.971838068907953, 'bagging_fraction': 0.8210492475857267, 'num_leaves': 182, 'feature_fraction': 0.6742734192354787, 'max_depth': 10, 'learning_rate': 0.06722901185739458, 'subsample': 0.3997928704475347, 'colsample_bytree': 0.8201508177130479, 'min_data_in_leaf': 45, 'max_bin': 297}. Best is trial 11 with value: 20871.43543121492.




[I 2023-11-14 07:31:37,576] Trial 18 finished with value: 20722.076843490293 and parameters: {'n_estimators': 323, 'lambda_l2': 5.011793520469832, 'bagging_fraction': 0.9229357066895534, 'num_leaves': 86, 'feature_fraction': 0.7565427920192774, 'max_depth': 8, 'learning_rate': 0.09561731736705152, 'subsample': 0.659121776782498, 'colsample_bytree': 0.6406235409200229, 'min_data_in_leaf': 36, 'max_bin': 512}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:38,282] Trial 19 finished with value: 21624.156251051212 and parameters: {'n_estimators': 331, 'lambda_l2': 4.7098448944340365, 'bagging_fraction': 0.9268331616432273, 'num_leaves': 4, 'feature_fraction': 0.8477000460308872, 'max_depth': 8, 'learning_rate': 0.08870360564695826, 'subsample': 0.6401740553698579, 'colsample_bytree': 0.5497647733753102, 'min_data_in_leaf': 35, 'max_bin': 510}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:39,243] Trial 20 finished with value: 21022.15279119192 and parameters: {'n_estimators': 398, 'lambda_l2': 3.874517699373239, 'bagging_fraction': 0.8051506322332975, 'num_leaves': 52, 'feature_fraction': 0.758216942819566, 'max_depth': 6, 'learning_rate': 0.0401471184213104, 'subsample': 0.9895751685075098, 'colsample_bytree': 0.5789661149466135, 'min_data_in_leaf': 36, 'max_bin': 451}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:40,250] Trial 21 finished with value: 21004.367756486874 and parameters: {'n_estimators': 270, 'lambda_l2': 5.874679313008398, 'bagging_fraction': 0.998792804923533, 'num_leaves': 91, 'feature_fraction': 0.7308691323051063, 'max_depth': 9, 'learning_rate': 0.05756927827892542, 'subsample': 0.6263682884177106, 'colsample_bytree': 0.6447027402585471, 'min_data_in_leaf': 45, 'max_bin': 373}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:41,320] Trial 22 finished with value: 20801.187811608852 and parameters: {'n_estimators': 333, 'lambda_l2': 6.760171794712744, 'bagging_fraction': 0.9424961332130355, 'num_leaves': 127, 'feature_fraction': 0.6770596841594216, 'max_depth': 8, 'learning_rate': 0.09542680606663868, 'subsample': 0.46056871829217355, 'colsample_bytree': 0.7757397443017251, 'min_data_in_leaf': 45, 'max_bin': 433}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:42,336] Trial 23 finished with value: 20900.70006565716 and parameters: {'n_estimators': 341, 'lambda_l2': 6.928182522029999, 'bagging_fraction': 0.9396217641545889, 'num_leaves': 107, 'feature_fraction': 0.6557166896816166, 'max_depth': 8, 'learning_rate': 0.08549385000041318, 'subsample': 0.4833763100446039, 'colsample_bytree': 0.8002489866973161, 'min_data_in_leaf': 40, 'max_bin': 477}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:43,337] Trial 24 finished with value: 20896.6718486086 and parameters: {'n_estimators': 315, 'lambda_l2': 5.137856718465101, 'bagging_fraction': 0.9054066436335155, 'num_leaves': 75, 'feature_fraction': 0.6566189570566301, 'max_depth': 8, 'learning_rate': 0.09946243598259172, 'subsample': 0.6808696503811537, 'colsample_bytree': 0.7633731474155004, 'min_data_in_leaf': 45, 'max_bin': 502}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:44,568] Trial 25 finished with value: 20740.190040469242 and parameters: {'n_estimators': 363, 'lambda_l2': 7.355157381208357, 'bagging_fraction': 0.950775676690197, 'num_leaves': 128, 'feature_fraction': 0.7843598184855093, 'max_depth': 8, 'learning_rate': 0.05643668723454852, 'subsample': 0.8018786594110747, 'colsample_bytree': 0.8833479746334376, 'min_data_in_leaf': 33, 'max_bin': 443}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:45,686] Trial 26 finished with value: 20752.997813817627 and parameters: {'n_estimators': 404, 'lambda_l2': 7.256512314042869, 'bagging_fraction': 0.8776671545751851, 'num_leaves': 134, 'feature_fraction': 0.7877585355820038, 'max_depth': 6, 'learning_rate': 0.03997149961838273, 'subsample': 0.7757780177629531, 'colsample_bytree': 0.900175004147864, 'min_data_in_leaf': 23, 'max_bin': 448}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:46,947] Trial 27 finished with value: 20924.678703817994 and parameters: {'n_estimators': 427, 'lambda_l2': 9.200392717568763, 'bagging_fraction': 0.8911038324503603, 'num_leaves': 161, 'feature_fraction': 0.7922642404181414, 'max_depth': 6, 'learning_rate': 0.039575862903150426, 'subsample': 0.8042243690546326, 'colsample_bytree': 0.8800769780297455, 'min_data_in_leaf': 21, 'max_bin': 465}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:47,903] Trial 28 finished with value: 21236.36947921729 and parameters: {'n_estimators': 366, 'lambda_l2': 9.938163023493777, 'bagging_fraction': 0.8800352040820967, 'num_leaves': 172, 'feature_fraction': 0.8881899444317395, 'max_depth': 5, 'learning_rate': 0.02721177499411083, 'subsample': 0.8266143448253574, 'colsample_bytree': 0.9069873211722685, 'min_data_in_leaf': 23, 'max_bin': 387}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:49,011] Trial 29 finished with value: 21383.931062844327 and parameters: {'n_estimators': 428, 'lambda_l2': 7.803819039024265, 'bagging_fraction': 0.8974969861998628, 'num_leaves': 204, 'feature_fraction': 0.7885987797635813, 'max_depth': 5, 'learning_rate': 0.01862919125723396, 'subsample': 0.7696314199708657, 'colsample_bytree': 0.967863172250606, 'min_data_in_leaf': 27, 'max_bin': 336}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:49,962] Trial 30 finished with value: 21011.570498744262 and parameters: {'n_estimators': 377, 'lambda_l2': 7.580831171668521, 'bagging_fraction': 0.8410846154188082, 'num_leaves': 138, 'feature_fraction': 0.7416956216605692, 'max_depth': 6, 'learning_rate': 0.041541007703935935, 'subsample': 0.8823733452231173, 'colsample_bytree': 0.8823210597910643, 'min_data_in_leaf': 33, 'max_bin': 508}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:51,155] Trial 31 finished with value: 20790.52495197941 and parameters: {'n_estimators': 296, 'lambda_l2': 7.262062885253217, 'bagging_fraction': 0.9482242878480441, 'num_leaves': 126, 'feature_fraction': 0.7723118773193522, 'max_depth': 8, 'learning_rate': 0.06718918734118746, 'subsample': 0.7355699497777166, 'colsample_bytree': 0.7694546374180755, 'min_data_in_leaf': 24, 'max_bin': 441}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:52,265] Trial 32 finished with value: 20800.994255674366 and parameters: {'n_estimators': 302, 'lambda_l2': 8.495445821118608, 'bagging_fraction': 0.94591533187947, 'num_leaves': 122, 'feature_fraction': 0.786290423123992, 'max_depth': 8, 'learning_rate': 0.06203356851056371, 'subsample': 0.7694152645472763, 'colsample_bytree': 0.7487530049049652, 'min_data_in_leaf': 28, 'max_bin': 468}. Best is trial 18 with value: 20722.076843490293.




[I 2023-11-14 07:31:53,732] Trial 33 finished with value: 20538.4371109275 and parameters: {'n_estimators': 418, 'lambda_l2': 7.435030830729543, 'bagging_fraction': 0.9037758934897409, 'num_leaves': 66, 'feature_fraction': 0.7659164929338254, 'max_depth': 7, 'learning_rate': 0.04576392888467908, 'subsample': 0.7023410154529746, 'colsample_bytree': 0.8879620011757393, 'min_data_in_leaf': 16, 'max_bin': 436}. Best is trial 33 with value: 20538.4371109275.




[I 2023-11-14 07:31:55,510] Trial 34 finished with value: 20738.136376434544 and parameters: {'n_estimators': 497, 'lambda_l2': 8.030670955337158, 'bagging_fraction': 0.9113811193647994, 'num_leaves': 66, 'feature_fraction': 0.819243765568467, 'max_depth': 7, 'learning_rate': 0.030249855313549248, 'subsample': 0.6909922862944146, 'colsample_bytree': 0.9182743205074554, 'min_data_in_leaf': 17, 'max_bin': 483}. Best is trial 33 with value: 20538.4371109275.




[I 2023-11-14 07:31:57,185] Trial 35 finished with value: 20946.60418138506 and parameters: {'n_estimators': 445, 'lambda_l2': 8.133955447639527, 'bagging_fraction': 0.9190067887341327, 'num_leaves': 30, 'feature_fraction': 0.8271708441150257, 'max_depth': 7, 'learning_rate': 0.023755818200823325, 'subsample': 0.6894477124650567, 'colsample_bytree': 0.963398940350374, 'min_data_in_leaf': 16, 'max_bin': 483}. Best is trial 33 with value: 20538.4371109275.




[I 2023-11-14 07:31:59,070] Trial 36 finished with value: 20409.385783453577 and parameters: {'n_estimators': 500, 'lambda_l2': 0.36978134386024575, 'bagging_fraction': 0.9108190713445593, 'num_leaves': 63, 'feature_fraction': 0.8208352381370674, 'max_depth': 7, 'learning_rate': 0.04551941496086245, 'subsample': 0.7251169328279444, 'colsample_bytree': 0.9891475516872527, 'min_data_in_leaf': 16, 'max_bin': 487}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:01,540] Trial 37 finished with value: 20454.41489195716 and parameters: {'n_estimators': 494, 'lambda_l2': 0.5292877689906464, 'bagging_fraction': 0.8947176714417903, 'num_leaves': 61, 'feature_fraction': 0.8154331523275354, 'max_depth': 7, 'learning_rate': 0.017184767843523477, 'subsample': 0.7008201474718564, 'colsample_bytree': 0.9842023643839034, 'min_data_in_leaf': 10, 'max_bin': 493}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:03,606] Trial 38 finished with value: 20862.441021175062 and parameters: {'n_estimators': 488, 'lambda_l2': 0.14345401076756137, 'bagging_fraction': 0.8851595090745688, 'num_leaves': 28, 'feature_fraction': 0.8599237288339391, 'max_depth': 7, 'learning_rate': 0.018142054441520755, 'subsample': 0.7445590898649979, 'colsample_bytree': 0.9803043408795488, 'min_data_in_leaf': 11, 'max_bin': 509}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:05,309] Trial 39 finished with value: 20871.530427558162 and parameters: {'n_estimators': 451, 'lambda_l2': 0.9952722504592546, 'bagging_fraction': 0.8653361963527634, 'num_leaves': 32, 'feature_fraction': 0.7312135239900456, 'max_depth': 7, 'learning_rate': 0.014616380392246935, 'subsample': 0.7064798117425111, 'colsample_bytree': 0.994221239197006, 'min_data_in_leaf': 13, 'max_bin': 250}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:06,870] Trial 40 finished with value: 20660.649194327165 and parameters: {'n_estimators': 423, 'lambda_l2': 2.3327270661371347, 'bagging_fraction': 0.8366145858177257, 'num_leaves': 65, 'feature_fraction': 0.9193637091919792, 'max_depth': 7, 'learning_rate': 0.047094396113835715, 'subsample': 0.8572490964122409, 'colsample_bytree': 0.8428318493705919, 'min_data_in_leaf': 14, 'max_bin': 136}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:08,500] Trial 41 finished with value: 20415.514568599086 and parameters: {'n_estimators': 500, 'lambda_l2': 1.9475706838854432, 'bagging_fraction': 0.8325677324629843, 'num_leaves': 62, 'feature_fraction': 0.9060079627348612, 'max_depth': 7, 'learning_rate': 0.04765725094026455, 'subsample': 0.6505248981335009, 'colsample_bytree': 0.8466579502340976, 'min_data_in_leaf': 16, 'max_bin': 227}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:10,126] Trial 42 finished with value: 20684.27468843233 and parameters: {'n_estimators': 475, 'lambda_l2': 1.8378914736464813, 'bagging_fraction': 0.8368849770309594, 'num_leaves': 64, 'feature_fraction': 0.911219772702199, 'max_depth': 7, 'learning_rate': 0.04578803283345987, 'subsample': 0.8517000834829983, 'colsample_bytree': 0.9994367970374911, 'min_data_in_leaf': 14, 'max_bin': 178}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:11,438] Trial 43 finished with value: 20780.075336775422 and parameters: {'n_estimators': 428, 'lambda_l2': 0.26914659388562445, 'bagging_fraction': 0.8040358081235719, 'num_leaves': 43, 'feature_fraction': 0.9354834709372731, 'max_depth': 6, 'learning_rate': 0.031068367696907195, 'subsample': 0.5959614927282758, 'colsample_bytree': 0.8587350967748866, 'min_data_in_leaf': 18, 'max_bin': 201}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:12,772] Trial 44 finished with value: 20831.363842406277 and parameters: {'n_estimators': 496, 'lambda_l2': 1.593201660205402, 'bagging_fraction': 0.8570411235109867, 'num_leaves': 17, 'feature_fraction': 0.9491212097568339, 'max_depth': 7, 'learning_rate': 0.046083752218984475, 'subsample': 0.7249759725640043, 'colsample_bytree': 0.9335380309970475, 'min_data_in_leaf': 10, 'max_bin': 151}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:14,330] Trial 45 finished with value: 20539.418637382183 and parameters: {'n_estimators': 461, 'lambda_l2': 0.9007902057746557, 'bagging_fraction': 0.7692583652750742, 'num_leaves': 65, 'feature_fraction': 0.8905834149620082, 'max_depth': 7, 'learning_rate': 0.025039212387997476, 'subsample': 0.8973465943217014, 'colsample_bytree': 0.9369197312357939, 'min_data_in_leaf': 14, 'max_bin': 130}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:15,750] Trial 46 finished with value: 20777.641329142094 and parameters: {'n_estimators': 465, 'lambda_l2': 0.7756011426115237, 'bagging_fraction': 0.7762918359314672, 'num_leaves': 93, 'feature_fraction': 0.8863018428238998, 'max_depth': 6, 'learning_rate': 0.023828383974080772, 'subsample': 0.9296673744152355, 'colsample_bytree': 0.9173630265859198, 'min_data_in_leaf': 16, 'max_bin': 252}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:17,309] Trial 47 finished with value: 20899.70826188716 and parameters: {'n_estimators': 479, 'lambda_l2': 1.1818840453986132, 'bagging_fraction': 0.6948980810632628, 'num_leaves': 40, 'feature_fraction': 0.8574665632353723, 'max_depth': 7, 'learning_rate': 0.020085640591593804, 'subsample': 0.6428839177535162, 'colsample_bytree': 0.9465684319426164, 'min_data_in_leaf': 19, 'max_bin': 225}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:19,495] Trial 48 finished with value: 20672.264633401013 and parameters: {'n_estimators': 452, 'lambda_l2': 0.5780919465793422, 'bagging_fraction': 0.9685345912626903, 'num_leaves': 60, 'feature_fraction': 0.815879059195689, 'max_depth': 7, 'learning_rate': 0.01436846567721936, 'subsample': 0.590973207146732, 'colsample_bytree': 0.8541179432531479, 'min_data_in_leaf': 12, 'max_bin': 273}. Best is trial 36 with value: 20409.385783453577.




[I 2023-11-14 07:32:20,829] Trial 49 finished with value: 20701.92823486642 and parameters: {'n_estimators': 467, 'lambda_l2': 0.5308734422980224, 'bagging_fraction': 0.870250751544395, 'num_leaves': 74, 'feature_fraction': 0.8376462331751612, 'max_depth': 6, 'learning_rate': 0.03447886074010042, 'subsample': 0.6623963696652249, 'colsample_bytree': 0.9439748348519518, 'min_data_in_leaf': 20, 'max_bin': 154}. Best is trial 36 with value: 20409.385783453577.
[I 2023-11-14 07:32:20,941] A new study created in memory with name: no-name-b820eefc-87c5-4f80-9bba-1799f83f9bc8


Best hyperparameters: {'n_estimators': 500, 'lambda_l2': 0.36978134386024575, 'bagging_fraction': 0.9108190713445593, 'num_leaves': 63, 'feature_fraction': 0.8208352381370674, 'max_depth': 7, 'learning_rate': 0.04551941496086245, 'subsample': 0.7251169328279444, 'colsample_bytree': 0.9891475516872527, 'min_data_in_leaf': 16, 'max_bin': 487}
Best RMSE: 20409.385783453577


[I 2023-11-14 07:32:32,457] Trial 0 finished with value: 40959.77325040272 and parameters: {'n_estimators': 997, 'lambda_l2': 3.668358021164728, 'bagging_fraction': 0.7155481192717512, 'num_leaves': 815, 'feature_fraction': 0.6623653326002172, 'max_depth': 14, 'learning_rate': 0.07112300970278869, 'subsample': 0.5851008642775202, 'colsample_bytree': 0.6166817077366326, 'min_data_in_leaf': 81, 'max_bin': 320}. Best is trial 0 with value: 40959.77325040272.




[I 2023-11-14 07:32:38,244] Trial 1 finished with value: 38470.513486224394 and parameters: {'n_estimators': 947, 'lambda_l2': 9.263219460354335, 'bagging_fraction': 0.6022478292257032, 'num_leaves': 363, 'feature_fraction': 0.5040007123553449, 'max_depth': 8, 'learning_rate': 0.01221409643316109, 'subsample': 0.8774036281497353, 'colsample_bytree': 0.961538575965723, 'min_data_in_leaf': 72, 'max_bin': 321}. Best is trial 1 with value: 38470.513486224394.




[I 2023-11-14 07:32:44,613] Trial 2 finished with value: 39463.07426287649 and parameters: {'n_estimators': 711, 'lambda_l2': 9.510278461655734, 'bagging_fraction': 0.5320959761210798, 'num_leaves': 264, 'feature_fraction': 0.6874444896960789, 'max_depth': 9, 'learning_rate': 0.003582364170664674, 'subsample': 0.44106893684418963, 'colsample_bytree': 0.42302668886357736, 'min_data_in_leaf': 74, 'max_bin': 200}. Best is trial 1 with value: 38470.513486224394.




[I 2023-11-14 07:33:06,382] Trial 3 finished with value: 38469.37739993295 and parameters: {'n_estimators': 810, 'lambda_l2': 0.6624843173055073, 'bagging_fraction': 0.8569598993441219, 'num_leaves': 402, 'feature_fraction': 0.9675102822641174, 'max_depth': 11, 'learning_rate': 0.007297561322484232, 'subsample': 0.5390676007260381, 'colsample_bytree': 0.43341093081626086, 'min_data_in_leaf': 64, 'max_bin': 508}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:33:10,853] Trial 4 finished with value: 39705.86367919146 and parameters: {'n_estimators': 914, 'lambda_l2': 0.4320866346572527, 'bagging_fraction': 0.926445008518451, 'num_leaves': 890, 'feature_fraction': 0.5907548405148519, 'max_depth': 5, 'learning_rate': 0.003881081415284054, 'subsample': 0.8804753619351628, 'colsample_bytree': 0.10628990930081925, 'min_data_in_leaf': 23, 'max_bin': 130}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:33:40,920] Trial 5 finished with value: 39464.06892187069 and parameters: {'n_estimators': 975, 'lambda_l2': 9.524731557395182, 'bagging_fraction': 0.6678434062127224, 'num_leaves': 606, 'feature_fraction': 0.5927447169717588, 'max_depth': 14, 'learning_rate': 0.001983392271594825, 'subsample': 0.7181928627869761, 'colsample_bytree': 0.25432970217337514, 'min_data_in_leaf': 26, 'max_bin': 382}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:34:03,661] Trial 6 finished with value: 39975.909728363615 and parameters: {'n_estimators': 874, 'lambda_l2': 6.7339718209772705, 'bagging_fraction': 0.9011203408328874, 'num_leaves': 935, 'feature_fraction': 0.5524890186419197, 'max_depth': 15, 'learning_rate': 0.030115982965969366, 'subsample': 0.8161021979886469, 'colsample_bytree': 0.20679463079932314, 'min_data_in_leaf': 17, 'max_bin': 266}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:34:15,860] Trial 7 finished with value: 38672.03600663798 and parameters: {'n_estimators': 913, 'lambda_l2': 5.233530379706995, 'bagging_fraction': 0.84328055114376, 'num_leaves': 449, 'feature_fraction': 0.5716548975369572, 'max_depth': 9, 'learning_rate': 0.0036937826771350608, 'subsample': 0.25747221221553246, 'colsample_bytree': 0.8023394129597526, 'min_data_in_leaf': 17, 'max_bin': 137}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:34:26,517] Trial 8 finished with value: 41977.641886529134 and parameters: {'n_estimators': 653, 'lambda_l2': 3.571072974997457, 'bagging_fraction': 0.6511180150611602, 'num_leaves': 402, 'feature_fraction': 0.5263544627588084, 'max_depth': 13, 'learning_rate': 0.0015347180138721125, 'subsample': 0.07398129306012521, 'colsample_bytree': 0.4010864597378231, 'min_data_in_leaf': 50, 'max_bin': 151}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:34:42,527] Trial 9 finished with value: 43558.72848355981 and parameters: {'n_estimators': 617, 'lambda_l2': 1.9570513802692435, 'bagging_fraction': 0.7957084513496462, 'num_leaves': 219, 'feature_fraction': 0.5554004191627978, 'max_depth': 14, 'learning_rate': 0.001145046859638242, 'subsample': 0.14765689265846663, 'colsample_bytree': 0.44322296948113654, 'min_data_in_leaf': 56, 'max_bin': 455}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:34:52,389] Trial 10 finished with value: 38541.601974779114 and parameters: {'n_estimators': 790, 'lambda_l2': 0.14453971013493305, 'bagging_fraction': 0.9997088989830338, 'num_leaves': 36, 'feature_fraction': 0.9736546353157246, 'max_depth': 11, 'learning_rate': 0.011645036902801387, 'subsample': 0.4182375233816867, 'colsample_bytree': 0.02719099350439841, 'min_data_in_leaf': 48, 'max_bin': 500}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:34:57,529] Trial 11 finished with value: 38689.09075702236 and parameters: {'n_estimators': 802, 'lambda_l2': 6.986026510545555, 'bagging_fraction': 0.5167855771226317, 'num_leaves': 667, 'feature_fraction': 0.8396241243400918, 'max_depth': 7, 'learning_rate': 0.0112410045406183, 'subsample': 0.9965720443398053, 'colsample_bytree': 0.9851711704903483, 'min_data_in_leaf': 99, 'max_bin': 365}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:35:08,336] Trial 12 finished with value: 38616.293176482184 and parameters: {'n_estimators': 808, 'lambda_l2': 2.082661418268923, 'bagging_fraction': 0.7684839779825519, 'num_leaves': 244, 'feature_fraction': 0.8006278482937044, 'max_depth': 11, 'learning_rate': 0.019631742442487884, 'subsample': 0.6504068632126715, 'colsample_bytree': 0.6414773955366726, 'min_data_in_leaf': 71, 'max_bin': 432}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:35:14,544] Trial 13 finished with value: 38979.533973316655 and parameters: {'n_estimators': 734, 'lambda_l2': 7.778334939774661, 'bagging_fraction': 0.6116229609604421, 'num_leaves': 392, 'feature_fraction': 0.9941452023945015, 'max_depth': 7, 'learning_rate': 0.006304835539162745, 'subsample': 0.5420637588024974, 'colsample_bytree': 0.9822167091563125, 'min_data_in_leaf': 91, 'max_bin': 267}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:35:26,803] Trial 14 finished with value: 38522.311034778504 and parameters: {'n_estimators': 877, 'lambda_l2': 4.868905329147559, 'bagging_fraction': 0.758915444775268, 'num_leaves': 51, 'feature_fraction': 0.8941658346529477, 'max_depth': 11, 'learning_rate': 0.00728225234880661, 'subsample': 0.7484554461238431, 'colsample_bytree': 0.7568404173339334, 'min_data_in_leaf': 62, 'max_bin': 491}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:35:33,452] Trial 15 finished with value: 38633.12486308518 and parameters: {'n_estimators': 947, 'lambda_l2': 8.286876921304028, 'bagging_fraction': 0.5820216217994321, 'num_leaves': 595, 'feature_fraction': 0.7526431728170268, 'max_depth': 7, 'learning_rate': 0.01942530325361445, 'subsample': 0.9411084903870164, 'colsample_bytree': 0.5271428377954726, 'min_data_in_leaf': 35, 'max_bin': 409}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:35:41,727] Trial 16 finished with value: 38630.88935399633 and parameters: {'n_estimators': 841, 'lambda_l2': 5.777809002208027, 'bagging_fraction': 0.7139704746515128, 'num_leaves': 730, 'feature_fraction': 0.6835886496015797, 'max_depth': 9, 'learning_rate': 0.006760190168015178, 'subsample': 0.8320382609652273, 'colsample_bytree': 0.8459534044767243, 'min_data_in_leaf': 84, 'max_bin': 330}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:35:44,552] Trial 17 finished with value: 38586.95769147617 and parameters: {'n_estimators': 754, 'lambda_l2': 9.918707226106964, 'bagging_fraction': 0.8066258804452008, 'num_leaves': 510, 'feature_fraction': 0.5028674876489367, 'max_depth': 5, 'learning_rate': 0.04663733921496317, 'subsample': 0.6742631576963813, 'colsample_bytree': 0.29916356661434096, 'min_data_in_leaf': 63, 'max_bin': 237}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:35:55,081] Trial 18 finished with value: 38586.218653070566 and parameters: {'n_estimators': 667, 'lambda_l2': 4.275173743626066, 'bagging_fraction': 0.5786297326856271, 'num_leaves': 196, 'feature_fraction': 0.8940701447298445, 'max_depth': 12, 'learning_rate': 0.016001189729740815, 'subsample': 0.34567845749803117, 'colsample_bytree': 0.5947842265173191, 'min_data_in_leaf': 39, 'max_bin': 304}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:36:02,195] Trial 19 finished with value: 38755.617102114535 and parameters: {'n_estimators': 925, 'lambda_l2': 6.073733457404466, 'bagging_fraction': 0.7072490917128831, 'num_leaves': 334, 'feature_fraction': 0.6465547379675016, 'max_depth': 8, 'learning_rate': 0.028390392760687305, 'subsample': 0.5814102391576914, 'colsample_bytree': 0.725335447084739, 'min_data_in_leaf': 71, 'max_bin': 452}. Best is trial 3 with value: 38469.37739993295.




[I 2023-11-14 07:36:13,648] Trial 20 finished with value: 38466.654772346665 and parameters: {'n_estimators': 863, 'lambda_l2': 8.37597463052281, 'bagging_fraction': 0.838743603798948, 'num_leaves': 1019, 'feature_fraction': 0.7350655792896781, 'max_depth': 12, 'learning_rate': 0.00985855628918478, 'subsample': 0.7805287173632746, 'colsample_bytree': 0.8454987389047821, 'min_data_in_leaf': 63, 'max_bin': 202}. Best is trial 20 with value: 38466.654772346665.




[I 2023-11-14 07:36:25,837] Trial 21 finished with value: 38435.27106203086 and parameters: {'n_estimators': 861, 'lambda_l2': 8.448129242120853, 'bagging_fraction': 0.8474876774129152, 'num_leaves': 491, 'feature_fraction': 0.723025146335873, 'max_depth': 12, 'learning_rate': 0.009709623719797564, 'subsample': 0.7687364271354552, 'colsample_bytree': 0.8529453574094209, 'min_data_in_leaf': 62, 'max_bin': 184}. Best is trial 21 with value: 38435.27106203086.




[I 2023-11-14 07:36:37,061] Trial 22 finished with value: 38435.928157917784 and parameters: {'n_estimators': 841, 'lambda_l2': 8.339666893465008, 'bagging_fraction': 0.8524893731075035, 'num_leaves': 525, 'feature_fraction': 0.7515477204477046, 'max_depth': 12, 'learning_rate': 0.008134130131087304, 'subsample': 0.758868216256347, 'colsample_bytree': 0.8830947992309973, 'min_data_in_leaf': 61, 'max_bin': 188}. Best is trial 21 with value: 38435.27106203086.




[I 2023-11-14 07:36:50,491] Trial 23 finished with value: 38440.77794499876 and parameters: {'n_estimators': 861, 'lambda_l2': 8.35439977457018, 'bagging_fraction': 0.8387496378670708, 'num_leaves': 1000, 'feature_fraction': 0.730284794448135, 'max_depth': 12, 'learning_rate': 0.009619725985058225, 'subsample': 0.7832176405082869, 'colsample_bytree': 0.9082727182794368, 'min_data_in_leaf': 45, 'max_bin': 184}. Best is trial 21 with value: 38435.27106203086.




[I 2023-11-14 07:37:08,354] Trial 24 finished with value: 38487.19089123995 and parameters: {'n_estimators': 835, 'lambda_l2': 7.528750961735357, 'bagging_fraction': 0.8940051677798889, 'num_leaves': 754, 'feature_fraction': 0.7341873331552474, 'max_depth': 13, 'learning_rate': 0.005102318932123268, 'subsample': 0.6814347091306121, 'colsample_bytree': 0.9068630618251882, 'min_data_in_leaf': 42, 'max_bin': 183}. Best is trial 21 with value: 38435.27106203086.




[I 2023-11-14 07:37:20,145] Trial 25 finished with value: 38459.02081307895 and parameters: {'n_estimators': 767, 'lambda_l2': 8.457895160598722, 'bagging_fraction': 0.8041079068472419, 'num_leaves': 580, 'feature_fraction': 0.7742014997609322, 'max_depth': 13, 'learning_rate': 0.008930092952744793, 'subsample': 0.777432268302187, 'colsample_bytree': 0.89850177871863, 'min_data_in_leaf': 55, 'max_bin': 171}. Best is trial 21 with value: 38435.27106203086.




[I 2023-11-14 07:37:32,959] Trial 26 finished with value: 38666.27729513128 and parameters: {'n_estimators': 893, 'lambda_l2': 8.876237594930952, 'bagging_fraction': 0.8646468067218588, 'num_leaves': 121, 'feature_fraction': 0.7080199756610764, 'max_depth': 12, 'learning_rate': 0.01449356985961856, 'subsample': 0.6433246795209553, 'colsample_bytree': 0.7145647735822654, 'min_data_in_leaf': 32, 'max_bin': 229}. Best is trial 21 with value: 38435.27106203086.




[I 2023-11-14 07:37:44,385] Trial 27 finished with value: 38366.63512123794 and parameters: {'n_estimators': 843, 'lambda_l2': 7.540324921552546, 'bagging_fraction': 0.934212812297027, 'num_leaves': 507, 'feature_fraction': 0.6362360031683512, 'max_depth': 10, 'learning_rate': 0.00888018088677683, 'subsample': 0.7371134934911652, 'colsample_bytree': 0.7907368841748692, 'min_data_in_leaf': 46, 'max_bin': 230}. Best is trial 27 with value: 38366.63512123794.




[I 2023-11-14 07:37:57,571] Trial 28 finished with value: 38556.27688577657 and parameters: {'n_estimators': 822, 'lambda_l2': 7.117449203501024, 'bagging_fraction': 0.9636492868257603, 'num_leaves': 505, 'feature_fraction': 0.615518530950563, 'max_depth': 10, 'learning_rate': 0.004838960184545148, 'subsample': 0.7229667751483916, 'colsample_bytree': 0.7610388533996385, 'min_data_in_leaf': 52, 'max_bin': 237}. Best is trial 27 with value: 38366.63512123794.




[I 2023-11-14 07:38:06,054] Trial 29 finished with value: 40035.37971714469 and parameters: {'n_estimators': 997, 'lambda_l2': 7.657495324462321, 'bagging_fraction': 0.9204696430738387, 'num_leaves': 680, 'feature_fraction': 0.6499552844137975, 'max_depth': 10, 'learning_rate': 0.0668297260547953, 'subsample': 0.5832543000943513, 'colsample_bytree': 0.6820632993804449, 'min_data_in_leaf': 80, 'max_bin': 255}. Best is trial 27 with value: 38366.63512123794.




[I 2023-11-14 07:38:26,592] Trial 30 finished with value: 39459.41429614896 and parameters: {'n_estimators': 769, 'lambda_l2': 9.958598787432438, 'bagging_fraction': 0.9497323136135287, 'num_leaves': 482, 'feature_fraction': 0.6646758331155727, 'max_depth': 15, 'learning_rate': 0.0025216472034268467, 'subsample': 0.6228410845446541, 'colsample_bytree': 0.8054384065804729, 'min_data_in_leaf': 58, 'max_bin': 295}. Best is trial 27 with value: 38366.63512123794.




[I 2023-11-14 07:38:39,157] Trial 31 finished with value: 38475.77727169641 and parameters: {'n_estimators': 845, 'lambda_l2': 8.874509471024307, 'bagging_fraction': 0.8987469861919891, 'num_leaves': 833, 'feature_fraction': 0.7125950149184351, 'max_depth': 12, 'learning_rate': 0.009021259539163119, 'subsample': 0.8620361200244543, 'colsample_bytree': 0.902060457381532, 'min_data_in_leaf': 45, 'max_bin': 165}. Best is trial 27 with value: 38366.63512123794.




[I 2023-11-14 07:38:52,386] Trial 32 finished with value: 38721.99744335057 and parameters: {'n_estimators': 854, 'lambda_l2': 8.152149826665585, 'bagging_fraction': 0.8715648017098235, 'num_leaves': 553, 'feature_fraction': 0.6310830331453144, 'max_depth': 13, 'learning_rate': 0.013587499652858515, 'subsample': 0.7976655912560283, 'colsample_bytree': 0.9398417250728438, 'min_data_in_leaf': 38, 'max_bin': 208}. Best is trial 27 with value: 38366.63512123794.




[I 2023-11-14 07:39:01,912] Trial 33 finished with value: 38358.47512708974 and parameters: {'n_estimators': 892, 'lambda_l2': 9.120174322435526, 'bagging_fraction': 0.8212524434096794, 'num_leaves': 312, 'feature_fraction': 0.6864365936394377, 'max_depth': 10, 'learning_rate': 0.009025702249254646, 'subsample': 0.7227675964333954, 'colsample_bytree': 0.8500717211078491, 'min_data_in_leaf': 47, 'max_bin': 186}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:39:12,225] Trial 34 finished with value: 38568.15802240149 and parameters: {'n_estimators': 888, 'lambda_l2': 9.1947521247167, 'bagging_fraction': 0.8153536953056564, 'num_leaves': 298, 'feature_fraction': 0.6774758257628701, 'max_depth': 10, 'learning_rate': 0.005594454463976644, 'subsample': 0.7035763233854005, 'colsample_bytree': 0.8560471276080658, 'min_data_in_leaf': 67, 'max_bin': 217}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:39:25,730] Trial 35 finished with value: 38433.94360548685 and parameters: {'n_estimators': 952, 'lambda_l2': 9.088134337062714, 'bagging_fraction': 0.8772114422714393, 'num_leaves': 445, 'feature_fraction': 0.6987460548706966, 'max_depth': 11, 'learning_rate': 0.008081669924653458, 'subsample': 0.8924389961519081, 'colsample_bytree': 0.8062045848726094, 'min_data_in_leaf': 31, 'max_bin': 150}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:39:38,987] Trial 36 finished with value: 38580.619674959955 and parameters: {'n_estimators': 950, 'lambda_l2': 9.142003588994932, 'bagging_fraction': 0.8798688020260801, 'num_leaves': 448, 'feature_fraction': 0.6943012101447136, 'max_depth': 9, 'learning_rate': 0.004335885741470699, 'subsample': 0.9015944904764788, 'colsample_bytree': 0.7771809516250275, 'min_data_in_leaf': 26, 'max_bin': 154}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:39:56,581] Trial 37 finished with value: 38493.68230391849 and parameters: {'n_estimators': 935, 'lambda_l2': 9.217184376516988, 'bagging_fraction': 0.925600219499232, 'num_leaves': 293, 'feature_fraction': 0.6130836695915051, 'max_depth': 11, 'learning_rate': 0.006185018167697081, 'subsample': 0.8542925559685568, 'colsample_bytree': 0.9987801493523015, 'min_data_in_leaf': 32, 'max_bin': 140}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:40:07,877] Trial 38 finished with value: 38951.45870137653 and parameters: {'n_estimators': 908, 'lambda_l2': 9.559080223322075, 'bagging_fraction': 0.8806961803715622, 'num_leaves': 351, 'feature_fraction': 0.664990927008856, 'max_depth': 8, 'learning_rate': 0.003312178872449354, 'subsample': 0.9166242946416379, 'colsample_bytree': 0.8048202391135235, 'min_data_in_leaf': 29, 'max_bin': 131}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:40:22,908] Trial 39 finished with value: 38519.12722343993 and parameters: {'n_estimators': 982, 'lambda_l2': 8.796445529952827, 'bagging_fraction': 0.8297211699863656, 'num_leaves': 439, 'feature_fraction': 0.7022200422896903, 'max_depth': 10, 'learning_rate': 0.008093831811501289, 'subsample': 0.8293193634200635, 'colsample_bytree': 0.6637403589642128, 'min_data_in_leaf': 22, 'max_bin': 172}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:40:33,411] Trial 40 finished with value: 38450.57203168928 and parameters: {'n_estimators': 953, 'lambda_l2': 6.5652971603616015, 'bagging_fraction': 0.7842333436465975, 'num_leaves': 141, 'feature_fraction': 0.6604324925926955, 'max_depth': 10, 'learning_rate': 0.011889723268458888, 'subsample': 0.7303923091895896, 'colsample_bytree': 0.7150981567836613, 'min_data_in_leaf': 52, 'max_bin': 275}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:40:43,402] Trial 41 finished with value: 38466.45288632068 and parameters: {'n_estimators': 831, 'lambda_l2': 7.9144660553725785, 'bagging_fraction': 0.8571108455669122, 'num_leaves': 537, 'feature_fraction': 0.7646276949178893, 'max_depth': 11, 'learning_rate': 0.007912498397394151, 'subsample': 0.740802590161897, 'colsample_bytree': 0.852160328302262, 'min_data_in_leaf': 78, 'max_bin': 196}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:40:55,942] Trial 42 finished with value: 38495.95756574286 and parameters: {'n_estimators': 892, 'lambda_l2': 7.45244280631027, 'bagging_fraction': 0.8272324400615318, 'num_leaves': 640, 'feature_fraction': 0.7157610653487053, 'max_depth': 11, 'learning_rate': 0.005879001907286505, 'subsample': 0.8548133816303772, 'colsample_bytree': 0.945576733314843, 'min_data_in_leaf': 60, 'max_bin': 218}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:41:06,465] Trial 43 finished with value: 38551.35245744521 and parameters: {'n_estimators': 899, 'lambda_l2': 9.585470125223946, 'bagging_fraction': 0.8531844171278923, 'num_leaves': 470, 'feature_fraction': 0.6847118974197975, 'max_depth': 9, 'learning_rate': 0.010449800133491529, 'subsample': 0.9634947260435232, 'colsample_bytree': 0.8148713486546576, 'min_data_in_leaf': 14, 'max_bin': 190}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:41:18,653] Trial 44 finished with value: 38435.264860239484 and parameters: {'n_estimators': 793, 'lambda_l2': 8.736391429834825, 'bagging_fraction': 0.8981177243154277, 'num_leaves': 411, 'feature_fraction': 0.6261205328513119, 'max_depth': 14, 'learning_rate': 0.0077587803342942544, 'subsample': 0.8926869618493646, 'colsample_bytree': 0.8786144011038255, 'min_data_in_leaf': 68, 'max_bin': 159}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:41:34,303] Trial 45 finished with value: 38585.76641454848 and parameters: {'n_estimators': 790, 'lambda_l2': 8.730336717183128, 'bagging_fraction': 0.9122485457619444, 'num_leaves': 399, 'feature_fraction': 0.5932301530251893, 'max_depth': 14, 'learning_rate': 0.004215147037339746, 'subsample': 0.8961731024174344, 'colsample_bytree': 0.9304500659927992, 'min_data_in_leaf': 48, 'max_bin': 154}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:41:47,098] Trial 46 finished with value: 38521.56021869959 and parameters: {'n_estimators': 718, 'lambda_l2': 7.943453844663894, 'bagging_fraction': 0.9388375318754775, 'num_leaves': 330, 'feature_fraction': 0.6398029275816542, 'max_depth': 15, 'learning_rate': 0.007009899507288785, 'subsample': 0.9904664167282979, 'colsample_bytree': 0.7641806423484908, 'min_data_in_leaf': 55, 'max_bin': 129}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:42:00,372] Trial 47 finished with value: 38510.89076331572 and parameters: {'n_estimators': 971, 'lambda_l2': 7.24899707712127, 'bagging_fraction': 0.8942611282107091, 'num_leaves': 420, 'feature_fraction': 0.6205333024581456, 'max_depth': 14, 'learning_rate': 0.012088302164183771, 'subsample': 0.9234923813024067, 'colsample_bytree': 0.8662145592363091, 'min_data_in_leaf': 69, 'max_bin': 160}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:42:11,892] Trial 48 finished with value: 38485.466166527 and parameters: {'n_estimators': 876, 'lambda_l2': 9.47781656266903, 'bagging_fraction': 0.9720717537394429, 'num_leaves': 367, 'feature_fraction': 0.5903990430353143, 'max_depth': 10, 'learning_rate': 0.005216849178715095, 'subsample': 0.8793564569423541, 'colsample_bytree': 0.8255920483702012, 'min_data_in_leaf': 41, 'max_bin': 146}. Best is trial 33 with value: 38358.47512708974.




[I 2023-11-14 07:42:24,117] Trial 49 finished with value: 38473.87042981599 and parameters: {'n_estimators': 922, 'lambda_l2': 9.996428539218133, 'bagging_fraction': 0.7817085130448806, 'num_leaves': 618, 'feature_fraction': 0.6774395466933044, 'max_depth': 13, 'learning_rate': 0.010528239772901673, 'subsample': 0.8151795563646692, 'colsample_bytree': 0.9589587581031029, 'min_data_in_leaf': 66, 'max_bin': 251}. Best is trial 33 with value: 38358.47512708974.


Best hyperparameters: {'n_estimators': 892, 'lambda_l2': 9.120174322435526, 'bagging_fraction': 0.8212524434096794, 'num_leaves': 312, 'feature_fraction': 0.6864365936394377, 'max_depth': 10, 'learning_rate': 0.009025702249254646, 'subsample': 0.7227675964333954, 'colsample_bytree': 0.8500717211078491, 'min_data_in_leaf': 47, 'max_bin': 186}
Best RMSE: 38358.47512708974


In [8]:
pred_test = np.zeros(shape=test1.values.shape[0])

pred_test = est.predict(test1)

pred_test = pred_test * .969 + 10
# pred_test *= 0.9915
subm = pd.read_csv('../../Dataset/sample_submission.csv/sample_submission.csv')
subm['price_doc'] = pred_test
subm.to_csv('./clean data/output.csv',index=False)
subm.head()

Unnamed: 0,id,price_doc
0,30474,5455544.0
1,30475,8213875.0
2,30476,5528125.0
3,30477,6540667.0
4,30478,5083761.0
