In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error, r2_score, mean_squared_error

from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from category_encoders import MEstimateEncoder, TargetEncoder
from sklearn.inspection import permutation_importance

import seaborn as sns
import shap

import lightgbm as lgb

import sys
sys.path.append('../lightgbm')

In [14]:
# File Paths
test_df_path = '../../Dataset/test.csv/test.csv'
process_train_path = './processed/processed_train_2.csv'
processed_test_path = './processed/processed_test_2.csv'
feats_path = './best_feats/Onehot_only_mean_NANs_modified_prices.csv'
output_path = './output/lgbm_pred_topFeat.csv'

top_n = 75

In [15]:
feats_df = pd.read_csv(feats_path)
feats = feats_df['col_name'].values.tolist()[:top_n]
processed_df = pd.read_csv(process_train_path)
processed_df = processed_df[feats+['price_doc']]
X = processed_df.drop(['price_doc'], axis=1)
y = processed_df['price_doc']
X = X[feats]

test_df = pd.read_csv(test_df_path)
processed_test_df = pd.read_csv(processed_test_path)
processed_test_df

Unnamed: 0,id,full_sq,life_sq,floor,max_floor,build_year,num_room,kitch_sq,state,area_m,...,big_road1_1line_yes,railroad_1line_no,railroad_1line_yes,material_1.0,material_2.0,material_3.0,material_4.0,material_5.0,material_6.0,material_nan
0,30474,39.00,20.700000,2,9,1998.000000,1,8.9,3.000000,2.615514e+07,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30475,79.20,34.404467,8,17,0.000000,3,1.0,1.000000,2.553630e+07,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,30476,40.50,25.100000,3,5,1960.000000,2,4.8,2.000000,9.946335e+06,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,30477,62.80,36.000000,17,17,2016.000000,2,62.8,3.000000,2.149409e+07,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,30478,40.00,40.000000,17,17,0.000000,1,1.0,1.000000,2.553630e+07,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7657,38131,52.20,31.800000,10,12,1973.000000,2,9.1,2.000000,7.811375e+06,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
7658,38132,54.09,34.404467,14,0,1879.046638,2,0.0,2.105145,5.299528e+07,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7659,38133,41.08,1.000000,12,1,1.000000,1,1.0,1.000000,7.307411e+06,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7660,38134,34.80,19.800000,8,9,1977.000000,1,6.4,2.000000,7.128794e+06,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [16]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
from sklearn.metrics import mean_squared_error
import optuna

def objective(trial):
    params = {
        "objective": "regression",
        "metric": "rmse",
        "verbosity": -1,
        "n_estimators": trial.suggest_int("n_estimators", 800, 1200),
#         "boosting": trial.suggest_categorical("boosting", ["gbdt", "rf", "dart"]),
        "lambda_l2": trial.suggest_float("lambda_l2", 0, 10),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0, 1),
        "bagging_freq": 1,
        "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.5, 1),
        "max_depth": trial.suggest_int("max_depth", 1, 50),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 10, 100),
    }

    model = lgb.LGBMRegressor(**params)
    model.fit(X_train, y_train, verbose=False)
    predictions = model.predict(X_val)
    rmse = mean_squared_error(y_val, predictions, squared=False)
    return rmse

In [6]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

[I 2023-10-30 21:40:07,450] A new study created in memory with name: no-name-bfc67154-3467-46cf-972b-a7bfa4ae289b
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:10,627] Trial 0 finished with value: 2618056.877218267 and parameters: {'n_estimators': 837, 'lambda_l2': 8.070371159313087, 'bagging_fraction': 0.6804008636370036, 'num_leaves': 973, 'feature_fraction': 0.6070130407242049, 'max_depth': 8, 'learning_rate': 0.004163955700652882, 'subsample': 0.433891617622374, 'colsample_bytree': 0.9356367928201346, 'min_data_in_leaf': 49}. Best is trial 0 with value: 2618056.877218267.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:14,082] Trial 1 finished with value: 2812628.002089625 and parameters: {'n_estimators': 999, 'lambda_l2': 0.9026628948332671, 'bagging_fraction': 0.09475506939480982, 'num_leaves': 812, 'feature_fraction': 0.820364070733431, 'max_depth': 43, 'learning_rate': 0.09692546105835612, 'subsample': 0.4902873656989232, 'colsample_bytree': 0.4515205695085263, 'min_data_in_leaf': 34}. Best is trial 0 with value: 2618056.877218267.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:17,693] Trial 2 finished with value: 2800346.492234071 and parameters: {'n_estimators': 1048, 'lambda_l2': 0.1877330442950309, 'bagging_fraction': 0.26193960507959335, 'num_leaves': 447, 'feature_fraction': 0.7080547656914368, 'max_depth': 25, 'learning_rate': 0.0029049463387501597, 'subsample': 0.8683617589248103, 'colsample_bytree': 0.35104741834464226, 'min_data_in_leaf': 85}. Best is trial 0 with value: 2618056.877218267.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:18,449] Trial 3 finished with value: 3084544.7787292777 and parameters: {'n_estimators': 825, 'lambda_l2': 7.7057963224344315, 'bagging_fraction': 0.8038288446553353, 'num_leaves': 111, 'feature_fraction': 0.9788133445875804, 'max_depth': 1, 'learning_rate': 0.011438165241437289, 'subsample': 0.5445618456481013, 'colsample_bytree': 0.20791374815453273, 'min_data_in_leaf': 76}. Best is trial 0 with value: 2618056.877218267.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:20,508] Trial 4 finished with value: 2768201.264350771 and parameters: {'n_estimators': 1127, 'lambda_l2': 9.425981493067182, 'bagging_fraction': 0.028421577873133708, 'num_leaves': 904, 'feature_fraction': 0.8910267588246196, 'max_depth': 46, 'learning_rate': 0.021906287726234017, 'subsample': 0.9085367440796484, 'colsample_bytree': 0.7167564408633093, 'min_data_in_leaf': 24}. Best is trial 0 with value: 2618056.877218267.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:22,892] Trial 5 finished with value: 2655908.9195820987 and parameters: {'n_estimators': 898, 'lambda_l2': 6.347273797832639, 'bagging_fraction': 0.4720523200101785, 'num_leaves': 652, 'feature_fraction': 0.7783126233071264, 'max_depth': 6, 'learning_rate': 0.003155283901962325, 'subsample': 0.1545645182273886, 'colsample_bytree': 0.4164870693328896, 'min_data_in_leaf': 17}. Best is trial 0 with value: 2618056.877218267.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:27,391] Trial 6 finished with value: 2802847.831976643 and parameters: {'n_estimators': 886, 'lambda_l2': 0.6732651053804661, 'bagging_fraction': 0.3000511563449547, 'num_leaves': 968, 'feature_fraction': 0.6024637007434313, 'max_depth': 50, 'learning_rate': 0.0026961573037983613, 'subsample': 0.7048277331534254, 'colsample_bytree': 0.8271777284101702, 'min_data_in_leaf': 70}. Best is trial 0 with value: 2618056.877218267.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:35,087] Trial 7 finished with value: 2515496.802435277 and parameters: {'n_estimators': 1110, 'lambda_l2': 8.99933453019248, 'bagging_fraction': 0.9861111965554947, 'num_leaves': 233, 'feature_fraction': 0.5932884969431067, 'max_depth': 19, 'learning_rate': 0.025811388483065502, 'subsample': 0.36772258498936145, 'colsample_bytree': 0.08769354411683271, 'min_data_in_leaf': 38}. Best is trial 7 with value: 2515496.802435277.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:42,080] Trial 8 finished with value: 3182729.9956990564 and parameters: {'n_estimators': 854, 'lambda_l2': 2.1898021417566618, 'bagging_fraction': 0.6330662994041001, 'num_leaves': 839, 'feature_fraction': 0.9303116417234455, 'max_depth': 49, 'learning_rate': 0.001068734960221782, 'subsample': 0.1017484959044933, 'colsample_bytree': 0.34925571895613494, 'min_data_in_leaf': 93}. Best is trial 7 with value: 2515496.802435277.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:40:48,010] Trial 9 finished with value: 2478494.487583945 and parameters: {'n_estimators': 957, 'lambda_l2': 1.0179522290678555, 'bagging_fraction': 0.803425892987424, 'num_leaves': 730, 'feature_fraction': 0.756874719068369, 'max_depth': 9, 'learning_rate': 0.010123433800404112, 'subsample': 0.4105167842639978, 'colsample_bytree': 0.5230497041584328, 'min_data_in_leaf': 10}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:41:06,517] Trial 10 finished with value: 2526715.547149343 and parameters: {'n_estimators': 973, 'lambda_l2': 3.9298345876599052, 'bagging_fraction': 0.9919119826916728, 'num_leaves': 549, 'feature_fraction': 0.6868023224716062, 'max_depth': 16, 'learning_rate': 0.00916619233643637, 'subsample': 0.25442623386959407, 'colsample_bytree': 0.6400884122622181, 'min_data_in_leaf': 10}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:41:14,229] Trial 11 finished with value: 2546370.855030307 and parameters: {'n_estimators': 1120, 'lambda_l2': 4.407360047365406, 'bagging_fraction': 0.984767426557977, 'num_leaves': 234, 'feature_fraction': 0.5070728391389424, 'max_depth': 20, 'learning_rate': 0.030010976410640603, 'subsample': 0.3330305810817188, 'colsample_bytree': 0.08104636731866165, 'min_data_in_leaf': 42}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:41:27,458] Trial 12 finished with value: 2579726.4230302507 and parameters: {'n_estimators': 1200, 'lambda_l2': 2.737040196594438, 'bagging_fraction': 0.8209473722318564, 'num_leaves': 365, 'feature_fraction': 0.8227942394397916, 'max_depth': 36, 'learning_rate': 0.02955048086877713, 'subsample': 0.3327799074602602, 'colsample_bytree': 0.06500808213424997, 'min_data_in_leaf': 62}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:41:29,132] Trial 13 finished with value: 2481918.221393387 and parameters: {'n_estimators': 947, 'lambda_l2': 5.753927699736524, 'bagging_fraction': 0.852313216946015, 'num_leaves': 12, 'feature_fraction': 0.7177790954048031, 'max_depth': 13, 'learning_rate': 0.05974415165579799, 'subsample': 0.5889025072321834, 'colsample_bytree': 0.554324644431842, 'min_data_in_leaf': 32}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:41:31,156] Trial 14 finished with value: 2526038.6945417942 and parameters: {'n_estimators': 944, 'lambda_l2': 5.870585760528631, 'bagging_fraction': 0.8063589445673898, 'num_leaves': 18, 'feature_fraction': 0.7413698633843271, 'max_depth': 12, 'learning_rate': 0.09507362722864741, 'subsample': 0.6196349504882227, 'colsample_bytree': 0.5809005611622863, 'min_data_in_leaf': 26}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:01,659] Trial 15 finished with value: 2595856.3475133106 and parameters: {'n_estimators': 933, 'lambda_l2': 3.440066941265295, 'bagging_fraction': 0.5946786497854999, 'num_leaves': 698, 'feature_fraction': 0.785815550845606, 'max_depth': 32, 'learning_rate': 0.05388505660502659, 'subsample': 0.7328786907695617, 'colsample_bytree': 0.49812448576444096, 'min_data_in_leaf': 10}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:02,662] Trial 16 finished with value: 3015289.1390910745 and parameters: {'n_estimators': 1042, 'lambda_l2': 5.307171473145013, 'bagging_fraction': 0.8640988917977943, 'num_leaves': 639, 'feature_fraction': 0.6762968224625838, 'max_depth': 1, 'learning_rate': 0.012289381210638665, 'subsample': 0.55281062015222, 'colsample_bytree': 0.6669158428089498, 'min_data_in_leaf': 27}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:07,195] Trial 17 finished with value: 2576769.346308078 and parameters: {'n_estimators': 1036, 'lambda_l2': 4.95524622313244, 'bagging_fraction': 0.7121029436485716, 'num_leaves': 365, 'feature_fraction': 0.8531990317244464, 'max_depth': 12, 'learning_rate': 0.059419133327262806, 'subsample': 0.4370357822863218, 'colsample_bytree': 0.5654655531899987, 'min_data_in_leaf': 53}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:24,170] Trial 18 finished with value: 2516193.706184781 and parameters: {'n_estimators': 918, 'lambda_l2': 1.802383913254415, 'bagging_fraction': 0.5395782801096557, 'num_leaves': 738, 'feature_fraction': 0.7522472752343152, 'max_depth': 25, 'learning_rate': 0.00683956820017519, 'subsample': 0.2043188564350566, 'colsample_bytree': 0.7533274151237536, 'min_data_in_leaf': 20}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:26,939] Trial 19 finished with value: 2492599.3282083604 and parameters: {'n_estimators': 975, 'lambda_l2': 3.121132625762649, 'bagging_fraction': 0.7240959238329505, 'num_leaves': 518, 'feature_fraction': 0.8542782665005656, 'max_depth': 7, 'learning_rate': 0.018480113948139906, 'subsample': 0.6747706743568774, 'colsample_bytree': 0.5941710740711968, 'min_data_in_leaf': 32}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:31,651] Trial 20 finished with value: 2517143.906343298 and parameters: {'n_estimators': 862, 'lambda_l2': 1.7250949645765528, 'bagging_fraction': 0.8903539680716503, 'num_leaves': 178, 'feature_fraction': 0.7263463279725653, 'max_depth': 15, 'learning_rate': 0.03875588475890609, 'subsample': 0.5917544236272421, 'colsample_bytree': 0.5206405838285584, 'min_data_in_leaf': 46}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:34,275] Trial 21 finished with value: 2491787.383820944 and parameters: {'n_estimators': 968, 'lambda_l2': 2.8665175377186323, 'bagging_fraction': 0.7318695139153409, 'num_leaves': 493, 'feature_fraction': 0.8662970760072221, 'max_depth': 7, 'learning_rate': 0.017036826207814028, 'subsample': 0.6756283799405316, 'colsample_bytree': 0.6165800454821055, 'min_data_in_leaf': 35}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:37,718] Trial 22 finished with value: 2511709.154089841 and parameters: {'n_estimators': 964, 'lambda_l2': 1.3589103030261043, 'bagging_fraction': 0.7419193997788859, 'num_leaves': 396, 'feature_fraction': 0.9238133271229885, 'max_depth': 9, 'learning_rate': 0.016448230882148935, 'subsample': 0.7877863083942468, 'colsample_bytree': 0.6480128325123674, 'min_data_in_leaf': 60}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:39,523] Trial 23 finished with value: 2516803.403977535 and parameters: {'n_estimators': 1010, 'lambda_l2': 0.004486626720613174, 'bagging_fraction': 0.9175931768470668, 'num_leaves': 14, 'feature_fraction': 0.79162546564188, 'max_depth': 4, 'learning_rate': 0.01553088922724556, 'subsample': 0.6475199454247743, 'colsample_bytree': 0.5087077261092536, 'min_data_in_leaf': 17}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:53,402] Trial 24 finished with value: 2513985.941370859 and parameters: {'n_estimators': 1078, 'lambda_l2': 2.7868188489830983, 'bagging_fraction': 0.7907023976473979, 'num_leaves': 578, 'feature_fraction': 0.7564941581385395, 'max_depth': 21, 'learning_rate': 0.007718720655297967, 'subsample': 0.5175122901034654, 'colsample_bytree': 0.7052125305740131, 'min_data_in_leaf': 34}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:42:58,758] Trial 25 finished with value: 2514413.355066459 and parameters: {'n_estimators': 1004, 'lambda_l2': 4.217582419607769, 'bagging_fraction': 0.895022147993046, 'num_leaves': 804, 'feature_fraction': 0.8304940739799199, 'max_depth': 12, 'learning_rate': 0.04184840027069265, 'subsample': 0.804233097654114, 'colsample_bytree': 0.7788000098208443, 'min_data_in_leaf': 40}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:43:05,477] Trial 26 finished with value: 2506222.712912684 and parameters: {'n_estimators': 903, 'lambda_l2': 2.3419218961465553, 'bagging_fraction': 0.6772946681794844, 'num_leaves': 287, 'feature_fraction': 0.6671150402186607, 'max_depth': 16, 'learning_rate': 0.022333117564073832, 'subsample': 0.9998045374029307, 'colsample_bytree': 0.5943570146993157, 'min_data_in_leaf': 28}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:43:25,135] Trial 27 finished with value: 2527642.32352931 and parameters: {'n_estimators': 951, 'lambda_l2': 1.1336121516527793, 'bagging_fraction': 0.7436778824463849, 'num_leaves': 446, 'feature_fraction': 0.7809036035048456, 'max_depth': 29, 'learning_rate': 0.01315513550857914, 'subsample': 0.5879210785702433, 'colsample_bytree': 0.41991219550808306, 'min_data_in_leaf': 16}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:43:26,329] Trial 28 finished with value: 2562041.731743584 and parameters: {'n_estimators': 993, 'lambda_l2': 3.5442663719581113, 'bagging_fraction': 0.6137302177632521, 'num_leaves': 108, 'feature_fraction': 0.7139273030888993, 'max_depth': 3, 'learning_rate': 0.019069758602324793, 'subsample': 0.6457467720856108, 'colsample_bytree': 0.6417097601787115, 'min_data_in_leaf': 57}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:43:32,539] Trial 29 finished with value: 2528443.841011546 and parameters: {'n_estimators': 876, 'lambda_l2': 2.0674816707988084, 'bagging_fraction': 0.851767532268116, 'num_leaves': 986, 'feature_fraction': 0.6383193792401416, 'max_depth': 10, 'learning_rate': 0.005842895262915903, 'subsample': 0.4501575982426962, 'colsample_bytree': 0.9597087787186139, 'min_data_in_leaf': 47}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:43:34,637] Trial 30 finished with value: 2502291.4830347947 and parameters: {'n_estimators': 917, 'lambda_l2': 6.701186670274389, 'bagging_fraction': 0.6820729271179529, 'num_leaves': 615, 'feature_fraction': 0.8767282762168068, 'max_depth': 6, 'learning_rate': 0.011646847884975801, 'subsample': 0.4661908145576784, 'colsample_bytree': 0.8696519071336677, 'min_data_in_leaf': 49}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:43:37,560] Trial 31 finished with value: 2483942.3007275374 and parameters: {'n_estimators': 976, 'lambda_l2': 3.1601105395562774, 'bagging_fraction': 0.755452185148508, 'num_leaves': 484, 'feature_fraction': 0.8589970500320452, 'max_depth': 7, 'learning_rate': 0.01685002213201871, 'subsample': 0.7047489681284234, 'colsample_bytree': 0.5590357796365362, 'min_data_in_leaf': 33}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:43:41,403] Trial 32 finished with value: 2480938.084330866 and parameters: {'n_estimators': 936, 'lambda_l2': 2.8268389426924263, 'bagging_fraction': 0.7771351165415364, 'num_leaves': 479, 'feature_fraction': 0.7944161442441018, 'max_depth': 9, 'learning_rate': 0.015423476520670701, 'subsample': 0.7198960735757496, 'colsample_bytree': 0.48082948370409584, 'min_data_in_leaf': 32}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:43:54,239] Trial 33 finished with value: 2510439.2659870004 and parameters: {'n_estimators': 932, 'lambda_l2': 0.9938577940732047, 'bagging_fraction': 0.9286700937756591, 'num_leaves': 726, 'feature_fraction': 0.8118490706705963, 'max_depth': 15, 'learning_rate': 0.009410473388384962, 'subsample': 0.7319257388057228, 'colsample_bytree': 0.4653721749887135, 'min_data_in_leaf': 21}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:01,468] Trial 34 finished with value: 2503201.728273314 and parameters: {'n_estimators': 1025, 'lambda_l2': 3.8047616087299536, 'bagging_fraction': 0.7835049789851831, 'num_leaves': 454, 'feature_fraction': 0.7602257201818275, 'max_depth': 10, 'learning_rate': 0.005323586015345192, 'subsample': 0.5164004347883702, 'colsample_bytree': 0.529645413147408, 'min_data_in_leaf': 30}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:02,879] Trial 35 finished with value: 2558570.78491246 and parameters: {'n_estimators': 987, 'lambda_l2': 3.2554434212084975, 'bagging_fraction': 0.828570682517416, 'num_leaves': 322, 'feature_fraction': 0.8065526313289022, 'max_depth': 3, 'learning_rate': 0.013550385857289125, 'subsample': 0.550760347818081, 'colsample_bytree': 0.3770082714620414, 'min_data_in_leaf': 42}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:09,169] Trial 36 finished with value: 2521061.1179447323 and parameters: {'n_estimators': 1074, 'lambda_l2': 4.536122683531255, 'bagging_fraction': 0.7868786536940353, 'num_leaves': 891, 'feature_fraction': 0.7324633398900874, 'max_depth': 13, 'learning_rate': 0.0096394883221087, 'subsample': 0.5914430871637022, 'colsample_bytree': 0.5497682693303939, 'min_data_in_leaf': 70}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:35,307] Trial 37 finished with value: 2559825.906421116 and parameters: {'n_estimators': 846, 'lambda_l2': 0.40686067891885547, 'bagging_fraction': 0.927237547902681, 'num_leaves': 586, 'feature_fraction': 0.8361269109070962, 'max_depth': 40, 'learning_rate': 0.023080082125123635, 'subsample': 0.4927121437045684, 'colsample_bytree': 0.45689346251053226, 'min_data_in_leaf': 14}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:40,328] Trial 38 finished with value: 2532978.857350763 and parameters: {'n_estimators': 823, 'lambda_l2': 0.6993712144590017, 'bagging_fraction': 0.8721704941625475, 'num_leaves': 90, 'feature_fraction': 0.7095013438047725, 'max_depth': 22, 'learning_rate': 0.029702132580773317, 'subsample': 0.7844533310380788, 'colsample_bytree': 0.30146160301298147, 'min_data_in_leaf': 22}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:48,623] Trial 39 finished with value: 2512285.0279384614 and parameters: {'n_estimators': 902, 'lambda_l2': 1.4505204591963747, 'bagging_fraction': 0.6775293113756724, 'num_leaves': 802, 'feature_fraction': 0.775990355530391, 'max_depth': 18, 'learning_rate': 0.013569723676261077, 'subsample': 0.4111415238960377, 'colsample_bytree': 0.46714425782214236, 'min_data_in_leaf': 37}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:50,794] Trial 40 finished with value: 2566327.4766320563 and parameters: {'n_estimators': 1061, 'lambda_l2': 2.4704730348962145, 'bagging_fraction': 0.7762811325289046, 'num_leaves': 667, 'feature_fraction': 0.799742948507864, 'max_depth': 5, 'learning_rate': 0.008376830953276624, 'subsample': 0.7144651095807839, 'colsample_bytree': 0.5511277486817537, 'min_data_in_leaf': 99}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:54,701] Trial 41 finished with value: 2493706.760142893 and parameters: {'n_estimators': 957, 'lambda_l2': 3.1458143499425857, 'bagging_fraction': 0.841778044032509, 'num_leaves': 464, 'feature_fraction': 0.8831064232129546, 'max_depth': 9, 'learning_rate': 0.01752371003334071, 'subsample': 0.6822668859388339, 'colsample_bytree': 0.49390746240967337, 'min_data_in_leaf': 37}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:55,672] Trial 42 finished with value: 2924370.0142463027 and parameters: {'n_estimators': 980, 'lambda_l2': 1.9120358910159108, 'bagging_fraction': 0.7495318505515659, 'num_leaves': 506, 'feature_fraction': 0.8503008952493492, 'max_depth': 1, 'learning_rate': 0.023012556460745195, 'subsample': 0.6305749635464287, 'colsample_bytree': 0.599228150375754, 'min_data_in_leaf': 32}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:44:58,908] Trial 43 finished with value: 2515087.36165442 and parameters: {'n_estimators': 926, 'lambda_l2': 2.7028781425331245, 'bagging_fraction': 0.6432459925946336, 'num_leaves': 508, 'feature_fraction': 0.8983864998308013, 'max_depth': 8, 'learning_rate': 0.010721584750216544, 'subsample': 0.6694579785110352, 'colsample_bytree': 0.42851635343074407, 'min_data_in_leaf': 53}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:45:00,863] Trial 44 finished with value: 2512260.1775654824 and parameters: {'n_estimators': 965, 'lambda_l2': 4.131480344384665, 'bagging_fraction': 0.7106546397011245, 'num_leaves': 1024, 'feature_fraction': 0.9702835349817451, 'max_depth': 5, 'learning_rate': 0.015405985585978063, 'subsample': 0.7272416569118908, 'colsample_bytree': 0.6909833560035341, 'min_data_in_leaf': 43}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:45:03,903] Trial 45 finished with value: 2484370.3501765043 and parameters: {'n_estimators': 1012, 'lambda_l2': 2.2699538787099485, 'bagging_fraction': 0.8231064998718511, 'num_leaves': 425, 'feature_fraction': 0.8089248495016613, 'max_depth': 7, 'learning_rate': 0.01104542859429231, 'subsample': 0.8357194479065891, 'colsample_bytree': 0.5900908437711846, 'min_data_in_leaf': 35}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:45:16,138] Trial 46 finished with value: 2509689.7678189 and parameters: {'n_estimators': 1017, 'lambda_l2': 2.2458978414041284, 'bagging_fraction': 0.9594350130364306, 'num_leaves': 408, 'feature_fraction': 0.8114824823770405, 'max_depth': 18, 'learning_rate': 0.010497966549286008, 'subsample': 0.8343775860489986, 'colsample_bytree': 0.5464930246368849, 'min_data_in_leaf': 26}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:45:24,358] Trial 47 finished with value: 2569331.270529251 and parameters: {'n_estimators': 949, 'lambda_l2': 0.41556391368440204, 'bagging_fraction': 0.8289522632264283, 'num_leaves': 190, 'feature_fraction': 0.7687793905867762, 'max_depth': 13, 'learning_rate': 0.004236763872087451, 'subsample': 0.8848217818822173, 'colsample_bytree': 0.622045909678827, 'min_data_in_leaf': 67}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:45:33,681] Trial 48 finished with value: 2492613.0572465486 and parameters: {'n_estimators': 883, 'lambda_l2': 1.566186367849204, 'bagging_fraction': 0.9444037959357493, 'num_leaves': 303, 'feature_fraction': 0.7384381842733188, 'max_depth': 11, 'learning_rate': 0.007477748918775924, 'subsample': 0.7469741206698409, 'colsample_bytree': 0.6692301024811949, 'min_data_in_leaf': 13}. Best is trial 9 with value: 2478494.487583945.
'verbose' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.




[I 2023-10-30 21:45:42,521] Trial 49 finished with value: 2534601.5155673283 and parameters: {'n_estimators': 1002, 'lambda_l2': 0.8909567854657356, 'bagging_fraction': 0.8840030133934397, 'num_leaves': 543, 'feature_fraction': 0.831591996144048, 'max_depth': 24, 'learning_rate': 0.010961950013496138, 'subsample': 0.9298200607749817, 'colsample_bytree': 0.4996688377466395, 'min_data_in_leaf': 83}. Best is trial 9 with value: 2478494.487583945.


In [18]:
params = study.best_params
params["objective"] = "regression"
params["metric"] = "rmse"
params["verbosity"] = -1

#Best
params2 = {
    'boosting': 'gbdt', 
    'lambda_l2': 7.068023653932577, 
    'bagging_fraction': 0.6261319161311689, 
    'num_leaves': 151, 
    'feature_fraction': 0.976804739696195, 
    'max_depth': 8, 'learning_rate': 0.01552248392804131, 
    'subsample': 0.4408875142865268, 
    'colsample_bytree': 0.3829262582959811, 
    'min_data_in_leaf': 21,
    "objective": "regression",
    "metric": "rmse",
    "n_estimators": 1000,
    "verbosity": -1,
}

In [19]:
model = lgb.LGBMRegressor(**params2)

model.fit(X, y)



In [20]:
processed_test_df_copy = processed_test_df.copy()

processed_test_df = processed_test_df.drop(['id'], axis=1)

In [21]:
pred = model.predict(processed_test_df[feats])
prediction_df = pd.DataFrame({
    'id': processed_test_df_copy['id'],
    'price_doc': pred
})
prediction_df

Unnamed: 0,id,price_doc
0,30474,5.419743e+06
1,30475,8.015836e+06
2,30476,5.303301e+06
3,30477,6.122940e+06
4,30478,4.889536e+06
...,...,...
7657,38131,7.635401e+06
7658,38132,4.865493e+06
7659,38133,4.579440e+06
7660,38134,5.300390e+06


In [22]:
prediction_df.to_csv(output_path, index=False)

In [23]:
params

{'n_estimators': 957,
 'lambda_l2': 1.0179522290678555,
 'bagging_fraction': 0.803425892987424,
 'num_leaves': 730,
 'feature_fraction': 0.756874719068369,
 'max_depth': 9,
 'learning_rate': 0.010123433800404112,
 'subsample': 0.4105167842639978,
 'colsample_bytree': 0.5230497041584328,
 'min_data_in_leaf': 10,
 'objective': 'regression',
 'metric': 'rmse',
 'verbosity': -1}