In [105]:
import torch
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print(device)

cuda:0


In [106]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV
from lightgbm import LGBMRegressor
from catboost import CatBoostClassifier

import pandas as pd
from category_encoders import OneHotEncoder, TargetEncoder

import warnings

# 경고 무시
warnings.filterwarnings("ignore", category=FutureWarning)

import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# 한글 폰트 설정
plt.rcParams['font.family'] = 'Malgun Gothic'  # 예시로 'NanumGothic'을 사용

In [89]:
import random
import os

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

In [90]:
train = pd.read_csv("../data/train.csv")
test = pd.read_csv("../data/test.csv")

In [91]:
train = train.drop(['ID'], axis=1)
test = test.drop(['ID'], axis=1)

In [92]:
train

Unnamed: 0,Age,Gender,Education_Status,Employment_Status,Working_Week (Yearly),Industry_Status,Occupation_Status,Race,Hispanic_Origin,Martial_Status,...,Citizenship,Birth_Country,Birth_Country (Father),Birth_Country (Mother),Tax_Status,Gains,Losses,Dividends,Income_Status,Income
0,63,M,Middle (7-8),Full-Time,4,Social Services,Services,White,All other,Married,...,Native,US,US,US,Nonfiler,0,0,0,Unknown,425
1,37,M,Associates degree (Vocational),Full-Time,52,Entertainment,Services,White,All other,Separated,...,Native,US,US,US,Single,0,0,0,Under Median,0
2,58,F,High graduate,Full-Time,52,Manufacturing (Non-durable),Admin Support (include Clerical),Black,All other,Married,...,Native,US,US,US,Married Filling Jointly both under 65 (MFJ),3411,0,0,Under Median,860
3,44,M,High graduate,Full-Time,52,Retail,Technicians & Support,White,All other,Divorced,...,Native,US,US,US,Single,0,0,0,Under Median,850
4,37,F,High graduate,Full-Time,52,Retail,Sales,White,All other,Divorced,...,Native,US,US,US,Head of Household (HOH),0,0,0,Unknown,570
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,33,M,High graduate,Children or Armed Forces,52,Manufacturing (Durable),Handlers/Cleaners,White,All other,Single,...,Native,US,US,US,Single,0,0,0,Under Median,1300
19996,20,F,College,Full-Time,12,Education,Admin Support (include Clerical),White,Mexican-American,Single,...,Native,US,Mexico,Mexico,Nonfiler,0,0,0,Under Median,850
19997,22,M,College,Children or Armed Forces,52,Transportation,Technicians & Support,White,All other,Single,...,Native,US,US,US,Single,0,0,0,Unknown,999
19998,76,F,High graduate,Not Working,0,Not in universe or children,Unknown,White,All other,Widowed,...,Native,US,Scotland,England,Single,0,0,0,Under Median,0


In [93]:
test['Household_Status'].fillna('Nonfamily householder', inplace=True)

In [94]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 22 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Age                     20000 non-null  int64 
 1   Gender                  20000 non-null  object
 2   Education_Status        20000 non-null  object
 3   Employment_Status       20000 non-null  object
 4   Working_Week (Yearly)   20000 non-null  int64 
 5   Industry_Status         20000 non-null  object
 6   Occupation_Status       20000 non-null  object
 7   Race                    20000 non-null  object
 8   Hispanic_Origin         20000 non-null  object
 9   Martial_Status          20000 non-null  object
 10  Household_Status        20000 non-null  object
 11  Household_Summary       20000 non-null  object
 12  Citizenship             20000 non-null  object
 13  Birth_Country           20000 non-null  object
 14  Birth_Country (Father)  20000 non-null  object
 15  Bi

In [95]:
# 원-핫 인코딩을 적용할 열 지정
one_hot_cols = ['Gender', 'Income_Status','Tax_Status','Citizenship']

# 원-핫 인코더 생성 및 train 데이터에 적용
one_hot_encoder = OneHotEncoder(cols=one_hot_cols, use_cat_names=True)
train_encoded = one_hot_encoder.fit_transform(train[one_hot_cols])

# 동일한 원-핫 인코더를 test 데이터에 적용
test_encoded = one_hot_encoder.transform(test[one_hot_cols])

In [96]:
# 레이블 인코더 객체 생성
label_encoder = LabelEncoder()

# one_hot_cols에 명시된 열을 제외한 카테고리형 변수를 인코딩 대상으로 설정
encoding_target = [col for col in train.columns if train[col].dtype == 'object' and col not in one_hot_cols]

# 인코딩 대상 열에 대해 레이블 인코딩 적용
for col in encoding_target:
    # train과 test 데이터셋에서 해당 열의 모든 값을 문자열로 변환
    test_encoded[col] = train[col].astype(str)
    test_encoded[col] = test[col].astype(str)

    # 레이블 인코더를 train 데이터에 적합시키고 변환
    label_encoder.fit(train[col])
    train_encoded[col] = label_encoder.transform(train[col])

    # test 데이터에도 동일한 인코더를 적용
    test_encoded[col] = label_encoder.transform(test[col])

In [97]:
train_encoded = pd.concat([train_encoded, train[['Working_Week (Yearly)','Age','Gains','Losses','Income']]], axis=1)
test_encoded = pd.concat([test_encoded, test[['Working_Week (Yearly)','Age','Gains','Losses']]], axis=1)

train = train_encoded
test = test_encoded

In [98]:
train = train.drop(train[train['Gains'] > 60000].index)

train['working_week_0'] = (train['Working_Week (Yearly)']<10).astype(int)
test['working_week_0'] = (test['Working_Week (Yearly)']<10).astype(int)

test['working_week_45']=(test['Working_Week (Yearly)']>45).astype(int)
test['working_week_45']=(test['Working_Week (Yearly)']>45).astype(int)

In [55]:
# # 로그 변환을 적용할 열 목록
# columns_to_log_transform = ['Working_Week (Yearly)', 'Age', 'Gains', 'Losses', 'Income']
# 
# # 각 열에 대해 로그 변환 적용
# for column in columns_to_log_transform:
#     train[column] = np.log1p(train[column])
# 
# columns_to_log_transform_test = ['Working_Week (Yearly)', 'Age', 'Gains', 'Losses']
# 
# for column in columns_to_log_transform_test:
#     test[column] = np.log1p(test[column])



In [56]:
train

Unnamed: 0,Gender_M,Gender_F,Income_Status_Unknown,Income_Status_Under Median,Income_Status_Over Median,Tax_Status_Nonfiler,Tax_Status_Single,Tax_Status_Married Filling Jointly both under 65 (MFJ),Tax_Status_Head of Household (HOH),Tax_Status_Married Filling Jointly one over 65 & one under 65 (MFJ),...,Birth_Country,Birth_Country (Father),Birth_Country (Mother),Working_Week (Yearly),Age,Gains,Losses,Income,working_week_0,working_week_45
0,1,0,1,0,0,1,0,0,0,0,...,39,39,39,4,63,0,0,425,1,0
1,1,0,0,1,0,0,1,0,0,0,...,39,39,39,52,37,0,0,0,0,1
2,0,1,0,1,0,0,0,1,0,0,...,39,39,39,52,58,3411,0,860,0,1
3,1,0,0,1,0,0,1,0,0,0,...,39,39,39,52,44,0,0,850,0,1
4,0,1,1,0,0,0,0,0,1,0,...,39,39,39,52,37,0,0,570,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,1,0,0,1,0,0,1,0,0,0,...,39,39,39,52,33,0,0,1300,0,1
19996,0,1,0,1,0,1,0,0,0,0,...,39,25,25,12,20,0,0,850,0,0
19997,1,0,1,0,0,0,1,0,0,0,...,39,39,39,52,22,0,0,999,0,1
19998,0,1,0,1,0,0,1,0,0,0,...,39,34,8,0,76,0,0,0,1,0


In [57]:
test

Unnamed: 0,Gender_M,Gender_F,Income_Status_Unknown,Income_Status_Under Median,Income_Status_Over Median,Tax_Status_Nonfiler,Tax_Status_Single,Tax_Status_Married Filling Jointly both under 65 (MFJ),Tax_Status_Head of Household (HOH),Tax_Status_Married Filling Jointly one over 65 & one under 65 (MFJ),...,Martial_Status,Household_Status,Household_Summary,Birth_Country,Birth_Country (Father),Birth_Country (Mother),Working_Week (Yearly),Age,Gains,Losses
0,1,0,0,1,0,0,1,0,0,0,...,5,18,4,39,40,40,0,79,0,0
1,1,0,0,1,0,1,0,0,0,0,...,5,2,0,39,39,39,0,47,0,0
2,0,1,0,1,0,0,1,0,0,0,...,5,2,0,39,39,39,52,18,0,0
3,0,1,1,0,0,0,0,1,0,0,...,1,30,7,39,39,39,30,39,0,0
4,1,0,1,0,0,1,0,0,0,0,...,5,7,2,39,39,39,0,6,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,1,0,0,1,0,0,1,0,0,0,...,5,18,4,39,39,39,52,31,0,0
9996,1,0,0,1,0,0,0,1,0,0,...,1,16,4,39,39,39,52,27,0,0
9997,1,0,0,1,0,1,0,0,0,0,...,5,2,0,39,39,39,7,18,0,0
9998,1,0,0,1,0,1,0,0,0,0,...,5,7,2,39,39,39,0,9,0,0


In [58]:
# train['Income'] = np.log1p(train['Income'])

In [59]:
X = train[train.columns.drop('Income')]
Y = train['Income']

In [60]:
# X와 Y로 나누기
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.05, random_state=42, shuffle=True)

In [61]:
import optuna
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# 데이터셋을 미리 준비하세요
# X_train, X_test, y_train, y_test = train_test_split(...)

def objective(trial):
    # Optuna가 탐색할 파라미터 공간 정의
    param = {
        'objective': 'reg:squarederror',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 9),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
    }

    # XGBoost 모델 초기화 및 훈련
    model = XGBRegressor(**param)
    model.fit(X_train, y_train)

    # 예측 및 RMSE 계산
    preds = model.predict(X_test)
    preds[preds < 100] = 0
    rmse = np.sqrt(mean_squared_error(y_test, preds))

    return rmse

# Optuna study 객체 생성 및 최적화 실행
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100) # n_trials: 시도할 횟수

# 최적의 파라미터 출력
print(f"Best trial: {study.best_trial.params}")

[I 2024-03-29 23:21:36,490] A new study created in memory with name: no-name-098acb09-96c2-41ea-9677-857ddd64b9e2
[I 2024-03-29 23:21:38,544] Trial 0 finished with value: 626.7743456849909 and parameters: {'learning_rate': 0.15635580177779082, 'n_estimators': 727, 'max_depth': 9, 'min_child_weight': 6, 'subsample': 0.5718119595719013, 'colsample_bytree': 0.7492333826384184}. Best is trial 0 with value: 626.7743456849909.
[I 2024-03-29 23:21:38,886] Trial 1 finished with value: 512.3669629768935 and parameters: {'learning_rate': 0.08040649096029281, 'n_estimators': 392, 'max_depth': 3, 'min_child_weight': 7, 'subsample': 0.9038857309169082, 'colsample_bytree': 0.6262879140618095}. Best is trial 1 with value: 512.3669629768935.
[I 2024-03-29 23:21:39,658] Trial 2 finished with value: 532.7931002649436 and parameters: {'learning_rate': 0.11212484494296009, 'n_estimators': 803, 'max_depth': 4, 'min_child_weight': 4, 'subsample': 0.6348264667186465, 'colsample_bytree': 0.6012836578468949}. 

Best trial: {'learning_rate': 0.05335048300623485, 'n_estimators': 326, 'max_depth': 3, 'min_child_weight': 9, 'subsample': 0.543193413822322, 'colsample_bytree': 0.6433479331784054}


In [81]:
from xgboost import XGBRegressor

# XGBRegressor 모델 초기화
xgb_model = XGBRegressor(objective='reg:squarederror',
                         learning_rate =0.05760153355463915,
                         n_estimators= 517,
                         max_depth= 3,
                         min_child_weight= 8,
                         subsample= 0.800517868378667,
                         colsample_bytree= 0.7715630487016912,
                         
)

# 모델 훈련
xgb_model.fit(X_train, y_train)

# 테스트 데이터에 대한 예측 수행
y_pred_xgb = xgb_model.predict(X_test)
y_pred_xgb[y_pred_xgb<100] = 0
# RMSE 계산
rmse_xgb = mean_squared_error(y_test, y_pred_xgb, squared=False)
print(f"XGB RMSE: {rmse_xgb}")

XGB RMSE: 512.9901477025223


In [64]:
def objective(trial):
    # Optuna가 탐색할 파라미터 공간 정의
    param = {
        'objective': 'regression',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'num_leaves': trial.suggest_int('num_leaves', 31, 256),  # LightGBM 특유의 파라미터
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 9),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
    }

    # LightGBM 모델 초기화 및 훈련
    model = LGBMRegressor(**param)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)])  # 조기 종료를 위한 설정 추가

    # 예측 및 RMSE 계산
    preds = model.predict(X_test)
    preds[preds < 100] = 0  # 특정 조건에 대한 후처리
    rmse = np.sqrt(mean_squared_error(y_test, preds))

    return rmse

# Optuna study 객체 생성 및 최적화 실행
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)  # n_trials: 시도할 횟수

# 최적의 파라미터 출력
print(f"Best trial: {study.best_trial.params}")

[I 2024-03-29 23:24:11,493] A new study created in memory with name: no-name-fce12d39-7dc3-43a9-930f-35ebb02438c5


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002133 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:12,040] Trial 0 finished with value: 552.3845494125699 and parameters: {'learning_rate': 0.21515982062573485, 'n_estimators': 516, 'max_depth': 6, 'num_leaves': 251, 'min_child_weight': 4, 'subsample': 0.9247568289585262, 'colsample_bytree': 0.8824948406164708}. Best is trial 0 with value: 552.3845494125699.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000782 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:12,292] Trial 1 finished with value: 514.0351650130392 and parameters: {'learning_rate': 0.021338810714605576, 'n_estimators': 469, 'max_depth': 4, 'num_leaves': 153, 'min_child_weight': 4, 'subsample': 0.7666826144247822, 'colsample_bytree': 0.7367606099034297}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000710 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:13,035] Trial 2 finished with value: 548.5577789113653 and parameters: {'learning_rate': 0.14048499020486474, 'n_estimators': 923, 'max_depth': 7, 'num_leaves': 55, 'min_child_weight': 8, 'subsample': 0.5046966673996076, 'colsample_bytree': 0.7261012615621567}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000689 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:13,186] Trial 3 finished with value: 517.711738220013 and parameters: {'learning_rate': 0.028195791782096156, 'n_estimators': 223, 'max_depth': 4, 'num_leaves': 51, 'min_child_weight': 1, 'subsample': 0.758285851104401, 'colsample_bytree': 0.5057176253363034}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000529 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:13,594] Trial 4 finished with value: 562.5623134858037 and parameters: {'learning_rate': 0.2760170732550693, 'n_estimators': 310, 'max_depth': 9, 'num_leaves': 185, 'min_child_weight': 4, 'subsample': 0.6615937924617046, 'colsample_bytree': 0.7602338662958832}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000768 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:13,971] Trial 5 finished with value: 517.1625130228414 and parameters: {'learning_rate': 0.05922246660000004, 'n_estimators': 448, 'max_depth': 7, 'num_leaves': 40, 'min_child_weight': 1, 'subsample': 0.7846463948654578, 'colsample_bytree': 0.504799342409444}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000682 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:14,415] Trial 6 finished with value: 549.3345906060161 and parameters: {'learning_rate': 0.18310568309447284, 'n_estimators': 408, 'max_depth': 8, 'num_leaves': 227, 'min_child_weight': 9, 'subsample': 0.5193838483423519, 'colsample_bytree': 0.7481637429616392}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000671 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:14,887] Trial 7 finished with value: 538.437598990222 and parameters: {'learning_rate': 0.1084572461098634, 'n_estimators': 561, 'max_depth': 7, 'num_leaves': 242, 'min_child_weight': 6, 'subsample': 0.8582915840858699, 'colsample_bytree': 0.617997227011821}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000583 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:15,323] Trial 8 finished with value: 545.7003608193731 and parameters: {'learning_rate': 0.19528760190745975, 'n_estimators': 781, 'max_depth': 5, 'num_leaves': 126, 'min_child_weight': 2, 'subsample': 0.5200692367008353, 'colsample_bytree': 0.6898929020862854}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000569 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:15,519] Trial 9 finished with value: 523.8862603830212 and parameters: {'learning_rate': 0.15821655473256743, 'n_estimators': 324, 'max_depth': 5, 'num_leaves': 158, 'min_child_weight': 8, 'subsample': 0.6040291489146754, 'colsample_bytree': 0.9968959886162118}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000743 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:15,842] Trial 10 finished with value: 519.9542268555674 and parameters: {'learning_rate': 0.010631134110776886, 'n_estimators': 730, 'max_depth': 3, 'num_leaves': 93, 'min_child_weight': 6, 'subsample': 0.9824515075010545, 'colsample_bytree': 0.8232832571408835}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000676 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:16,099] Trial 11 finished with value: 514.97455931788 and parameters: {'learning_rate': 0.07911835628416233, 'n_estimators': 545, 'max_depth': 3, 'num_leaves': 128, 'min_child_weight': 2, 'subsample': 0.7916259444361153, 'colsample_bytree': 0.5470161133922573}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000677 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:16,392] Trial 12 finished with value: 517.0391865488095 and parameters: {'learning_rate': 0.07809821427502595, 'n_estimators': 644, 'max_depth': 3, 'num_leaves': 124, 'min_child_weight': 3, 'subsample': 0.8235197364373958, 'colsample_bytree': 0.6024286906021785}. Best is trial 1 with value: 514.0351650130392.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:16,714] Trial 13 finished with value: 518.4845649578162 and parameters: {'learning_rate': 0.0649035081258586, 'n_estimators': 610, 'max_depth': 4, 'num_leaves': 185, 'min_child_weight': 3, 'subsample': 0.6734781949371909, 'colsample_bytree': 0.6115172988620998}. Best is trial 1 with value: 514.0351650130392.




[I 2024-03-29 23:24:16,829] Trial 14 finished with value: 513.598954365698 and parameters: {'learning_rate': 0.10350417368422773, 'n_estimators': 129, 'max_depth': 4, 'num_leaves': 90, 'min_child_weight': 5, 'subsample': 0.7119212428204025, 'colsample_bytree': 0.9020847280207196}. Best is trial 14 with value: 513.598954365698.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000682 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000704 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:16,945] Trial 15 finished with value: 516.2839480026137 and parameters: {'learning_rate': 0.11884885872145123, 'n_estimators': 102, 'max_depth': 5, 'num_leaves': 85, 'min_child_weight': 5, 'subsample': 0.7087739824657449, 'colsample_bytree': 0.9099374067411211}. Best is trial 14 with value: 513.598954365698.
[I 2024-03-29 23:24:17,054] Trial 16 finished with value: 518.7857993341672 and parameters: {'learning_rate': 0.042682441193711895, 'n_estimators': 106, 'max_depth': 4, 'num_leaves': 92, 'min_child_weight': 6, 'subsample': 0.6188987563072366, 'colsample_bytree': 0.8271802364586399}. Best is trial 14 with value: 513.598954365698.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000719 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000591 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:17,719] Trial 17 finished with value: 544.5294989823926 and parameters: {'learning_rate': 0.09908430055413028, 'n_estimators': 944, 'max_depth': 6, 'num_leaves': 167, 'min_child_weight': 5, 'subsample': 0.878499000641336, 'colsample_bytree': 0.9980769898226942}. Best is trial 14 with value: 513.598954365698.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000761 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:17,911] Trial 18 finished with value: 517.0382004173435 and parameters: {'learning_rate': 0.021312787293366144, 'n_estimators': 270, 'max_depth': 4, 'num_leaves': 197, 'min_child_weight': 7, 'subsample': 0.7217807531165574, 'colsample_bytree': 0.9312668516030107}. Best is trial 14 with value: 513.598954365698.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000698 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:18,068] Trial 19 finished with value: 534.8528566350303 and parameters: {'learning_rate': 0.2505236707250887, 'n_estimators': 194, 'max_depth': 5, 'num_leaves': 102, 'min_child_weight': 4, 'subsample': 0.587471103533189, 'colsample_bytree': 0.8264293476979507}. Best is trial 14 with value: 513.598954365698.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000691 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:18,254] Trial 20 finished with value: 516.8192256756051 and parameters: {'learning_rate': 0.1382066590738562, 'n_estimators': 388, 'max_depth': 3, 'num_leaves': 156, 'min_child_weight': 3, 'subsample': 0.6988468597337537, 'colsample_bytree': 0.688586538278492}. Best is trial 14 with value: 513.598954365698.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000732 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:18,593] Trial 21 finished with value: 517.2580860676635 and parameters: {'learning_rate': 0.08634925819835051, 'n_estimators': 708, 'max_depth': 3, 'num_leaves': 123, 'min_child_weight': 2, 'subsample': 0.7935641063022586, 'colsample_bytree': 0.5590163713535987}. Best is trial 14 with value: 513.598954365698.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000700 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:18,873] Trial 22 finished with value: 513.0702714132658 and parameters: {'learning_rate': 0.051991346955760825, 'n_estimators': 508, 'max_depth': 4, 'num_leaves': 140, 'min_child_weight': 2, 'subsample': 0.7598271635857259, 'colsample_bytree': 0.6883567195879576}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000676 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:19,152] Trial 23 finished with value: 513.6141565209368 and parameters: {'learning_rate': 0.04344242360218227, 'n_estimators': 456, 'max_depth': 4, 'num_leaves': 144, 'min_child_weight': 4, 'subsample': 0.7536187935351427, 'colsample_bytree': 0.6802461669165911}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000715 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:19,659] Trial 24 finished with value: 521.6813096114633 and parameters: {'learning_rate': 0.046562606765682794, 'n_estimators': 811, 'max_depth': 5, 'num_leaves': 69, 'min_child_weight': 5, 'subsample': 0.7304978039921175, 'colsample_bytree': 0.6725907901513134}. Best is trial 22 with value: 513.0702714132658.




[I 2024-03-29 23:24:19,871] Trial 25 finished with value: 519.7583652477463 and parameters: {'learning_rate': 0.11877010937721598, 'n_estimators': 370, 'max_depth': 4, 'num_leaves': 143, 'min_child_weight': 2, 'subsample': 0.8313145624548997, 'colsample_bytree': 0.7791044888406771}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000744 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:20,082] Trial 26 finished with value: 515.3597557278438 and parameters: {'learning_rate': 0.04817914349526322, 'n_estimators': 183, 'max_depth': 6, 'num_leaves': 112, 'min_child_weight': 3, 'subsample': 0.6460514698495949, 'colsample_bytree': 0.6548006441240856}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000657 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000690 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:20,396] Trial 27 finished with value: 528.1022321432881 and parameters: {'learning_rate': 0.0900627232468119, 'n_estimators': 479, 'max_depth': 5, 'num_leaves': 207, 'min_child_weight': 7, 'subsample': 0.8927565357527378, 'colsample_bytree': 0.7915984754540109}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000697 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:20,738] Trial 28 finished with value: 520.0421283280334 and parameters: {'learning_rate': 0.06980302267209719, 'n_estimators': 656, 'max_depth': 4, 'num_leaves': 73, 'min_child_weight': 1, 'subsample': 0.6845127713924449, 'colsample_bytree': 0.7159540040750081}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:20,989] Trial 29 finished with value: 515.752715370354 and parameters: {'learning_rate': 0.03806105678552942, 'n_estimators': 263, 'max_depth': 6, 'num_leaves': 137, 'min_child_weight': 4, 'subsample': 0.7429443383946931, 'colsample_bytree': 0.8729009859595559}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000680 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:21,338] Trial 30 finished with value: 522.9314062586384 and parameters: {'learning_rate': 0.13278712875124796, 'n_estimators': 853, 'max_depth': 3, 'num_leaves': 110, 'min_child_weight': 4, 'subsample': 0.9549374682423244, 'colsample_bytree': 0.6349918084575888}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000739 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:21,634] Trial 31 finished with value: 517.9163780987534 and parameters: {'learning_rate': 0.012452944578537578, 'n_estimators': 496, 'max_depth': 4, 'num_leaves': 169, 'min_child_weight': 4, 'subsample': 0.7644319210158224, 'colsample_bytree': 0.716333232016699}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000739 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:21,944] Trial 32 finished with value: 516.3958441413436 and parameters: {'learning_rate': 0.05476234445712441, 'n_estimators': 578, 'max_depth': 4, 'num_leaves': 157, 'min_child_weight': 5, 'subsample': 0.8166550690361561, 'colsample_bytree': 0.7367913320137316}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000653 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:22,197] Trial 33 finished with value: 514.2757437140826 and parameters: {'learning_rate': 0.03103104116643041, 'n_estimators': 435, 'max_depth': 4, 'num_leaves': 149, 'min_child_weight': 5, 'subsample': 0.7616506236705161, 'colsample_bytree': 0.7012740946962512}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:22,545] Trial 34 finished with value: 515.577113463984 and parameters: {'learning_rate': 0.030899524164388834, 'n_estimators': 521, 'max_depth': 5, 'num_leaves': 180, 'min_child_weight': 6, 'subsample': 0.7358344001519772, 'colsample_bytree': 0.6589032226015736}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000657 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:22,740] Trial 35 finished with value: 524.689906308996 and parameters: {'learning_rate': 0.1669827841035002, 'n_estimators': 341, 'max_depth': 4, 'num_leaves': 31, 'min_child_weight': 4, 'subsample': 0.653051951227845, 'colsample_bytree': 0.7801347742222333}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000692 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:23,319] Trial 36 finished with value: 533.1084019699886 and parameters: {'learning_rate': 0.06505665591177458, 'n_estimators': 459, 'max_depth': 9, 'num_leaves': 139, 'min_child_weight': 3, 'subsample': 0.8465076369612468, 'colsample_bytree': 0.8785527422362033}. Best is trial 22 with value: 513.0702714132658.
[I 2024-03-29 23:24:23,544] Trial 37 finished with value: 515.537382955845 and parameters: {'learning_rate': 0.10029580598037535, 'n_estimators': 414, 'max_depth': 3, 'num_leaves': 210, 'min_child_weight': 1, 'subsample': 0.7922156118021807, 'colsample_bytree': 0.5792685205724427}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000728 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000664 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:23,857] Trial 38 finished with value: 550.0221012354646 and parameters: {'learning_rate': 0.23291066244304942, 'n_estimators': 271, 'max_depth': 8, 'num_leaves': 71, 'min_child_weight': 3, 'subsample': 0.7598764698820301, 'colsample_bytree': 0.752049935514771}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000726 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:24,020] Trial 39 finished with value: 519.2708635675884 and parameters: {'learning_rate': 0.02500488189100502, 'n_estimators': 160, 'max_depth': 5, 'num_leaves': 58, 'min_child_weight': 7, 'subsample': 0.9131865297489373, 'colsample_bytree': 0.9669017788209734}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000669 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:24,495] Trial 40 finished with value: 527.4488878389617 and parameters: {'learning_rate': 0.06030578348664799, 'n_estimators': 515, 'max_depth': 7, 'num_leaves': 168, 'min_child_weight': 2, 'subsample': 0.5635308747993473, 'colsample_bytree': 0.8529489378755951}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:24,747] Trial 41 finished with value: 513.8858424496351 and parameters: {'learning_rate': 0.035581916943103704, 'n_estimators': 426, 'max_depth': 4, 'num_leaves': 148, 'min_child_weight': 5, 'subsample': 0.7774394864308617, 'colsample_bytree': 0.6959890780787502}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000657 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:25,254] Trial 42 finished with value: 514.2731945682223 and parameters: {'learning_rate': 0.013952999076496397, 'n_estimators': 992, 'max_depth': 4, 'num_leaves': 176, 'min_child_weight': 6, 'subsample': 0.6992999901478613, 'colsample_bytree': 0.7289062538052846}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000701 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:25,551] Trial 43 finished with value: 539.7187058331288 and parameters: {'learning_rate': 0.2959775664046116, 'n_estimators': 580, 'max_depth': 4, 'num_leaves': 114, 'min_child_weight': 5, 'subsample': 0.7756374053530686, 'colsample_bytree': 0.6434740735928554}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000695 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:25,774] Trial 44 finished with value: 514.9475577600261 and parameters: {'learning_rate': 0.036672136592934425, 'n_estimators': 373, 'max_depth': 3, 'num_leaves': 133, 'min_child_weight': 4, 'subsample': 0.8102390007651339, 'colsample_bytree': 0.678416974652537}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000711 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:26,020] Trial 45 finished with value: 521.4841108838671 and parameters: {'learning_rate': 0.07591280393321764, 'n_estimators': 317, 'max_depth': 5, 'num_leaves': 147, 'min_child_weight': 6, 'subsample': 0.7429492324834509, 'colsample_bytree': 0.7617964250390467}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000708 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:26,277] Trial 46 finished with value: 514.1690516310539 and parameters: {'learning_rate': 0.05115994500661718, 'n_estimators': 448, 'max_depth': 4, 'num_leaves': 187, 'min_child_weight': 4, 'subsample': 0.7169192769809061, 'colsample_bytree': 0.7037217232221316}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000691 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:26,553] Trial 47 finished with value: 519.8084335421545 and parameters: {'learning_rate': 0.11943367126582648, 'n_estimators': 634, 'max_depth': 3, 'num_leaves': 119, 'min_child_weight': 5, 'subsample': 0.851506664802328, 'colsample_bytree': 0.802634959099161}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000681 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:26,992] Trial 48 finished with value: 527.5938207979499 and parameters: {'learning_rate': 0.08679553783239274, 'n_estimators': 692, 'max_depth': 5, 'num_leaves': 97, 'min_child_weight': 5, 'subsample': 0.6769026319046135, 'colsample_bytree': 0.6242612533375157}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000689 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:27,331] Trial 49 finished with value: 514.0521438381172 and parameters: {'learning_rate': 0.02232909938635346, 'n_estimators': 555, 'max_depth': 4, 'num_leaves': 150, 'min_child_weight': 9, 'subsample': 0.7854938670783339, 'colsample_bytree': 0.6001257377931677}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000695 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:27,598] Trial 50 finished with value: 523.9974258114274 and parameters: {'learning_rate': 0.19907382691891967, 'n_estimators': 609, 'max_depth': 3, 'num_leaves': 248, 'min_child_weight': 6, 'subsample': 0.6362340221141566, 'colsample_bytree': 0.7416775535680646}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000673 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:27,922] Trial 51 finished with value: 514.8092890965776 and parameters: {'learning_rate': 0.021210680525757288, 'n_estimators': 549, 'max_depth': 4, 'num_leaves': 156, 'min_child_weight': 8, 'subsample': 0.7995869976856629, 'colsample_bytree': 0.5938739938837719}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000683 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:28,257] Trial 52 finished with value: 515.1515192727687 and parameters: {'learning_rate': 0.03402583345281125, 'n_estimators': 482, 'max_depth': 5, 'num_leaves': 131, 'min_child_weight': 9, 'subsample': 0.7748138905101546, 'colsample_bytree': 0.6662008681048134}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000656 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:28,559] Trial 53 finished with value: 513.6565451510235 and parameters: {'learning_rate': 0.05650964110782507, 'n_estimators': 519, 'max_depth': 4, 'num_leaves': 151, 'min_child_weight': 7, 'subsample': 0.8290973872883054, 'colsample_bytree': 0.558413972043778}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000746 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:28,810] Trial 54 finished with value: 514.9968864056347 and parameters: {'learning_rate': 0.05845035542087725, 'n_estimators': 422, 'max_depth': 4, 'num_leaves': 163, 'min_child_weight': 7, 'subsample': 0.8330243504734306, 'colsample_bytree': 0.9218569808392236}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000699 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:29,092] Trial 55 finished with value: 513.2974864056367 and parameters: {'learning_rate': 0.04447830273622825, 'n_estimators': 525, 'max_depth': 3, 'num_leaves': 84, 'min_child_weight': 3, 'subsample': 0.8719326258722977, 'colsample_bytree': 0.5183303839079575}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000689 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:29,373] Trial 56 finished with value: 515.6274684241653 and parameters: {'learning_rate': 0.07388627362020862, 'n_estimators': 522, 'max_depth': 3, 'num_leaves': 63, 'min_child_weight': 3, 'subsample': 0.8707117622440514, 'colsample_bytree': 0.5185209832328985}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000697 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:29,698] Trial 57 finished with value: 516.4888004663698 and parameters: {'learning_rate': 0.1045401355885892, 'n_estimators': 594, 'max_depth': 3, 'num_leaves': 83, 'min_child_weight': 2, 'subsample': 0.9569829236851197, 'colsample_bytree': 0.5294912811537624}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000675 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:29,899] Trial 58 finished with value: 514.7080811636577 and parameters: {'learning_rate': 0.0427621200999063, 'n_estimators': 344, 'max_depth': 3, 'num_leaves': 43, 'min_child_weight': 3, 'subsample': 0.8786209344879553, 'colsample_bytree': 0.565386628445697}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000614 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:30,441] Trial 59 finished with value: 519.8419897051589 and parameters: {'learning_rate': 0.08432933745586105, 'n_estimators': 752, 'max_depth': 4, 'num_leaves': 105, 'min_child_weight': 1, 'subsample': 0.9088389108292817, 'colsample_bytree': 0.5007168649737047}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000670 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:30,965] Trial 60 finished with value: 519.132528144968 and parameters: {'learning_rate': 0.06506853564082007, 'n_estimators': 678, 'max_depth': 5, 'num_leaves': 88, 'min_child_weight': 2, 'subsample': 0.836423210458235, 'colsample_bytree': 0.53590246519868}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000677 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:31,248] Trial 61 finished with value: 513.6632840884037 and parameters: {'learning_rate': 0.04769644493205736, 'n_estimators': 473, 'max_depth': 4, 'num_leaves': 125, 'min_child_weight': 4, 'subsample': 0.809182382334025, 'colsample_bytree': 0.6988970792288589}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000724 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:31,564] Trial 62 finished with value: 516.1062706351114 and parameters: {'learning_rate': 0.05231432138344654, 'n_estimators': 493, 'max_depth': 4, 'num_leaves': 141, 'min_child_weight': 4, 'subsample': 0.8025674785826579, 'colsample_bytree': 0.5542395857820689}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000718 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:31,821] Trial 63 finished with value: 513.5198606518211 and parameters: {'learning_rate': 0.04312010971784043, 'n_estimators': 395, 'max_depth': 4, 'num_leaves': 80, 'min_child_weight': 4, 'subsample': 0.8192519869334535, 'colsample_bytree': 0.6453833238152594}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000878 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:32,072] Trial 64 finished with value: 514.8756320187841 and parameters: {'learning_rate': 0.04556542125016959, 'n_estimators': 462, 'max_depth': 3, 'num_leaves': 103, 'min_child_weight': 4, 'subsample': 0.8637341763288369, 'colsample_bytree': 0.5811867176939299}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000728 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:32,324] Trial 65 finished with value: 515.8129637328083 and parameters: {'learning_rate': 0.06958371788653528, 'n_estimators': 392, 'max_depth': 4, 'num_leaves': 83, 'min_child_weight': 3, 'subsample': 0.7545492407786452, 'colsample_bytree': 0.6391174897780747}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000692 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:32,690] Trial 66 finished with value: 520.6112383995887 and parameters: {'learning_rate': 0.05303412628253354, 'n_estimators': 523, 'max_depth': 5, 'num_leaves': 80, 'min_child_weight': 4, 'subsample': 0.8921457386226866, 'colsample_bytree': 0.6190886812228493}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000757 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:32,888] Trial 67 finished with value: 515.8396956707891 and parameters: {'learning_rate': 0.08993986167262184, 'n_estimators': 293, 'max_depth': 4, 'num_leaves': 93, 'min_child_weight': 4, 'subsample': 0.8219225757276623, 'colsample_bytree': 0.683740151766957}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000689 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:33,176] Trial 68 finished with value: 523.0979593663205 and parameters: {'learning_rate': 0.1451574356278363, 'n_estimators': 636, 'max_depth': 3, 'num_leaves': 119, 'min_child_weight': 3, 'subsample': 0.8424432560467775, 'colsample_bytree': 0.714553677914189}. Best is trial 22 with value: 513.0702714132658.




[I 2024-03-29 23:24:33,402] Trial 69 finished with value: 516.0030716887286 and parameters: {'learning_rate': 0.09473169530557138, 'n_estimators': 243, 'max_depth': 5, 'num_leaves': 74, 'min_child_weight': 2, 'subsample': 0.7267481279376303, 'colsample_bytree': 0.5150147691586723}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:33,647] Trial 70 finished with value: 519.1455679727351 and parameters: {'learning_rate': 0.014532423104173076, 'n_estimators': 354, 'max_depth': 4, 'num_leaves': 50, 'min_child_weight': 5, 'subsample': 0.9365304246726329, 'colsample_bytree': 0.6540786892142935}. Best is trial 22 with value: 513.0702714132658.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000710 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000714 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:33,918] Trial 71 finished with value: 512.4194332178448 and parameters: {'learning_rate': 0.038654923859543146, 'n_estimators': 441, 'max_depth': 4, 'num_leaves': 125, 'min_child_weight': 5, 'subsample': 0.8168330627381541, 'colsample_bytree': 0.6920564591574419}. Best is trial 71 with value: 512.4194332178448.
[I 2024-03-29 23:24:34,058] Trial 72 finished with value: 516.8872940774125 and parameters: {'learning_rate': 0.04250358453966122, 'n_estimators': 135, 'max_depth': 4, 'num_leaves': 125, 'min_child_weight': 5, 'subsample': 0.8138441369500847, 'colsample_bytree': 0.6745749599943963}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000718 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000724 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:34,308] Trial 73 finished with value: 514.0116645499008 and parameters: {'learning_rate': 0.027805046217016124, 'n_estimators': 396, 'max_depth': 4, 'num_leaves': 135, 'min_child_weight': 4, 'subsample': 0.852091275845966, 'colsample_bytree': 0.7164669544475706}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000903 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:34,736] Trial 74 finished with value: 518.7661366664717 and parameters: {'learning_rate': 0.058641566627279054, 'n_estimators': 501, 'max_depth': 6, 'num_leaves': 117, 'min_child_weight': 8, 'subsample': 0.6964665939011628, 'colsample_bytree': 0.9756459768799879}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000670 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:35,005] Trial 75 finished with value: 514.7032924332315 and parameters: {'learning_rate': 0.08059278946991279, 'n_estimators': 538, 'max_depth': 3, 'num_leaves': 110, 'min_child_weight': 3, 'subsample': 0.8253007767205515, 'colsample_bytree': 0.6982208779100681}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000722 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:35,288] Trial 76 finished with value: 516.4056285071314 and parameters: {'learning_rate': 0.06819175555390536, 'n_estimators': 474, 'max_depth': 4, 'num_leaves': 128, 'min_child_weight': 5, 'subsample': 0.7508051197096339, 'colsample_bytree': 0.7649909465979084}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000694 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:35,578] Trial 77 finished with value: 513.9368229689752 and parameters: {'learning_rate': 0.03819265998748296, 'n_estimators': 450, 'max_depth': 4, 'num_leaves': 143, 'min_child_weight': 4, 'subsample': 0.8058698277021373, 'colsample_bytree': 0.6476119257728195}. Best is trial 71 with value: 512.4194332178448.
[I 2024-03-29 23:24:35,749] Trial 78 finished with value: 515.590571167386 and parameters: {'learning_rate': 0.11564381137719273, 'n_estimators': 202, 'max_depth': 3, 'num_leaves': 66, 'min_child_weight': 6, 'subsample': 0.7898399502053238, 'colsample_bytree': 0.5380758405302531}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000726 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000805 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:36,218] Trial 79 finished with value: 515.7502374660463 and parameters: {'learning_rate': 0.044845551440099524, 'n_estimators': 567, 'max_depth': 5, 'num_leaves': 96, 'min_child_weight': 3, 'subsample': 0.8847766129329279, 'colsample_bytree': 0.6307695808100298}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000717 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:36,507] Trial 80 finished with value: 516.001076084535 and parameters: {'learning_rate': 0.019691572032571974, 'n_estimators': 431, 'max_depth': 4, 'num_leaves': 163, 'min_child_weight': 4, 'subsample': 0.8606153288727183, 'colsample_bytree': 0.7295078328885495}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000677 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:36,768] Trial 81 finished with value: 513.1971241882928 and parameters: {'learning_rate': 0.03290174434563192, 'n_estimators': 407, 'max_depth': 4, 'num_leaves': 151, 'min_child_weight': 5, 'subsample': 0.771932736513129, 'colsample_bytree': 0.6934536934423617}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000696 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:37,073] Trial 82 finished with value: 514.4373228426613 and parameters: {'learning_rate': 0.030018269918165898, 'n_estimators': 506, 'max_depth': 4, 'num_leaves': 172, 'min_child_weight': 5, 'subsample': 0.7121300400613826, 'colsample_bytree': 0.6650664612497222}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000678 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:37,313] Trial 83 finished with value: 514.1984663890353 and parameters: {'learning_rate': 0.05971595042913588, 'n_estimators': 372, 'max_depth': 4, 'num_leaves': 152, 'min_child_weight': 6, 'subsample': 0.7666711394929805, 'colsample_bytree': 0.6859906249227349}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000729 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:37,587] Trial 84 finished with value: 513.8237043805906 and parameters: {'learning_rate': 0.049403592313206, 'n_estimators': 408, 'max_depth': 4, 'num_leaves': 137, 'min_child_weight': 5, 'subsample': 0.7358099273212947, 'colsample_bytree': 0.7059318672581201}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000641 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:37,892] Trial 85 finished with value: 512.8946839946311 and parameters: {'learning_rate': 0.03607340534253807, 'n_estimators': 478, 'max_depth': 4, 'num_leaves': 160, 'min_child_weight': 5, 'subsample': 0.7460065460513466, 'colsample_bytree': 0.6114849981668634}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000660 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:38,194] Trial 86 finished with value: 517.6784130344835 and parameters: {'learning_rate': 0.010932203666202282, 'n_estimators': 306, 'max_depth': 5, 'num_leaves': 161, 'min_child_weight': 5, 'subsample': 0.7785088468697547, 'colsample_bytree': 0.586622110378237}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000678 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:38,496] Trial 87 finished with value: 513.4940233553974 and parameters: {'learning_rate': 0.02717325218003054, 'n_estimators': 546, 'max_depth': 3, 'num_leaves': 185, 'min_child_weight': 5, 'subsample': 0.7416478653482951, 'colsample_bytree': 0.6110133119415341}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000695 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:38,775] Trial 88 finished with value: 515.7266213421683 and parameters: {'learning_rate': 0.023967791067618094, 'n_estimators': 542, 'max_depth': 3, 'num_leaves': 188, 'min_child_weight': 5, 'subsample': 0.6897579249217854, 'colsample_bytree': 0.6088696783381399}. Best is trial 71 with value: 512.4194332178448.
[I 2024-03-29 23:24:39,017] Trial 89 finished with value: 514.7656682714766 and parameters: {'learning_rate': 0.0382640270430268, 'n_estimators': 446, 'max_depth': 3, 'num_leaves': 195, 'min_child_weight': 5, 'subsample': 0.7460583822189044, 'colsample_bytree': 0.656060476585427}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000712 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000678 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:39,490] Trial 90 finished with value: 538.0642370074546 and parameters: {'learning_rate': 0.1642361696576186, 'n_estimators': 486, 'max_depth': 7, 'num_leaves': 215, 'min_child_weight': 6, 'subsample': 0.6645889683751219, 'colsample_bytree': 0.6272501730059837}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000767 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:39,872] Trial 91 finished with value: 513.9915186187224 and parameters: {'learning_rate': 0.03453744584904449, 'n_estimators': 595, 'max_depth': 4, 'num_leaves': 176, 'min_child_weight': 5, 'subsample': 0.7185002027466466, 'colsample_bytree': 0.5648128309228204}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000715 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:40,156] Trial 92 finished with value: 518.1629103204024 and parameters: {'learning_rate': 0.01881832840198667, 'n_estimators': 527, 'max_depth': 3, 'num_leaves': 146, 'min_child_weight': 6, 'subsample': 0.7036299434203325, 'colsample_bytree': 0.6003317668527959}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:40,794] Trial 93 finished with value: 517.0826734523528 and parameters: {'learning_rate': 0.029683518755424843, 'n_estimators': 459, 'max_depth': 8, 'num_leaves': 157, 'min_child_weight': 5, 'subsample': 0.7643978667427641, 'colsample_bytree': 0.5714175805246798}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000771 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:41,086] Trial 94 finished with value: 513.1794859541174 and parameters: {'learning_rate': 0.05778040569801704, 'n_estimators': 576, 'max_depth': 3, 'num_leaves': 77, 'min_child_weight': 8, 'subsample': 0.7289573535063778, 'colsample_bytree': 0.6701175918647639}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000730 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:41,389] Trial 95 finished with value: 514.8118937032676 and parameters: {'learning_rate': 0.04086143735113908, 'n_estimators': 662, 'max_depth': 3, 'num_leaves': 76, 'min_child_weight': 5, 'subsample': 0.7472389662683774, 'colsample_bytree': 0.6399199051803572}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000658 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:41,666] Trial 96 finished with value: 514.9779220607888 and parameters: {'learning_rate': 0.06284640465596832, 'n_estimators': 580, 'max_depth': 3, 'num_leaves': 88, 'min_child_weight': 2, 'subsample': 0.7294320051225518, 'colsample_bytree': 0.6705305305207582}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000691 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:41,987] Trial 97 finished with value: 514.7781098997638 and parameters: {'learning_rate': 0.027319671543150667, 'n_estimators': 617, 'max_depth': 3, 'num_leaves': 58, 'min_child_weight': 6, 'subsample': 0.7121077836881536, 'colsample_bytree': 0.6172500501091374}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000754 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:42,258] Trial 98 finished with value: 514.9816715067617 and parameters: {'learning_rate': 0.07564310411779676, 'n_estimators': 566, 'max_depth': 3, 'num_leaves': 99, 'min_child_weight': 4, 'subsample': 0.7375963225659615, 'colsample_bytree': 0.6816948873615805}. Best is trial 71 with value: 512.4194332178448.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000649 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197


[I 2024-03-29 23:24:42,528] Trial 99 finished with value: 515.1427940534703 and parameters: {'learning_rate': 0.05471500963759518, 'n_estimators': 550, 'max_depth': 3, 'num_leaves': 62, 'min_child_weight': 5, 'subsample': 0.7709952205390674, 'colsample_bytree': 0.6478741252170811}. Best is trial 71 with value: 512.4194332178448.


Best trial: {'learning_rate': 0.038654923859543146, 'n_estimators': 441, 'max_depth': 4, 'num_leaves': 125, 'min_child_weight': 5, 'subsample': 0.8168330627381541, 'colsample_bytree': 0.6920564591574419}


In [84]:
# XGBRegressor 모델 초기화
lgbm_model = LGBMRegressor(
                         learning_rate =0.038654923859543146,
                         n_estimators= 441,
                         max_depth= 4,
                         num_leaves= 125,
                         min_child_weight = 5,
                         subsample= 0.8168330627381541,
                         colsample_bytree= 0.6920564591574419,
                         )

# 모델 훈련
lgbm_model.fit(X_train, y_train)

# 테스트 데이터에 대한 예측 수행
y_pred_lgbm = lgbm_model.predict(X_test)
y_pred_lgbm[y_pred_lgbm<100] = 0
# RMSE 계산
rmse_lgbm = mean_squared_error(y_test, y_pred_lgbm, squared=False)
print(f"XGB RMSE: {rmse_lgbm}")

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000655 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 553
[LightGBM] [Info] Number of data points in the train set: 18971, number of used features: 34
[LightGBM] [Info] Start training from score 556.577197
XGB RMSE: 512.4194332178448


In [78]:
import optuna
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# 데이터셋을 미리 준비하세요
# X_train, X_test, y_train, y_test = train_test_split(...)

def objective(trial):
    # Optuna가 탐색할 파라미터 공간 정의
    param = {
        'objective': 'RMSE',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 1e-8, 10.0),  # L2 정규화 계수
        'bootstrap_type': 'Bernoulli',
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),  # 'Bernoulli' 부트스트랩 타입을 사용할 때 필요
        'random_seed' : 42
    }

    # CatBoost 모델 초기화 및 훈련
    model = CatBoostRegressor(**param, silent=True)  # silent=True로 설정하여 학습 중 메시지 출력 억제
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10)  # 조기 종료 설정

    # 예측 및 RMSE 계산
    preds = model.predict(X_test)
    preds[preds < 100] = 0  # 특정 조건에 대한 후처리
    rmse = np.sqrt(mean_squared_error(y_test, preds))

    return rmse

# Optuna study 객체 생성 및 최적화 실행
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)  # n_trials: 시도할 횟수

# 최적의 파라미터 출력
print(f"Best trial: {study.best_trial.params}")

[I 2024-03-29 23:34:20,383] A new study created in memory with name: no-name-326697d8-7fe4-4ef5-8255-da81b6adeb4d
[I 2024-03-29 23:34:20,575] Trial 0 finished with value: 519.9567689924547 and parameters: {'learning_rate': 0.25541266352952396, 'n_estimators': 121, 'max_depth': 6, 'l2_leaf_reg': 0.008108023337434817, 'subsample': 0.5751790029910593}. Best is trial 0 with value: 519.9567689924547.
[I 2024-03-29 23:34:20,704] Trial 1 finished with value: 535.6868774885494 and parameters: {'learning_rate': 0.24866093699161543, 'n_estimators': 632, 'max_depth': 6, 'l2_leaf_reg': 1.1420831865521811e-08, 'subsample': 0.9099846491870023}. Best is trial 0 with value: 519.9567689924547.
[I 2024-03-29 23:34:21,127] Trial 2 finished with value: 524.6666490606489 and parameters: {'learning_rate': 0.02809371402019535, 'n_estimators': 150, 'max_depth': 5, 'l2_leaf_reg': 5.570706495783961, 'subsample': 0.7820363209655286}. Best is trial 0 with value: 519.9567689924547.
[I 2024-03-29 23:34:21,327] Tria

KeyboardInterrupt: 

In [85]:
# XGBRegressor 모델 초기화
cat_model = CatBoostRegressor(
    learning_rate =0.09755345353231588,
    n_estimators= 372,
    max_depth= 8,
    l2_leaf_reg = 0.018838567676701338,
    subsample= 0.5839698627283093,
    bootstrap_type = 'Bernoulli',
    objective = 'RMSE',
    silent=True,
    random_seed = 42

)

# 모델 훈련
cat_model.fit(X_train, y_train)

# 테스트 데이터에 대한 예측 수행
y_pred_cat = cat_model.predict(X_test)
y_pred_cat[y_pred_cat<100] = 0
# RMSE 계산
rmse_cat = mean_squared_error(y_test, y_pred_cat, squared=False)
print(f"XGB RMSE: {rmse_cat}")

XGB RMSE: 515.6004968952714


In [100]:
pred_final_xgb = xgb_model.predict(test)
# pred_final = np.expm1(pred_final)
pred_final_xgb[pred_final_xgb<100] = 0

pred_final_cat = cat_model.predict(test)
# pred_final = np.expm1(pred_final)
pred_final_cat[pred_final_cat<100] = 0

pred_final_lgbm = lgbm_model.predict(test)
# pred_final = np.expm1(pred_final)
pred_final_lgbm[pred_final_lgbm<100] = 0



In [102]:
import numpy as np

# pred_final_xgb, pred_final_cat, pred_final_lgbm은 NumPy 배열로 가정
# 이 배열들은 각 모델의 예측값을 담고 있음

# 세 모델의 예측값을 평균내어 최종 예측값 계산
pred_ensemble = (pred_final_xgb + pred_final_cat + pred_final_lgbm) / 3



[  0.           0.         390.96818941 ... 338.24980672   0.
 757.67472684]


In [103]:
submission = pd.read_csv("../data/sample_submission.csv")
submission['Income'] = pred_ensemble
submission

Unnamed: 0,ID,Income
0,TEST_0000,0.000000
1,TEST_0001,0.000000
2,TEST_0002,390.968189
3,TEST_0003,702.355383
4,TEST_0004,0.000000
...,...,...
9995,TEST_9995,878.722877
9996,TEST_9996,783.873666
9997,TEST_9997,338.249807
9998,TEST_9998,0.000000


In [104]:
submission.to_csv("../submission/test10.csv", index = False)