## 1. 데이터 불러오기

In [45]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor

# Load the training and test tables (train first, then test) from the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Feature columns: every column of the training table except 'ID' and 'y'.
data_columns = train.columns.drop(['ID', 'y'])

# Report the (rows, columns) shape of both tables, one per line.
print(f'train shape : {train.shape}', f'test shape : {test.shape}', sep='\n')

train shape : (40118, 13)
test shape : (4986, 12)


## 2. 데이터 전처리

### 이상치 처리

In [46]:
# # IQR을 사용한 이상치 탐지 및 제거 함수 정의
# def remove_outliers(df, column):
#     # 1사분위수(25%)와 3사분위수(75%) 계산
#     Q1 = df[column].quantile(0.25)
#     Q3 = df[column].quantile(0.75)
#     # IQR 계산
#     IQR = Q3 - Q1
#     # 이상치 범위 설정 (1.5 * IQR보다 더 크거나 작은 값들)
#     lower_bound = Q1 - 1.5 * IQR
#     upper_bound = Q3 + 1.5 * IQR
#     # 이상치가 아닌 데이터만 반환
#     return df[(df[column] >= lower_bound)] 
#             #   & (df[column] <= upper_bound)]

# # target 컬럼에서 이상치 제거
# train = remove_outliers(train, 'y')
# print(f'train shape : {train.shape}')

In [47]:
from scipy import stats

def remove_outliers_zscore(df, column, threshold=3):
    """
    Drop the rows whose value in ``column`` is a Z-score outlier.

    Parameters:
    df (DataFrame): input data frame; it is not modified.
    column (str): name of the column the Z-scores are computed on.
    threshold (float): largest absolute Z-score that is still kept (default 3).

    Returns:
    DataFrame: the rows of ``df`` whose Z-score lies in
    ``[-threshold, threshold]``. Rows whose value in ``column`` is NaN have
    no defined Z-score and are dropped as well.
    """
    # Work on a plain NumPy array so the result of zscore() is always an
    # ndarray that can be used as a positional boolean mask.
    values = df[column].to_numpy()
    # nan_policy='omit' computes mean/std from the non-NaN values only.
    # With the default ('propagate') a single NaN turns every Z-score into
    # NaN, every comparison below is False and an empty frame is returned.
    z_scores = stats.zscore(values, nan_policy='omit')
    # NaN Z-scores compare False on both sides, so those rows are excluded.
    keep = (z_scores >= -threshold) & (z_scores <= threshold)
    return df[keep]

# Remove outliers from the target column 'y' (default threshold),
# then show how many rows are left.
train = remove_outliers_zscore(df=train, column='y')
print(f'train shape : {train.shape}')

train shape : (39264, 13)


### 거듭제곱 변환 (Yeo-Johnson)

In [48]:
from sklearn.preprocessing import PowerTransformer

# Yeo-Johnson power transformer; its parameters are estimated from the
# training data only.
pt = PowerTransformer(method='yeo-johnson')

# Fit on the training feature columns (everything except ID and y) and
# transform them.
train_yeo_johnson = train.copy()
train_yeo_johnson[data_columns] = pt.fit_transform(train[data_columns])

# Re-use the transformer fitted on train for the test set. Calling
# fit_transform() here would re-estimate the transform parameters from the
# test rows, so the same raw value would map to different feature values in
# train and test.
test_yeo_johnson = test.copy()
test_yeo_johnson[data_columns] = pt.transform(test[data_columns])

### 다중공선성

In [49]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.decomposition import PCA

# Multicollinearity check: variance inflation factor (VIF) of every feature.
# The design matrix is the transformed training data without 'ID', 'x_8'
# and the target 'y'.
X = train_yeo_johnson.drop(['ID', 'x_8', 'y'], axis=1)
vif_data = pd.DataFrame({
    "feature": X.columns,
    "VIF": [
        variance_inflation_factor(X.values, col_idx)
        for col_idx in range(len(X.columns))
    ],
})

print(vif_data)
# Sum of all VIFs, printed as a single summary number.
print(vif_data["VIF"].sum())

  feature        VIF
0     x_0   2.780132
1     x_1  10.335470
2     x_2  10.485913
3     x_3   4.556827
4     x_4  10.355824
5     x_5  29.238399
6     x_6   7.363162
7     x_7  49.935635
8     x_9  62.782760
9    x_10  24.549525
212.3836482733996


### 시각화

In [50]:
# import matplotlib.pyplot as plt
# import seaborn as sns

# # Create histograms for each variable
# fig, axes = plt.subplots(4, 3, figsize=(20, 20))
# axes = axes.ravel()
 
# for i, col in enumerate(train_yeo_johnson.columns[1:]):  # Skip the ID column
#     sns.histplot(train_yeo_johnson[col], ax=axes[i], kde=True)
#     axes[i].set_title(f'Distribution of {col}')

# plt.tight_layout()
# plt.show()

# # Create box plots for each variable
# fig, axes = plt.subplots(4, 3, figsize=(20, 20))
# axes = axes.ravel()

# for i, col in enumerate(train_yeo_johnson.columns[1:]):  # Skip the ID column
#     sns.boxplot(y=train_yeo_johnson[col], ax=axes[i])
#     axes[i].set_title(f'Box plot of {col}')

# plt.tight_layout()
# plt.show()

### 피쳐 생성

In [51]:
# train['x_8 * x_10'] = train['x_8'] * train['x_10']
# train['x_8 + x_10'] = train['x_8'] + train['x_10']
# train['x_8 - x_10'] = train['x_8'] - train['x_10']

# test['x_8 * x_10'] = test['x_8'] * test['x_10']
# test['x_8 + x_10'] = test['x_8'] + test['x_10']
# test['x_8 - x_10'] = test['x_8'] - test['x_10']

In [52]:
# train_x = train.drop(columns=['ID','x_2','y'])
# test_x = test.drop(columns=['ID','x_2'])
# train_y = train['y']

In [53]:
# Target vector, taken from the transformed training table.
train_y = train_yeo_johnson['y']
# Model inputs: same columns as in the VIF check, i.e. without 'ID' and 'x_8'
# (and without the target 'y' for the training table).
train_x = train_yeo_johnson.drop(['ID', 'x_8', 'y'], axis=1)
test_x = test_yeo_johnson.drop(['ID', 'x_8'], axis=1)

### 데이터 스케일링

In [54]:
# # 3. 데이터 스케일링
# scaler = StandardScaler()
# # train_x 스케일링 후 데이터프레임으로 변환
# train_x_scaled = pd.DataFrame(scaler.fit_transform(train_x), columns=train_x.columns, index=train_x.index)
# # test_x 스케일링 후 데이터프레임으로 변환
# test_x_scaled = pd.DataFrame(scaler.transform(test_x), columns=test_x.columns, index=test_x.index)

## 3. 분석 모델 설계

In [55]:
import lightgbm as lgb
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from optuna.integration import LightGBMPruningCallback

# Hold out 20% of the training rows for validation; the seed is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    train_x,
    train_y,
    test_size=0.2,
    random_state=42,
)

# 목적 함수 정의
def objective(trial):
    # 하이퍼파라미터 샘플링
    param = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-6, 1.0, log=True),  # 축소된 범위
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-6, 1.0, log=True),  # 축소된 범위
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),  # 범위 축소
        'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 0.9),  # 범위 축소
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 0.9),  # 범위 축소
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 5), 
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 50), 
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'force_col_wise': True  

    }

    # 모델 학습
    model = lgb.LGBMRegressor(**param)
    model.fit(X_train, y_train,
              eval_set=[(X_test, y_test)],
              callbacks=[lgb.early_stopping(100), LightGBMPruningCallback(trial, "rmse")],
              )
    
    # 예측 및 평가
    preds = model.predict(X_test)
    rmse = mean_squared_error(y_test, preds, squared=False)
    
    return rmse

# Create an in-memory Optuna study that minimises the objective's RMSE and
# run 100 trials.
study = optuna.create_study(direction='minimize')
study.optimize(func=objective, n_trials=100)

# Keep the best trial and its sampled hyperparameters.
trial = study.best_trial
best_params = trial.params

[I 2024-08-21 12:45:03,048] A new study created in memory with name: no-name-d526e488-0d6f-4651-9e12-dc67b682f594
[I 2024-08-21 12:45:03,160] Trial 0 finished with value: 1.4664201757912418 and parameters: {'lambda_l1': 0.009486795791669061, 'lambda_l2': 0.0001932475699902813, 'num_leaves': 24, 'feature_fraction': 0.7578113430208191, 'bagging_fraction': 0.5378074393213446, 'bagging_freq': 1, 'min_child_samples': 45, 'learning_rate': 0.03754765027132902}. Best is trial 0 with value: 1.4664201757912418.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[99]	valid_0's rmse: 1.46642
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:03,315] Trial 1 finished with value: 1.4924955549127914 and parameters: {'lambda_l1': 5.065821007202573e-05, 'lambda_l2': 0.0002787609836896145, 'num_leaves': 47, 'feature_fraction': 0.857759425519746, 'bagging_fraction': 0.525341604225671, 'bagging_freq': 2, 'min_child_samples': 23, 'learning_rate': 0.018315624709063227}. Best is trial 0 with value: 1.4664201757912418.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 1.4925
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[45]	valid_0's rmse: 1.47016


[I 2024-08-21 12:45:03,480] Trial 2 finished with value: 1.4701579372883669 and parameters: {'lambda_l1': 0.0037619403628038443, 'lambda_l2': 3.4641098334637217e-06, 'num_leaves': 71, 'feature_fraction': 0.8443656481982338, 'bagging_fraction': 0.8189461714595461, 'bagging_freq': 2, 'min_child_samples': 19, 'learning_rate': 0.08378108430199374}. Best is trial 0 with value: 1.4664201757912418.
[I 2024-08-21 12:45:03,586] Trial 3 finished with value: 1.4682951624455272 and parameters: {'lambda_l1': 0.0001780554650639969, 'lambda_l2': 0.5944943775910119, 'num_leaves': 21, 'feature_fraction': 0.8063541637818069, 'bagging_fraction': 0.5865538232126168, 'bagging_freq': 4, 'min_child_samples': 45, 'learning_rate': 0.0328583796726549}. Best is trial 0 with value: 1.4664201757912418.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 1.4683
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:03,844] Trial 4 finished with value: 1.5657784750699135 and parameters: {'lambda_l1': 0.00039413858149801303, 'lambda_l2': 4.405299243642216e-06, 'num_leaves': 119, 'feature_fraction': 0.6313122370101654, 'bagging_fraction': 0.8695555625844829, 'bagging_freq': 3, 'min_child_samples': 50, 'learning_rate': 0.011395118950489346}. Best is trial 0 with value: 1.4664201757912418.


Did not meet early stopping. Best iteration is:
[100]	valid_0's rmse: 1.56578
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[73]	valid_0's rmse: 1.46673


[I 2024-08-21 12:45:03,994] Trial 5 finished with value: 1.4667258256065814 and parameters: {'lambda_l1': 4.709288257081928e-06, 'lambda_l2': 0.0008386984945724405, 'num_leaves': 38, 'feature_fraction': 0.5831918875276552, 'bagging_fraction': 0.8223183589481318, 'bagging_freq': 5, 'min_child_samples': 24, 'learning_rate': 0.06509627947006306}. Best is trial 0 with value: 1.4664201757912418.
[I 2024-08-21 12:45:04,018] Trial 6 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:04,196] Trial 7 pruned. Trial was pruned at iteration 71.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:04,219] Trial 8 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:04,242] Trial 9 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:04,461] Trial 10 finished with value: 1.4681515372613874 and parameters: {'lambda_l1': 0.015194825408006212, 'lambda_l2': 0.0358935493579323, 'num_leaves': 60, 'feature_fraction': 0.7340797675570848, 'bagging_fraction': 0.6502109360485928, 'bagging_freq': 1, 'min_child_samples': 38, 'learning_rate': 0.040548797032062364}. Best is trial 0 with value: 1.4664201757912418.
[I 2024-08-21 12:45:04,612] Trial 11 finished with value: 1.4659483044030044 and parameters: {'lambda_l1': 4.2979864135678356e-06, 'lambda_l2': 0.009308895519839454, 'num_leaves': 21, 'feature_fraction': 0.5096220095804627, 'bagging_fraction': 0.6746097083964128, 'bagging_freq': 5, 'min_child_samples': 30, 'learning_rate': 0.04909763428155113}. Best is trial 11 with value: 1.4659483044030044.


Did not meet early stopping. Best iteration is:
[87]	valid_0's rmse: 1.46815
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[96]	valid_0's rmse: 1.46595
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:04,748] Trial 12 finished with value: 1.4674715106859146 and parameters: {'lambda_l1': 0.02784719402536447, 'lambda_l2': 0.017815592942338265, 'num_leaves': 20, 'feature_fraction': 0.504536246251775, 'bagging_fraction': 0.6559842319422479, 'bagging_freq': 5, 'min_child_samples': 35, 'learning_rate': 0.0507345798229617}. Best is trial 11 with value: 1.4659483044030044.
[I 2024-08-21 12:45:04,795] Trial 13 pruned. Trial was pruned at iteration 0.


Did not meet early stopping. Best iteration is:
[99]	valid_0's rmse: 1.46747
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:04,955] Trial 14 pruned. Trial was pruned at iteration 83.
[I 2024-08-21 12:45:05,002] Trial 15 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:05,047] Trial 16 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:05,204] Trial 17 pruned. Trial was pruned at iteration 63.
[I 2024-08-21 12:45:05,342] Trial 18 pruned. Trial was pruned at iteration 64.
[I 2024-08-21 12:45:05,388] Trial 19 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550


[I 2024-08-21 12:45:05,557] Trial 20 finished with value: 1.4676263328737997 and parameters: {'lambda_l1': 8.005714402475308e-06, 'lambda_l2': 0.00023289060224246586, 'num_leaves': 33, 'feature_fraction': 0.5524923920562164, 'bagging_fraction': 0.6233322623627218, 'bagging_freq': 5, 'min_child_samples': 29, 'learning_rate': 0.05952111494276592}. Best is trial 11 with value: 1.4659483044030044.


[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[71]	valid_0's rmse: 1.46763
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:05,734] Trial 21 finished with value: 1.4672203016943055 and parameters: {'lambda_l1': 6.52470589907728e-06, 'lambda_l2': 0.0010970371877114771, 'num_leaves': 35, 'feature_fraction': 0.5626083556457138, 'bagging_fraction': 0.8820993530062385, 'bagging_freq': 5, 'min_child_samples': 25, 'learning_rate': 0.059245643780355434}. Best is trial 11 with value: 1.4659483044030044.


Did not meet early stopping. Best iteration is:
[85]	valid_0's rmse: 1.46722
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[57]	valid_0's rmse: 1.46671


[I 2024-08-21 12:45:05,909] Trial 22 finished with value: 1.4667117261376492 and parameters: {'lambda_l1': 3.492761006508177e-06, 'lambda_l2': 0.0005704769292264122, 'num_leaves': 43, 'feature_fraction': 0.6063461043596163, 'bagging_fraction': 0.8219196948550134, 'bagging_freq': 5, 'min_child_samples': 23, 'learning_rate': 0.07250461540362364}. Best is trial 11 with value: 1.4659483044030044.
[I 2024-08-21 12:45:06,066] Trial 23 finished with value: 1.4673954937422833 and parameters: {'lambda_l1': 1.0071123717526857e-06, 'lambda_l2': 0.01617911648989952, 'num_leaves': 29, 'feature_fraction': 0.6004480325931488, 'bagging_fraction': 0.8051075563145196, 'bagging_freq': 4, 'min_child_samples': 21, 'learning_rate': 0.08033116716568839}. Best is trial 11 with value: 1.4659483044030044.
[I 2024-08-21 12:45:06,111] Trial 24 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[70]	valid_0's rmse: 1.4674
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10


[I 2024-08-21 12:45:06,165] Trial 25 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:06,276] Trial 26 pruned. Trial was pruned at iteration 55.


[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:06,385] Trial 27 pruned. Trial was pruned at iteration 63.
[I 2024-08-21 12:45:06,430] Trial 28 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:06,480] Trial 29 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:06,651] Trial 30 finished with value: 1.4653277598510444 and parameters: {'lambda_l1': 0.00033734944074203355, 'lambda_l2': 1.3974844334369008e-06, 'num_leaves': 26, 'feature_fraction': 0.5893408205732656, 'bagging_fraction': 0.7819923176156593, 'bagging_freq': 4, 'min_child_samples': 47, 'learning_rate': 0.05534284426056653}. Best is trial 30 with value: 1.4653277598510444.
[I 2024-08-21 12:45:06,803] Trial 31 finished with value: 1.4646873392398203 and parameters: {'lambda_l1': 3.1774126227901006e-06, 'lambda_l2': 2.3976037097802456e-06, 'num_leaves': 26, 'feature_fraction': 0.5914117996170477, 'bagging_fraction': 0.7843922952389459, 'bagging_freq': 4, 'min_child_samples': 48, 'learning_rate': 0.05328888480593457}. Best is trial 31 with value: 1.4646873392398203.


Did not meet early stopping. Best iteration is:
[84]	valid_0's rmse: 1.46533
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[84]	valid_0's rmse: 1.46469


[I 2024-08-21 12:45:06,960] Trial 32 finished with value: 1.466071531944665 and parameters: {'lambda_l1': 0.00408512178828274, 'lambda_l2': 1.7906840169976885e-06, 'num_leaves': 27, 'feature_fraction': 0.8017371275529239, 'bagging_fraction': 0.7907544487804362, 'bagging_freq': 4, 'min_child_samples': 47, 'learning_rate': 0.05571657211499701}. Best is trial 31 with value: 1.4646873392398203.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[77]	valid_0's rmse: 1.46607
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:07,122] Trial 33 finished with value: 1.4651197502059614 and parameters: {'lambda_l1': 0.004334962450762485, 'lambda_l2': 1.311486266851185e-06, 'num_leaves': 27, 'feature_fraction': 0.5800495658946998, 'bagging_fraction': 0.7868192284789303, 'bagging_freq': 4, 'min_child_samples': 47, 'learning_rate': 0.0545215922276419}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:07,175] Trial 34 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:07,228] Trial 35 pruned. Trial was pruned at iteration 0.


Did not meet early stopping. Best iteration is:
[84]	valid_0's rmse: 1.46512
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:07,410] Trial 36 finished with value: 1.466553206461716 and parameters: {'lambda_l1': 0.0002951927558119679, 'lambda_l2': 1.3535694467869592e-06, 'num_leaves': 41, 'feature_fraction': 0.5830595187213546, 'bagging_fraction': 0.8997392428461116, 'bagging_freq': 4, 'min_child_samples': 41, 'learning_rate': 0.06388156805463613}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:07,471] Trial 37 pruned. Trial was pruned at iteration 0.


Did not meet early stopping. Best iteration is:
[72]	valid_0's rmse: 1.46655
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:07,615] Trial 38 pruned. Trial was pruned at iteration 42.
[I 2024-08-21 12:45:07,666] Trial 39 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:07,715] Trial 40 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:07,883] Trial 41 finished with value: 1.466411597740059 and parameters: {'lambda_l1': 0.003812762232572236, 'lambda_l2': 3.3656733776667565e-06, 'num_leaves': 29, 'feature_fraction': 0.8116378286443153, 'bagging_fraction': 0.7939032371257346, 'bagging_freq': 4, 'min_child_samples': 47, 'learning_rate': 0.05627277992760728}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:08,051] Trial 42 finished with value: 1.4657353197659468 and parameters: {'lambda_l1': 0.004929710800473558, 'lambda_l2': 2.2780108286022463e-06, 'num_leaves': 26, 'feature_fraction': 0.8043049181021585, 'bagging_fraction': 0.7975613941986143, 'bagging_freq': 3, 'min_child_samples': 47, 'learning_rate': 0.06430295095940403}. Best is trial 31 with value: 1.4646873392398203.


Did not meet early stopping. Best iteration is:
[75]	valid_0's rmse: 1.46641
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[60]	valid_0's rmse: 1.46574


[I 2024-08-21 12:45:08,226] Trial 43 finished with value: 1.4664531140216501 and parameters: {'lambda_l1': 4.7067092002620646e-05, 'lambda_l2': 5.522432974031652e-06, 'num_leaves': 39, 'feature_fraction': 0.6440858626563917, 'bagging_fraction': 0.7626278907506668, 'bagging_freq': 3, 'min_child_samples': 49, 'learning_rate': 0.06855367628874048}. Best is trial 31 with value: 1.4646873392398203.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[62]	valid_0's rmse: 1.46645
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:08,382] Trial 44 finished with value: 1.4655524903422747 and parameters: {'lambda_l1': 0.006970016303213557, 'lambda_l2': 2.7119659450247885e-06, 'num_leaves': 25, 'feature_fraction': 0.5705753214684184, 'bagging_fraction': 0.8100788806859344, 'bagging_freq': 3, 'min_child_samples': 45, 'learning_rate': 0.06369466895420652}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:08,526] Trial 45 pruned. Trial was pruned at iteration 61.


Did not meet early stopping. Best iteration is:
[61]	valid_0's rmse: 1.46555
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:08,657] Trial 46 pruned. Trial was pruned at iteration 60.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:08,870] Trial 47 finished with value: 1.4662213225097818 and parameters: {'lambda_l1': 0.0017540639867568363, 'lambda_l2': 3.064899715798397e-05, 'num_leaves': 56, 'feature_fraction': 0.6600777866678544, 'bagging_fraction': 0.8407232323635198, 'bagging_freq': 3, 'min_child_samples': 43, 'learning_rate': 0.08798525291222024}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:08,924] Trial 48 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:08,989] Trial 49 pruned. Trial was pruned at iteration 0.


Did not meet early stopping. Best iteration is:
[48]	valid_0's rmse: 1.46622
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216


[I 2024-08-21 12:45:09,052] Trial 50 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:09,206] Trial 51 finished with value: 1.4658260121050068 and parameters: {'lambda_l1': 0.016975907931725938, 'lambda_l2': 2.20318865569119e-06, 'num_leaves': 20, 'feature_fraction': 0.5434186617730242, 'bagging_fraction': 0.6809027536984241, 'bagging_freq': 3, 'min_child_samples': 44, 'learning_rate': 0.06278302536768225}. Best is trial 31 with value: 1.4646873392398203.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[78]	valid_0's rmse: 1.46583
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:09,370] Trial 52 finished with value: 1.4670083982204674 and parameters: {'lambda_l1': 0.02232743670792366, 'lambda_l2': 2.0739547031734047e-06, 'num_leaves': 24, 'feature_fraction': 0.5445666113939396, 'bagging_fraction': 0.7796112371072338, 'bagging_freq': 3, 'min_child_samples': 44, 'learning_rate': 0.06525173760235682}. Best is trial 31 with value: 1.4646873392398203.


Did not meet early stopping. Best iteration is:
[60]	valid_0's rmse: 1.46701
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[91]	valid_0's rmse: 1.46575


[I 2024-08-21 12:45:09,553] Trial 53 finished with value: 1.4657476165040613 and parameters: {'lambda_l1': 0.046154139789392465, 'lambda_l2': 9.40753468996793e-06, 'num_leaves': 32, 'feature_fraction': 0.5705415992955357, 'bagging_fraction': 0.86192354914305, 'bagging_freq': 3, 'min_child_samples': 42, 'learning_rate': 0.06002749641093897}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:09,719] Trial 54 finished with value: 1.4655392003405228 and parameters: {'lambda_l1': 0.046890249592426825, 'lambda_l2': 9.372707169373665e-06, 'num_leaves': 32, 'feature_fraction': 0.6372994867300529, 'bagging_fraction': 0.8698071868175166, 'bagging_freq': 3, 'min_child_samples': 41, 'learning_rate': 0.0694690504100553}. Best is trial 31 with value: 1.4646873392398203.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[63]	valid_0's rmse: 1.46554
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:09,891] Trial 55 finished with value: 1.4659247874787649 and parameters: {'lambda_l1': 0.167903729089506, 'lambda_l2': 4.457763358860327e-06, 'num_leaves': 33, 'feature_fraction': 0.6365321472759982, 'bagging_fraction': 0.8703795920009342, 'bagging_freq': 2, 'min_child_samples': 40, 'learning_rate': 0.07023691875674433}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:10,029] Trial 56 pruned. Trial was pruned at iteration 55.


Did not meet early stopping. Best iteration is:
[59]	valid_0's rmse: 1.46592
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10


[I 2024-08-21 12:45:10,158] Trial 57 pruned. Trial was pruned at iteration 55.
[I 2024-08-21 12:45:10,222] Trial 58 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216


[I 2024-08-21 12:45:10,367] Trial 59 pruned. Trial was pruned at iteration 54.
[I 2024-08-21 12:45:10,421] Trial 60 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:10,476] Trial 61 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216


[I 2024-08-21 12:45:10,526] Trial 62 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[64]	valid_0's rmse: 1.46559


[I 2024-08-21 12:45:10,700] Trial 63 finished with value: 1.4655882866320509 and parameters: {'lambda_l1': 0.2878134581194793, 'lambda_l2': 1.1829541756602337e-05, 'num_leaves': 29, 'feature_fraction': 0.60769384152331, 'bagging_fraction': 0.8072302579337831, 'bagging_freq': 3, 'min_child_samples': 43, 'learning_rate': 0.06829855365274586}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:10,852] Trial 64 finished with value: 1.4655001859570238 and parameters: {'lambda_l1': 0.4838754050948099, 'lambda_l2': 4.243301930403304e-06, 'num_leaves': 27, 'feature_fraction': 0.6150914408389321, 'bagging_fraction': 0.8131679777660318, 'bagging_freq': 2, 'min_child_samples': 50, 'learning_rate': 0.06951706212424415}. Best is trial 31 with value: 1.4646873392398203.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[80]	valid_0's rmse: 1.4655
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:11,008] Trial 65 finished with value: 1.4660608037972325 and parameters: {'lambda_l1': 0.5432558101045795, 'lambda_l2': 1.2081510488576078e-05, 'num_leaves': 29, 'feature_fraction': 0.6094965270241164, 'bagging_fraction': 0.8098221398020435, 'bagging_freq': 1, 'min_child_samples': 50, 'learning_rate': 0.0807661374633388}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:11,171] Trial 66 pruned. Trial was pruned at iteration 42.


Did not meet early stopping. Best iteration is:
[53]	valid_0's rmse: 1.46606
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:11,235] Trial 67 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:11,398] Trial 68 finished with value: 1.4663022248974047 and parameters: {'lambda_l1': 0.45031873499254216, 'lambda_l2': 8.374319326218898e-05, 'num_leaves': 23, 'feature_fraction': 0.6064674298715018, 'bagging_fraction': 0.8357597766062559, 'bagging_freq': 4, 'min_child_samples': 46, 'learning_rate': 0.0910165363635632}. Best is trial 31 with value: 1.4646873392398203.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[66]	valid_0's rmse: 1.4663


[I 2024-08-21 12:45:11,464] Trial 69 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:11,586] Trial 70 pruned. Trial was pruned at iteration 49.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10


[I 2024-08-21 12:45:11,787] Trial 71 finished with value: 1.4661963679832957 and parameters: {'lambda_l1': 0.9530399186196226, 'lambda_l2': 1.361932757032547e-06, 'num_leaves': 29, 'feature_fraction': 0.6015461479809342, 'bagging_fraction': 0.7976989226388684, 'bagging_freq': 3, 'min_child_samples': 48, 'learning_rate': 0.06761890554319211}. Best is trial 31 with value: 1.4646873392398203.


[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[64]	valid_0's rmse: 1.4662
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:11,970] Trial 72 finished with value: 1.4657894744562339 and parameters: {'lambda_l1': 0.0984462776088404, 'lambda_l2': 2.4198009102021086e-06, 'num_leaves': 27, 'feature_fraction': 0.7130228603271621, 'bagging_fraction': 0.7522986218827379, 'bagging_freq': 3, 'min_child_samples': 46, 'learning_rate': 0.07217946240698236}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:12,026] Trial 73 pruned. Trial was pruned at iteration 0.


Did not meet early stopping. Best iteration is:
[54]	valid_0's rmse: 1.46579
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:12,191] Trial 74 finished with value: 1.4652481829781674 and parameters: {'lambda_l1': 0.004291458117680001, 'lambda_l2': 0.6954985189380049, 'num_leaves': 20, 'feature_fraction': 0.5154533497112387, 'bagging_fraction': 0.7801423654831448, 'bagging_freq': 4, 'min_child_samples': 43, 'learning_rate': 0.06573764666289043}. Best is trial 31 with value: 1.4646873392398203.
[I 2024-08-21 12:45:12,269] Trial 75 pruned. Trial was pruned at iteration 0.


Did not meet early stopping. Best iteration is:
[70]	valid_0's rmse: 1.46525
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:12,403] Trial 76 pruned. Trial was pruned at iteration 66.
[I 2024-08-21 12:45:12,554] Trial 77 finished with value: 1.4650055754314117 and parameters: {'lambda_l1': 0.002783120440723514, 'lambda_l2': 6.458522901292677e-06, 'num_leaves': 20, 'feature_fraction': 0.55560612126182, 'bagging_fraction': 0.7739767796365753, 'bagging_freq': 4, 'min_child_samples': 34, 'learning_rate': 0.08058743256984273}. Best is trial 31 with value: 1.4646873392398203.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[76]	valid_0's rmse: 1.46501
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:12,716] Trial 78 finished with value: 1.464420666243794 and parameters: {'lambda_l1': 0.0025226268392361142, 'lambda_l2': 7.096017015048406e-06, 'num_leaves': 21, 'feature_fraction': 0.5597449208536864, 'bagging_fraction': 0.7682455926758424, 'bagging_freq': 4, 'min_child_samples': 32, 'learning_rate': 0.08205836795754815}. Best is trial 78 with value: 1.464420666243794.
[I 2024-08-21 12:45:12,871] Trial 79 finished with value: 1.4655895485888135 and parameters: {'lambda_l1': 0.002805083758882074, 'lambda_l2': 6.334098419232983e-06, 'num_leaves': 21, 'feature_fraction': 0.5547276729481961, 'bagging_fraction': 0.772783343027391, 'bagging_freq': 4, 'min_child_samples': 33, 'learning_rate': 0.08174722498582895}. Best is trial 78 with value: 1.464420666243794.


Did not meet early stopping. Best iteration is:
[69]	valid_0's rmse: 1.46442
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[70]	valid_0's rmse: 1.46559


[I 2024-08-21 12:45:13,034] Trial 80 finished with value: 1.4658971372921767 and parameters: {'lambda_l1': 0.0001499308228298852, 'lambda_l2': 0.7736392406512088, 'num_leaves': 20, 'feature_fraction': 0.5556176168610429, 'bagging_fraction': 0.7638742434214829, 'bagging_freq': 4, 'min_child_samples': 31, 'learning_rate': 0.09082870461627435}. Best is trial 78 with value: 1.464420666243794.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[48]	valid_0's rmse: 1.4659
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:13,195] Trial 81 finished with value: 1.4663432514884633 and parameters: {'lambda_l1': 0.002716890867264896, 'lambda_l2': 0.1693262267097282, 'num_leaves': 24, 'feature_fraction': 0.5258423510530278, 'bagging_fraction': 0.7517526370367847, 'bagging_freq': 4, 'min_child_samples': 27, 'learning_rate': 0.09947042489529342}. Best is trial 78 with value: 1.464420666243794.


Did not meet early stopping. Best iteration is:
[47]	valid_0's rmse: 1.46634
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[84]	valid_0's rmse: 1.4652


[I 2024-08-21 12:45:13,377] Trial 82 finished with value: 1.4652031126854328 and parameters: {'lambda_l1': 0.0006307021616965117, 'lambda_l2': 1.4742302099383452e-06, 'num_leaves': 27, 'feature_fraction': 0.5059366688036877, 'bagging_fraction': 0.7851070721053639, 'bagging_freq': 4, 'min_child_samples': 35, 'learning_rate': 0.07501605739162732}. Best is trial 78 with value: 1.464420666243794.
[I 2024-08-21 12:45:13,500] Trial 83 pruned. Trial was pruned at iteration 46.
[I 2024-08-21 12:45:13,558] Trial 84 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


[I 2024-08-21 12:45:13,666] Trial 85 pruned. Trial was pruned at iteration 44.
[I 2024-08-21 12:45:13,816] Trial 86 finished with value: 1.4659793822813958 and parameters: {'lambda_l1': 0.0006896009951306928, 'lambda_l2': 1.2982063018312534e-06, 'num_leaves': 20, 'feature_fraction': 0.5341030222817246, 'bagging_fraction': 0.7588293993985024, 'bagging_freq': 4, 'min_child_samples': 32, 'learning_rate': 0.08427799473166912}. Best is trial 78 with value: 1.464420666243794.
[I 2024-08-21 12:45:13,870] Trial 87 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[61]	valid_0's rmse: 1.46598
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216


[I 2024-08-21 12:45:13,932] Trial 88 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:14,086] Trial 89 finished with value: 1.465791718356432 and parameters: {'lambda_l1': 0.00020530949608611512, 'lambda_l2': 7.089142551207428e-06, 'num_leaves': 23, 'feature_fraction': 0.5634201956930097, 'bagging_fraction': 0.7806244121084958, 'bagging_freq': 4, 'min_child_samples': 36, 'learning_rate': 0.09183828450527566}. Best is trial 78 with value: 1.464420666243794.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[53]	valid_0's rmse: 1.46579


[I 2024-08-21 12:45:14,210] Trial 90 pruned. Trial was pruned at iteration 46.
[I 2024-08-21 12:45:14,284] Trial 91 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216


[I 2024-08-21 12:45:14,348] Trial 92 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:14,413] Trial 93 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:14,544] Trial 94 pruned. Trial was pruned at iteration 48.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216


[I 2024-08-21 12:45:14,601] Trial 95 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:14,740] Trial 96 pruned. Trial was pruned at iteration 37.
[I 2024-08-21 12:45:14,798] Trial 97 pruned. Trial was pruned at iteration 0.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216


[I 2024-08-21 12:45:14,858] Trial 98 pruned. Trial was pruned at iteration 0.
[I 2024-08-21 12:45:14,981] Trial 99 pruned. Trial was pruned at iteration 46.


[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 31411, number of used features: 10
[LightGBM] [Info] Start training from score 83.451216
Training until validation scores don't improve for 100 rounds


In [56]:
# Base learners for the ensemble: a plain linear regression and a LightGBM
# regressor configured from `best_params`.
# NOTE(review): `best_params` is presumably the result of the Optuna search
# logged above — confirm against the tuning cell.
# rf = RandomForestRegressor(n_estimators=300,random_state=42)
lgbm = lgb.LGBMRegressor(**best_params)
lr = LinearRegression()

# Combine the base learners into a single VotingRegressor ensemble.
ensemble_members = [('lr', lr), ('lgbm', lgbm)]
voting_regressor = VotingRegressor(estimators=ensemble_members)

## 4. 모델 학습

In [57]:
# Fit the voting ensemble on the training features and target.
# NOTE(review): the LightGBM log printed after this cell reports 39264 rows,
# versus 31411 in the tuning logs, so this appears to refit on the full
# training data rather than the tuning split — confirm.
voting_regressor.fit(train_x, train_y)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000681 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 39264, number of used features: 10
[LightGBM] [Info] Start training from score 83.446284


In [58]:
# Disabled cell: feature-importance plot for the random forest (`rf`).
# It stays commented out because the `rf` definition is commented out as well.
# import matplotlib.pyplot as plt

# # Extract the feature importances after fitting the model
# importances = rf.feature_importances_
# indices = np.argsort(importances)[::-1]

# # Plot the feature importances
# plt.figure(figsize=(10, 6))
# plt.title("Feature Importance")
# plt.bar(range(train_x_scaled.shape[1]), importances[indices], align="center")
# plt.xticks(range(train_x_scaled.shape[1]), train_x_scaled.columns[indices], rotation=90)
# plt.tight_layout()
# plt.show()

## 5. 예측값 생성
학습한 모델을 사용하여 예측값을 생성합니다.

In [59]:
# Predict the target for the test features with the fitted ensemble.
# NOTE(review): if the target was transformed during preprocessing, these
# predictions would need the inverse transform before submission — confirm
# against the preprocessing cells.
y_pred = voting_regressor.predict(test_x)



## 6. 제출 파일 생성
submission 파일을 만들어서 제출합니다.

In [60]:
from datetime import datetime

# Load the submission template and fill in the predicted target column.
submission = pd.read_csv('sample_submission.csv')
submission['y'] = y_pred

# Format the current date and time as 'YYYYMMDD_HHMMSS'. Seconds are included
# so that two submissions generated within the same minute get distinct file
# names instead of silently overwriting each other.
today_datetime = datetime.now().strftime('%Y%m%d_%H%M%S')

# Write the submission under the timestamped file name (no index column).
submission.to_csv(f'./submission_{today_datetime}.csv', index=False)