In [3]:
import pandas as pd
import numpy as np
from pathlib import Path
import plotly.express as px
import time

In [4]:
from sklearn.model_selection import train_test_split
import optuna.integration.lightgbm as lgb

# 環境設定

In [5]:
# Input-data directory and results directory, both relative to the notebook's working directory.
DATA_PATH, RESULT_PATH = "./data", "./results"

# 関数定義

In [6]:
def cosine_similarity(y_true, y_pred):
    """Return the cosine similarity between two vectors.

    Both inputs are converted to flat float arrays, so pandas Series,
    single-column DataFrames, NumPy arrays of shape (n,) or (n, 1) and plain
    Python sequences are all accepted.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values; must hold the same number of elements as ``y_true``.

    Returns
    -------
    float
        ``dot(y_true, y_pred) / (||y_true|| * ||y_pred||)``, or ``nan`` when
        either vector has zero norm (the similarity is undefined there).

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    # Flatten everything up front so column-shaped inputs do not break np.dot.
    true_vec = np.asarray(y_true, dtype=float).reshape(-1)
    pred_vec = np.asarray(y_pred, dtype=float).reshape(-1)
    if true_vec.shape != pred_vec.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {true_vec.shape[0]} and {pred_vec.shape[0]}"
        )

    denominator = np.linalg.norm(true_vec) * np.linalg.norm(pred_vec)
    if denominator == 0:
        # Undefined for an all-zero (or empty) vector: return nan explicitly
        # instead of dividing by zero and emitting a RuntimeWarning.
        return np.nan
    return np.dot(true_vec, pred_vec) / denominator

# 分析

## データ読み込み

In [8]:
# Read the training and test tables from CSV files under DATA_PATH.
train_data = pd.read_csv(Path(DATA_PATH) / "train.csv")
test_data = pd.read_csv(Path(DATA_PATH) / "test.csv")

## モデルの作成

In [9]:
# Subsample 20% of the training rows to shorten training time.
# The seed is fixed so the sample (and everything derived from it) is
# reproducible across kernel restarts.
sample_train_data = train_data.sample(frac=0.2, random_state=0)

In [10]:
# Split the sampled data into training and validation sets (70% / 30%).
# TODO: should the split respect time ordering, given this may be time-series data?
target_col = "target"
# Every column except the row identifier and the target is used as a feature.
feature_cols = sample_train_data.drop(columns=["id", target_col]).columns
# A fixed random_state makes the split reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    sample_train_data[feature_cols],
    sample_train_data[target_col],
    test_size=0.3,
    random_state=0,
)
print(f'X_trainのshape:{X_train.shape}')
print(f'y_trainのshape:{y_train.shape}')
print(f'X_validのshape:{X_valid.shape}')
print(f'y_validのshape:{y_valid.shape}')

X_trainのshape:(92400, 695)
y_trainのshape:(92400,)
X_validのshape:(39600, 695)
y_validのshape:(39600,)


In [15]:
# Build the LightGBM datasets; the validation set references the training set.
reg_train = lgb.Dataset(data=X_train, label=y_train)
reg_eval = lgb.Dataset(data=X_valid, label=y_valid, reference=reg_train)

In [19]:
# Base LightGBM parameters passed to the tuner.
params = dict(
    boosting_type='gbdt',
    objective='regression',
    metric='rmse',
    random_state=0,
)

In [20]:
# Hyperparameter tuning via the Optuna LightGBM integration (`lgb` is
# optuna.integration.lightgbm); wall-clock time is measured around the call.
start_time = time.time()
opt = lgb.train(
    params=params,
    train_set=reg_train,
    num_boost_round=5,
    valid_sets=reg_eval,
    verbose_eval=False,
)
print(f"学習にかかった時間：{time.time() - start_time}")

[I 2023-08-18 01:05:34,424] A new study created in memory with name: no-name-2bc6065c-a72f-400a-9c2f-d1b5a0c2684e

  0%|                                                     | 0/7 [00:00<?, ?it/s][A
feature_fraction, val_score: inf:   0%|                   | 0/7 [00:00<?, ?it/s][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695



feature_fraction, val_score: 0.996776:   0%|              | 0/7 [00:05<?, ?it/s][A
feature_fraction, val_score: 0.996776:  14%|8     | 1/7 [00:05<00:34,  5.67s/it][A[I 2023-08-18 01:05:40,121] Trial 0 finished with value: 0.9967760977270483 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 0.9967760977270483.

feature_fraction, val_score: 0.996776:  14%|8     | 1/7 [00:05<00:34,  5.67s/it][A

[LightGBM] [Info] Start training from score -0.002987
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



feature_fraction, val_score: 0.996776:  14%|8     | 1/7 [00:10<00:34,  5.67s/it][A
feature_fraction, val_score: 0.996776:  29%|#7    | 2/7 [00:11<00:27,  5.47s/it][A[I 2023-08-18 01:05:45,455] Trial 1 finished with value: 0.9968284563804962 and parameters: {'feature_fraction': 0.8}. Best is trial 0 with value: 0.9967760977270483.

feature_fraction, val_score: 0.996776:  29%|#7    | 2/7 [00:11<00:27,  5.47s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695



feature_fraction, val_score: 0.996776:  29%|#7    | 2/7 [00:16<00:27,  5.47s/it]

[LightGBM] [Info] Start training from score -0.002987


[A
feature_fraction, val_score: 0.996776:  43%|##5   | 3/7 [00:16<00:21,  5.32s/it][A[I 2023-08-18 01:05:50,587] Trial 2 finished with value: 0.9968250519836077 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 0.9967760977270483.

feature_fraction, val_score: 0.996776:  43%|##5   | 3/7 [00:16<00:21,  5.32s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695



feature_fraction, val_score: 0.996776:  43%|##5   | 3/7 [00:21<00:21,  5.32s/it][A
feature_fraction, val_score: 0.996776:  57%|###4  | 4/7 [00:21<00:15,  5.20s/it][A

[LightGBM] [Info] Start training from score -0.002987


[I 2023-08-18 01:05:55,614] Trial 3 finished with value: 0.9969082978987007 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 0.9967760977270483.

feature_fraction, val_score: 0.996776:  57%|###4  | 4/7 [00:21<00:15,  5.20s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695



feature_fraction, val_score: 0.996533:  57%|###4  | 4/7 [00:26<00:15,  5.20s/it][A
feature_fraction, val_score: 0.996533:  71%|####2 | 5/7 [00:26<00:10,  5.12s/it][A

[LightGBM] [Info] Start training from score -0.002987


[I 2023-08-18 01:06:00,575] Trial 4 finished with value: 0.9965326979791113 and parameters: {'feature_fraction': 0.6}. Best is trial 4 with value: 0.9965326979791113.

feature_fraction, val_score: 0.996533:  71%|####2 | 5/7 [00:26<00:10,  5.12s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695



feature_fraction, val_score: 0.996533:  71%|####2 | 5/7 [00:31<00:10,  5.12s/it][A
feature_fraction, val_score: 0.996533:  86%|#####1| 6/7 [00:31<00:05,  5.04s/it][A[I 2023-08-18 01:06:05,468] Trial 5 finished with value: 0.9969540408167437 and parameters: {'feature_fraction': 0.4}. Best is trial 4 with value: 0.9965326979791113.

feature_fraction, val_score: 0.996533:  86%|#####1| 6/7 [00:31<00:05,  5.04s/it][A

[LightGBM] [Info] Start training from score -0.002987
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695



feature_fraction, val_score: 0.996533:  86%|#####1| 6/7 [00:36<00:05,  5.04s/it][A
feature_fraction, val_score: 0.996533: 100%|######| 7/7 [00:36<00:00,  5.04s/it][A

[LightGBM] [Info] Start training from score -0.002987


[I 2023-08-18 01:06:10,497] Trial 6 finished with value: 0.9966142282266405 and parameters: {'feature_fraction': 1.0}. Best is trial 4 with value: 0.9965326979791113.
feature_fraction, val_score: 0.996533: 100%|######| 7/7 [00:36<00:00,  5.15s/it]

  0%|                                                    | 0/20 [00:00<?, ?it/s][A
num_leaves, val_score: 0.996533:   0%|                   | 0/20 [00:00<?, ?it/s][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995988:   0%|                   | 0/20 [00:05<?, ?it/s][A
num_leaves, val_score: 0.995988:   5%|5          | 1/20 [00:05<01:40,  5.28s/it][A[I 2023-08-18 01:06:15,802] Trial 7 finished with value: 0.9959882340055168 and parameters: {'num_leaves': 247}. Best is trial 7 with value: 0.9959882340055168.

num_leaves, val_score: 0.995988:   5%|5          | 1/20 [00:05<01:40,  5.28s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995914:   5%|5          | 1/20 [00:10<01:40,  5.28s/it][A
num_leaves, val_score: 0.995914:  10%|#1         | 2/20 [00:10<01:34,  5.24s/it][A[I 2023-08-18 01:06:21,016] Trial 8 finished with value: 0.9959141714744799 and parameters: {'num_leaves': 183}. Best is trial 8 with value: 0.9959141714744799.

num_leaves, val_score: 0.995914:  10%|#1         | 2/20 [00:10<01:34,  5.24s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995914:  10%|#1         | 2/20 [00:15<01:34,  5.24s/it][A
num_leaves, val_score: 0.995914:  15%|#6         | 3/20 [00:15<01:28,  5.20s/it][A[I 2023-08-18 01:06:26,158] Trial 9 finished with value: 0.996461049980092 and parameters: {'num_leaves': 59}. Best is trial 8 with value: 0.9959141714744799.

num_leaves, val_score: 0.995914:  15%|#6         | 3/20 [00:15<01:28,  5.20s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  15%|#6         | 3/20 [00:20<01:28,  5.20s/it][A
num_leaves, val_score: 0.995607:  20%|##2        | 4/20 [00:20<01:23,  5.21s/it][A[I 2023-08-18 01:06:31,382] Trial 10 finished with value: 0.9956074259972918 and parameters: {'num_leaves': 209}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  20%|##2        | 4/20 [00:20<01:23,  5.21s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  20%|##2        | 4/20 [00:26<01:23,  5.21s/it][A
num_leaves, val_score: 0.995607:  25%|##7        | 5/20 [00:26<01:18,  5.25s/it][A[I 2023-08-18 01:06:36,722] Trial 11 finished with value: 0.9960649726306582 and parameters: {'num_leaves': 194}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  25%|##7        | 5/20 [00:26<01:18,  5.25s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  25%|##7        | 5/20 [00:31<01:18,  5.25s/it][A
num_leaves, val_score: 0.995607:  30%|###3       | 6/20 [00:31<01:15,  5.39s/it][A[I 2023-08-18 01:06:42,388] Trial 12 finished with value: 0.9959499328231622 and parameters: {'num_leaves': 166}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  30%|###3       | 6/20 [00:31<01:15,  5.39s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  30%|###3       | 6/20 [00:37<01:15,  5.39s/it][A
num_leaves, val_score: 0.995607:  35%|###8       | 7/20 [00:37<01:11,  5.48s/it][A[I 2023-08-18 01:06:48,033] Trial 13 finished with value: 0.9957927220260545 and parameters: {'num_leaves': 198}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  35%|###8       | 7/20 [00:37<01:11,  5.48s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  35%|###8       | 7/20 [00:43<01:11,  5.48s/it][A
num_leaves, val_score: 0.995607:  40%|####4      | 8/20 [00:43<01:06,  5.51s/it][A[I 2023-08-18 01:06:53,616] Trial 14 finished with value: 0.9957189476954951 and parameters: {'num_leaves': 249}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  40%|####4      | 8/20 [00:43<01:06,  5.51s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  40%|####4      | 8/20 [00:49<01:06,  5.51s/it][A
num_leaves, val_score: 0.995607:  45%|####9      | 9/20 [00:49<01:02,  5.67s/it][A[I 2023-08-18 01:06:59,630] Trial 15 finished with value: 0.9958963879030178 and parameters: {'num_leaves': 240}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  45%|####9      | 9/20 [00:49<01:02,  5.67s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  45%|####9      | 9/20 [00:54<01:02,  5.67s/it][A
num_leaves, val_score: 0.995607:  50%|#####     | 10/20 [00:54<00:57,  5.71s/it][A[I 2023-08-18 01:07:05,451] Trial 16 finished with value: 0.9966672324146779 and parameters: {'num_leaves': 71}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  50%|#####     | 10/20 [00:54<00:57,  5.71s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  50%|#####     | 10/20 [01:00<00:57,  5.71s/it][A
num_leaves, val_score: 0.995607:  55%|#####5    | 11/20 [01:00<00:51,  5.70s/it][A[I 2023-08-18 01:07:11,125] Trial 17 finished with value: 0.9956989587985555 and parameters: {'num_leaves': 110}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  55%|#####5    | 11/20 [01:00<00:51,  5.70s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  55%|#####5    | 11/20 [01:06<00:51,  5.70s/it][A
num_leaves, val_score: 0.995607:  60%|######    | 12/20 [01:06<00:45,  5.65s/it][A[I 2023-08-18 01:07:16,647] Trial 18 finished with value: 0.9957287494369312 and parameters: {'num_leaves': 112}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  60%|######    | 12/20 [01:06<00:45,  5.65s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  60%|######    | 12/20 [01:11<00:45,  5.65s/it][A
num_leaves, val_score: 0.995607:  65%|######5   | 13/20 [01:11<00:39,  5.61s/it][A[I 2023-08-18 01:07:22,164] Trial 19 finished with value: 0.9963447019050127 and parameters: {'num_leaves': 119}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  65%|######5   | 13/20 [01:11<00:39,  5.61s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  65%|######5   | 13/20 [01:16<00:39,  5.61s/it][A
num_leaves, val_score: 0.995607:  70%|#######   | 14/20 [01:16<00:33,  5.52s/it][A[I 2023-08-18 01:07:27,467] Trial 20 finished with value: 0.9966851498955488 and parameters: {'num_leaves': 34}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  70%|#######   | 14/20 [01:16<00:33,  5.52s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  70%|#######   | 14/20 [01:22<00:33,  5.52s/it][A
num_leaves, val_score: 0.995607:  75%|#######5  | 15/20 [01:22<00:27,  5.53s/it][A[I 2023-08-18 01:07:33,016] Trial 21 finished with value: 0.9958498116749027 and parameters: {'num_leaves': 253}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  75%|#######5  | 15/20 [01:22<00:27,  5.53s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  75%|#######5  | 15/20 [01:28<00:27,  5.53s/it][A
num_leaves, val_score: 0.995607:  80%|########  | 16/20 [01:28<00:22,  5.52s/it][A[I 2023-08-18 01:07:38,527] Trial 22 finished with value: 0.9960554800442372 and parameters: {'num_leaves': 149}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  80%|########  | 16/20 [01:28<00:22,  5.52s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995607:  80%|########  | 16/20 [01:33<00:22,  5.52s/it][A
num_leaves, val_score: 0.995607:  85%|########5 | 17/20 [01:33<00:16,  5.51s/it][A[I 2023-08-18 01:07:44,018] Trial 23 finished with value: 0.995651443291471 and parameters: {'num_leaves': 216}. Best is trial 10 with value: 0.9956074259972918.

num_leaves, val_score: 0.995607:  85%|########5 | 17/20 [01:33<00:16,  5.51s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995577:  85%|########5 | 17/20 [01:38<00:16,  5.51s/it][A
num_leaves, val_score: 0.995577:  90%|######### | 18/20 [01:38<00:11,  5.50s/it][A[I 2023-08-18 01:07:49,497] Trial 24 finished with value: 0.995577007777663 and parameters: {'num_leaves': 211}. Best is trial 24 with value: 0.995577007777663.

num_leaves, val_score: 0.995577:  90%|######### | 18/20 [01:38<00:11,  5.50s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995577:  90%|######### | 18/20 [01:44<00:11,  5.50s/it][A
num_leaves, val_score: 0.995577:  95%|#########5| 19/20 [01:44<00:05,  5.49s/it][A[I 2023-08-18 01:07:54,968] Trial 25 finished with value: 0.9956074259972918 and parameters: {'num_leaves': 209}. Best is trial 24 with value: 0.995577007777663.

num_leaves, val_score: 0.995577:  95%|#########5| 19/20 [01:44<00:05,  5.49s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



num_leaves, val_score: 0.995466:  95%|#########5| 19/20 [01:49<00:05,  5.49s/it][A
num_leaves, val_score: 0.995466: 100%|##########| 20/20 [01:49<00:00,  5.48s/it][A[I 2023-08-18 01:08:00,417] Trial 26 finished with value: 0.9954655829233116 and parameters: {'num_leaves': 215}. Best is trial 26 with value: 0.9954655829233116.
num_leaves, val_score: 0.995466: 100%|##########| 20/20 [01:49<00:00,  5.50s/it]

  0%|                                                    | 0/10 [00:00<?, ?it/s][A
bagging, val_score: 0.995466:   0%|                      | 0/10 [00:00<?, ?it/s][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:   0%|                      | 0/10 [00:05<?, ?it/s][A
bagging, val_score: 0.995466:  10%|#4            | 1/10 [00:05<00:48,  5.36s/it][A[I 2023-08-18 01:08:05,801] Trial 27 finished with value: 0.9960730116277751 and parameters: {'bagging_fraction': 0.5579615122609489, 'bagging_freq': 6}. Best is trial 27 with value: 0.9960730116277751.

bagging, val_score: 0.995466:  10%|#4            | 1/10 [00:05<00:48,  5.36s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  10%|#4            | 1/10 [00:10<00:48,  5.36s/it][A
bagging, val_score: 0.995466:  20%|##8           | 2/10 [00:10<00:43,  5.39s/it][A[I 2023-08-18 01:08:11,211] Trial 28 finished with value: 0.995698542623875 and parameters: {'bagging_fraction': 0.9940818264031497, 'bagging_freq': 1}. Best is trial 28 with value: 0.995698542623875.

bagging, val_score: 0.995466:  20%|##8           | 2/10 [00:10<00:43,  5.39s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  20%|##8           | 2/10 [00:16<00:43,  5.39s/it][A
bagging, val_score: 0.995466:  30%|####2         | 3/10 [00:16<00:37,  5.34s/it][A[I 2023-08-18 01:08:16,489] Trial 29 finished with value: 0.996922986249653 and parameters: {'bagging_fraction': 0.426028743833222, 'bagging_freq': 2}. Best is trial 28 with value: 0.995698542623875.

bagging, val_score: 0.995466:  30%|####2         | 3/10 [00:16<00:37,  5.34s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  30%|####2         | 3/10 [00:21<00:37,  5.34s/it][A
bagging, val_score: 0.995466:  40%|#####6        | 4/10 [00:21<00:32,  5.44s/it][A[I 2023-08-18 01:08:22,099] Trial 30 finished with value: 0.996257154813145 and parameters: {'bagging_fraction': 0.7801066678323617, 'bagging_freq': 7}. Best is trial 28 with value: 0.995698542623875.

bagging, val_score: 0.995466:  40%|#####6        | 4/10 [00:21<00:32,  5.44s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  40%|#####6        | 4/10 [00:27<00:32,  5.44s/it][A
bagging, val_score: 0.995466:  50%|#######       | 5/10 [00:27<00:27,  5.44s/it][A[I 2023-08-18 01:08:27,536] Trial 31 finished with value: 0.9958506241010628 and parameters: {'bagging_fraction': 0.7968332255371479, 'bagging_freq': 4}. Best is trial 28 with value: 0.995698542623875.

bagging, val_score: 0.995466:  50%|#######       | 5/10 [00:27<00:27,  5.44s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  50%|#######       | 5/10 [00:32<00:27,  5.44s/it][A
bagging, val_score: 0.995466:  60%|########4     | 6/10 [00:32<00:21,  5.42s/it][A[I 2023-08-18 01:08:32,906] Trial 32 finished with value: 0.9961539065747482 and parameters: {'bagging_fraction': 0.6131167185176797, 'bagging_freq': 4}. Best is trial 28 with value: 0.995698542623875.

bagging, val_score: 0.995466:  60%|########4     | 6/10 [00:32<00:21,  5.42s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  60%|########4     | 6/10 [00:37<00:21,  5.42s/it][A
bagging, val_score: 0.995466:  70%|#########7    | 7/10 [00:37<00:16,  5.44s/it][A[I 2023-08-18 01:08:38,406] Trial 33 finished with value: 0.9957210649356334 and parameters: {'bagging_fraction': 0.9937367450330548, 'bagging_freq': 5}. Best is trial 28 with value: 0.995698542623875.

bagging, val_score: 0.995466:  70%|#########7    | 7/10 [00:37<00:16,  5.44s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  70%|#########7    | 7/10 [00:43<00:16,  5.44s/it][A
bagging, val_score: 0.995466:  80%|###########2  | 8/10 [00:43<00:10,  5.41s/it][A[I 2023-08-18 01:08:43,736] Trial 34 finished with value: 0.9963900322958897 and parameters: {'bagging_fraction': 0.4912168024222032, 'bagging_freq': 2}. Best is trial 28 with value: 0.995698542623875.

bagging, val_score: 0.995466:  80%|###########2  | 8/10 [00:43<00:10,  5.41s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  80%|###########2  | 8/10 [00:48<00:10,  5.41s/it][A
bagging, val_score: 0.995466:  90%|############6 | 9/10 [00:48<00:05,  5.37s/it][A[I 2023-08-18 01:08:49,006] Trial 35 finished with value: 0.9968556047150524 and parameters: {'bagging_fraction': 0.41294836574002763, 'bagging_freq': 7}. Best is trial 28 with value: 0.995698542623875.

bagging, val_score: 0.995466:  90%|############6 | 9/10 [00:48<00:05,  5.37s/it][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



bagging, val_score: 0.995466:  90%|############6 | 9/10 [00:53<00:05,  5.37s/it][A
bagging, val_score: 0.995466: 100%|#############| 10/10 [00:53<00:00,  5.33s/it][A[I 2023-08-18 01:08:54,262] Trial 36 finished with value: 0.9963282607124132 and parameters: {'bagging_fraction': 0.6343438627983371, 'bagging_freq': 3}. Best is trial 28 with value: 0.995698542623875.
bagging, val_score: 0.995466: 100%|#############| 10/10 [00:53<00:00,  5.38s/it]

  0%|                                                     | 0/6 [00:00<?, ?it/s][A
feature_fraction_stage2, val_score: 0.995466:   0%|       | 0/6 [00:00<?, ?it/s][A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



feature_fraction_stage2, val_score: 0.995466:   0%|       | 0/6 [00:05<?, ?it/s][A
feature_fraction_stage2, val_score: 0.995466:  17%|1| 1/6 [00:05<00:26,  5.37s/i[A[I 2023-08-18 01:08:59,655] Trial 37 finished with value: 0.9956019645213364 and parameters: {'feature_fraction': 0.6479999999999999}. Best is trial 37 with value: 0.9956019645213364.

feature_fraction_stage2, val_score: 0.995466:  17%|1| 1/6 [00:05<00:26,  5.37s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



feature_fraction_stage2, val_score: 0.995466:  17%|1| 1/6 [00:10<00:26,  5.37s/i[A
feature_fraction_stage2, val_score: 0.995466:  33%|3| 2/6 [00:10<00:21,  5.33s/i[A[I 2023-08-18 01:09:04,968] Trial 38 finished with value: 0.9956339491399944 and parameters: {'feature_fraction': 0.584}. Best is trial 37 with value: 0.9956019645213364.

feature_fraction_stage2, val_score: 0.995466:  33%|3| 2/6 [00:10<00:21,  5.33s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



feature_fraction_stage2, val_score: 0.995466:  33%|3| 2/6 [00:15<00:21,  5.33s/i[A
feature_fraction_stage2, val_score: 0.995466:  50%|5| 3/6 [00:15<00:15,  5.28s/i[A[I 2023-08-18 01:09:10,183] Trial 39 finished with value: 0.9957818573164237 and parameters: {'feature_fraction': 0.6799999999999999}. Best is trial 37 with value: 0.9956019645213364.

feature_fraction_stage2, val_score: 0.995466:  50%|5| 3/6 [00:15<00:15,  5.28s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



feature_fraction_stage2, val_score: 0.995466:  50%|5| 3/6 [00:21<00:15,  5.28s/i[A
feature_fraction_stage2, val_score: 0.995466:  67%|6| 4/6 [00:21<00:10,  5.29s/i[A[I 2023-08-18 01:09:15,481] Trial 40 finished with value: 0.9960280506954545 and parameters: {'feature_fraction': 0.552}. Best is trial 37 with value: 0.9956019645213364.

feature_fraction_stage2, val_score: 0.995466:  67%|6| 4/6 [00:21<00:10,  5.29s/i[A

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



feature_fraction_stage2, val_score: 0.995466:  67%|6| 4/6 [00:26<00:10,  5.29s/i[A
feature_fraction_stage2, val_score: 0.995466:  83%|8| 5/6 [00:26<00:05,  5.32s/i[A[I 2023-08-18 01:09:20,873] Trial 41 finished with value: 0.9956704023012566 and parameters: {'feature_fraction': 0.616}. Best is trial 37 with value: 0.9956019645213364.

feature_fraction_stage2, val_score: 0.995466:  83%|8| 5/6 [00:26<00:05,  5.32s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



feature_fraction_stage2, val_score: 0.994983:  83%|8| 5/6 [00:31<00:05,  5.32s/i[A
feature_fraction_stage2, val_score: 0.994983: 100%|#| 6/6 [00:31<00:00,  5.32s/i[A[I 2023-08-18 01:09:26,174] Trial 42 finished with value: 0.9949833624968405 and parameters: {'feature_fraction': 0.52}. Best is trial 42 with value: 0.9949833624968405.
feature_fraction_stage2, val_score: 0.994983: 100%|#| 6/6 [00:31<00:00,  5.32s/i

  0%|                                                    | 0/20 [00:00<?, ?it/s][A
regularization_factors, val_score: 0.994983:   0%|       | 0/20 [00:00<?, ?it/s][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994983:   0%|       | 0/20 [00:05<?, ?it/s][A
regularization_factors, val_score: 0.994983:   5%| | 1/20 [00:05<01:38,  5.19s/i[A[I 2023-08-18 01:09:31,387] Trial 43 finished with value: 0.9954826198524029 and parameters: {'lambda_l1': 0.2136622808336877, 'lambda_l2': 0.012911276001406811}. Best is trial 43 with value: 0.9954826198524029.

regularization_factors, val_score: 0.994983:   5%| | 1/20 [00:05<01:38,  5.19s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994983:   5%| | 1/20 [00:10<01:38,  5.19s/i[A
regularization_factors, val_score: 0.994983:  10%|1| 2/20 [00:10<01:34,  5.26s/i[A[I 2023-08-18 01:09:36,697] Trial 44 finished with value: 0.995480948532479 and parameters: {'lambda_l1': 0.20801585236344486, 'lambda_l2': 0.02398363437163156}. Best is trial 44 with value: 0.995480948532479.

regularization_factors, val_score: 0.994983:  10%|1| 2/20 [00:10<01:34,  5.26s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994983:  10%|1| 2/20 [00:15<01:34,  5.26s/i[A
regularization_factors, val_score: 0.994983:  15%|1| 3/20 [00:15<01:29,  5.26s/i[A[I 2023-08-18 01:09:41,967] Trial 45 finished with value: 0.9952584901549814 and parameters: {'lambda_l1': 0.4251327297754372, 'lambda_l2': 0.022101842426434624}. Best is trial 45 with value: 0.9952584901549814.

regularization_factors, val_score: 0.994983:  15%|1| 3/20 [00:15<01:29,  5.26s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994983:  15%|1| 3/20 [00:21<01:29,  5.26s/i[A
regularization_factors, val_score: 0.994983:  20%|2| 4/20 [00:21<01:24,  5.27s/i[A[I 2023-08-18 01:09:47,256] Trial 46 finished with value: 0.9954185528657268 and parameters: {'lambda_l1': 0.31613800022042965, 'lambda_l2': 0.013773301007503779}. Best is trial 45 with value: 0.9952584901549814.

regularization_factors, val_score: 0.994983:  20%|2| 4/20 [00:21<01:24,  5.27s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994983:  20%|2| 4/20 [00:26<01:24,  5.27s/i[A
regularization_factors, val_score: 0.994983:  25%|2| 5/20 [00:26<01:19,  5.27s/i[A[I 2023-08-18 01:09:52,530] Trial 47 finished with value: 0.9952379041885228 and parameters: {'lambda_l1': 0.23435778334501092, 'lambda_l2': 0.016025750013555305}. Best is trial 47 with value: 0.9952379041885228.

regularization_factors, val_score: 0.994983:  25%|2| 5/20 [00:26<01:19,  5.27s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994983:  25%|2| 5/20 [00:31<01:19,  5.27s/i[A
regularization_factors, val_score: 0.994983:  30%|3| 6/20 [00:31<01:13,  5.26s/i[A[I 2023-08-18 01:09:57,768] Trial 48 finished with value: 0.9957505439090681 and parameters: {'lambda_l1': 0.8667936608328551, 'lambda_l2': 0.012401628856864182}. Best is trial 47 with value: 0.9952379041885228.

regularization_factors, val_score: 0.994983:  30%|3| 6/20 [00:31<01:13,  5.26s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  30%|3| 6/20 [00:36<01:13,  5.26s/i[A
regularization_factors, val_score: 0.994958:  35%|3| 7/20 [00:36<01:08,  5.25s/i[A[I 2023-08-18 01:10:03,004] Trial 49 finished with value: 0.9949576808147785 and parameters: {'lambda_l1': 0.003566907316652897, 'lambda_l2': 1.9847715690369496}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  35%|3| 7/20 [00:36<01:08,  5.25s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  35%|3| 7/20 [00:42<01:08,  5.25s/i[A
regularization_factors, val_score: 0.994958:  40%|4| 8/20 [00:42<01:02,  5.24s/i[A[I 2023-08-18 01:10:08,208] Trial 50 finished with value: 0.995333033313936 and parameters: {'lambda_l1': 0.062471185979177994, 'lambda_l2': 7.846790808550792}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  40%|4| 8/20 [00:42<01:02,  5.24s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  40%|4| 8/20 [00:47<01:02,  5.24s/i[A
regularization_factors, val_score: 0.994958:  45%|4| 9/20 [00:47<00:57,  5.24s/i[A[I 2023-08-18 01:10:13,453] Trial 51 finished with value: 0.9951631591524516 and parameters: {'lambda_l1': 0.05134549204265735, 'lambda_l2': 7.147493690647091}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  45%|4| 9/20 [00:47<00:57,  5.24s/i[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  45%|4| 9/20 [00:52<00:57,  5.24s/i[A
regularization_factors, val_score: 0.994958:  50%|5| 10/20 [00:52<00:52,  5.23s/[A[I 2023-08-18 01:10:18,671] Trial 52 finished with value: 0.9953610478855305 and parameters: {'lambda_l1': 0.004718639732040681, 'lambda_l2': 4.360816416337054}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  50%|5| 10/20 [00:52<00:52,  5.23s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  50%|5| 10/20 [00:57<00:52,  5.23s/[A
regularization_factors, val_score: 0.994958:  55%|5| 11/20 [00:57<00:47,  5.22s/[A[I 2023-08-18 01:10:23,877] Trial 53 finished with value: 0.9951135888403504 and parameters: {'lambda_l1': 0.0035203883351434006, 'lambda_l2': 5.276840422741928}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  55%|5| 11/20 [00:57<00:47,  5.22s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  55%|5| 11/20 [01:02<00:47,  5.22s/[A
regularization_factors, val_score: 0.994958:  60%|6| 12/20 [01:02<00:41,  5.21s/[A[I 2023-08-18 01:10:29,070] Trial 54 finished with value: 0.9950386979167737 and parameters: {'lambda_l1': 0.0002680546569239866, 'lambda_l2': 6.67488536454302}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  60%|6| 12/20 [01:02<00:41,  5.21s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  60%|6| 12/20 [01:08<00:41,  5.21s/[A
regularization_factors, val_score: 0.994958:  65%|6| 13/20 [01:08<00:36,  5.21s/[A[I 2023-08-18 01:10:34,259] Trial 55 finished with value: 0.9951247378657678 and parameters: {'lambda_l1': 0.0004971103245558534, 'lambda_l2': 9.333055801136899}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  65%|6| 13/20 [01:08<00:36,  5.21s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  65%|6| 13/20 [01:13<00:36,  5.21s/[A
regularization_factors, val_score: 0.994958:  70%|7| 14/20 [01:13<00:31,  5.19s/[A[I 2023-08-18 01:10:39,423] Trial 56 finished with value: 0.9950726754766542 and parameters: {'lambda_l1': 0.00047246295701554737, 'lambda_l2': 9.125947032350096}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  70%|7| 14/20 [01:13<00:31,  5.19s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  70%|7| 14/20 [01:18<00:31,  5.19s/[A
regularization_factors, val_score: 0.994958:  75%|7| 15/20 [01:18<00:26,  5.21s/[A[I 2023-08-18 01:10:44,664] Trial 57 finished with value: 0.9949980888516899 and parameters: {'lambda_l1': 0.00020283481021051033, 'lambda_l2': 7.281994093935517}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  75%|7| 15/20 [01:18<00:26,  5.21s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  75%|7| 15/20 [01:23<00:26,  5.21s/[A
regularization_factors, val_score: 0.994958:  80%|8| 16/20 [01:23<00:20,  5.21s/[A[I 2023-08-18 01:10:49,864] Trial 58 finished with value: 0.9953896584731617 and parameters: {'lambda_l1': 9.795886342984226e-05, 'lambda_l2': 0.6281866198016425}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  80%|8| 16/20 [01:23<00:20,  5.21s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  80%|8| 16/20 [01:28<00:20,  5.21s/[A
regularization_factors, val_score: 0.994958:  85%|8| 17/20 [01:28<00:15,  5.20s/[A[I 2023-08-18 01:10:55,039] Trial 59 finished with value: 0.9953863248374145 and parameters: {'lambda_l1': 0.0007789849591092841, 'lambda_l2': 0.5614011147913154}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  85%|8| 17/20 [01:28<00:15,  5.20s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  85%|8| 17/20 [01:33<00:15,  5.20s/[A
regularization_factors, val_score: 0.994958:  90%|9| 18/20 [01:33<00:10,  5.18s/[A[I 2023-08-18 01:11:00,179] Trial 60 finished with value: 0.9949833629563524 and parameters: {'lambda_l1': 1.1361570213218912e-05, 'lambda_l2': 2.5102308553949816e-06}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  90%|9| 18/20 [01:34<00:10,  5.18s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  90%|9| 18/20 [01:39<00:10,  5.18s/[A
regularization_factors, val_score: 0.994958:  95%|9| 19/20 [01:39<00:05,  5.19s/[A[I 2023-08-18 01:11:05,396] Trial 61 finished with value: 0.9949833626458683 and parameters: {'lambda_l1': 4.405409504479337e-06, 'lambda_l2': 1.512274752950575e-07}. Best is trial 49 with value: 0.9949576808147785.

regularization_factors, val_score: 0.994958:  95%|9| 19/20 [01:39<00:05,  5.19s/[A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



regularization_factors, val_score: 0.994958:  95%|9| 19/20 [01:44<00:05,  5.19s/[A
regularization_factors, val_score: 0.994958: 100%|#| 20/20 [01:44<00:00,  5.19s/[A[I 2023-08-18 01:11:10,593] Trial 62 finished with value: 0.9949833627543506 and parameters: {'lambda_l1': 7.731435060366604e-06, 'lambda_l2': 1.2039781806121636e-07}. Best is trial 49 with value: 0.9949576808147785.
regularization_factors, val_score: 0.994958: 100%|#| 20/20 [01:44<00:00,  5.22s/

  0%|                                                     | 0/5 [00:00<?, ?it/s][A
min_data_in_leaf, val_score: 0.994958:   0%|              | 0/5 [00:00<?, ?it/s][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



min_data_in_leaf, val_score: 0.994660:   0%|              | 0/5 [00:05<?, ?it/s][A
min_data_in_leaf, val_score: 0.994660:  20%|#2    | 1/5 [00:05<00:20,  5.19s/it][A[I 2023-08-18 01:11:15,810] Trial 63 finished with value: 0.9946600119505157 and parameters: {'min_child_samples': 100}. Best is trial 63 with value: 0.9946600119505157.

min_data_in_leaf, val_score: 0.994660:  20%|#2    | 1/5 [00:05<00:20,  5.19s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



min_data_in_leaf, val_score: 0.994660:  20%|#2    | 1/5 [00:10<00:20,  5.19s/it][A
min_data_in_leaf, val_score: 0.994660:  40%|##4   | 2/5 [00:10<00:15,  5.19s/it][A[I 2023-08-18 01:11:21,006] Trial 64 finished with value: 0.9948260112418215 and parameters: {'min_child_samples': 5}. Best is trial 63 with value: 0.9946600119505157.

min_data_in_leaf, val_score: 0.994660:  40%|##4   | 2/5 [00:10<00:15,  5.19s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



min_data_in_leaf, val_score: 0.994660:  40%|##4   | 2/5 [00:15<00:15,  5.19s/it][A
min_data_in_leaf, val_score: 0.994660:  60%|###6  | 3/5 [00:15<00:10,  5.18s/it][A[I 2023-08-18 01:11:26,177] Trial 65 finished with value: 0.995171975186894 and parameters: {'min_child_samples': 10}. Best is trial 63 with value: 0.9946600119505157.

min_data_in_leaf, val_score: 0.994660:  60%|###6  | 3/5 [00:15<00:10,  5.18s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



min_data_in_leaf, val_score: 0.994660:  60%|###6  | 3/5 [00:20<00:10,  5.18s/it][A
min_data_in_leaf, val_score: 0.994660:  80%|####8 | 4/5 [00:20<00:05,  5.16s/it][A[I 2023-08-18 01:11:31,303] Trial 66 finished with value: 0.9952372426453031 and parameters: {'min_child_samples': 25}. Best is trial 63 with value: 0.9946600119505157.

min_data_in_leaf, val_score: 0.994660:  80%|####8 | 4/5 [00:20<00:05,  5.16s/it][A

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4168
[LightGBM] [Info] Number of data points in the train set: 92400, number of used features: 695
[LightGBM] [Info] Start training from score -0.002987



min_data_in_leaf, val_score: 0.994660:  80%|####8 | 4/5 [00:25<00:05,  5.16s/it][A
min_data_in_leaf, val_score: 0.994660: 100%|######| 5/5 [00:25<00:00,  5.16s/it][A[I 2023-08-18 01:11:36,472] Trial 67 finished with value: 0.9950499558941363 and parameters: {'min_child_samples': 50}. Best is trial 63 with value: 0.9946600119505157.
min_data_in_leaf, val_score: 0.994660: 100%|######| 5/5 [00:25<00:00,  5.17s/it]

学習にかかった時間：362.05063104629517





In [21]:
# Show the parameter dict stored on `opt`, the model object returned by lgb.train above.
opt.params

{'boosting_type': 'gbdt',
 'objective': 'regression',
 'metric': 'rmse',
 'random_state': 0,
 'feature_pre_filter': False,
 'lambda_l1': 0.003566907316652897,
 'lambda_l2': 1.9847715690369496,
 'num_leaves': 215,
 'feature_fraction': 0.52,
 'bagging_fraction': 1.0,
 'bagging_freq': 0,
 'min_child_samples': 100,
 'num_iterations': 5,
 'early_stopping_round': None}

In [26]:
# Score the model on the train and validation splits using cosine_similarity.
y_train_pred, y_valid_pred = (opt.predict(split) for split in (X_train, X_valid))
train_score = cosine_similarity(y_train, y_train_pred)
valid_score = cosine_similarity(y_valid, y_valid_pred)
print(f"trainの精度：{train_score}")
print(f"validの精度：{valid_score}")

trainの精度：0.3779359714929994
validの精度：0.09086984000249344


## テストデータの予測

In [27]:
# Predict on the test set.
# Keep the ids aside for the submission file; a Series already converts to a 1-D array.
id_test = test_data["id"].to_numpy()
# Select exactly the columns (in the same order) that were used to build the training
# matrix instead of "everything except id": a missing feature then raises a KeyError
# here, and a reordered or extra column in test.csv can no longer misalign the inputs.
X_test = test_data[feature_cols]
y_test_pred = opt.predict(X_test)

In [28]:
# Wrap the raw prediction array in a DataFrame so the notebook renders it as a table.
pd.DataFrame(data=y_test_pred)

Unnamed: 0,0
0,0.054365
1,0.015501
2,-0.007097
3,-0.007136
4,-0.083279
...,...
261656,0.028713
261657,0.036861
261658,-0.018638
261659,0.031034


In [29]:
# Save the predictions as a two-column (id, target) CSV.
result = pd.DataFrame({"id": id_test, "target": y_test_pred})
# Create the output directory first: to_csv does not create missing parent
# directories, so a fresh checkout without ./results would otherwise fail here.
result_dir = Path(RESULT_PATH)
result_dir.mkdir(parents=True, exist_ok=True)
result.to_csv(result_dir / "20230817_result_with_optuna.csv", index=False)