# ファイルのインポート

In [1]:
# Change the working directory to the project folder; later cells use relative paths such as 'train.csv'.
cd /content/drive/MyDrive/プログラミング/SIGNATE/国勢調査_収入予測

/content/drive/MyDrive/プログラミング/SIGNATE/国勢調査_収入予測


In [2]:
import numpy as np
import pandas as pd
import re
import pickle
import gc

# scikit-learn
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# LightGBM
import lightgbm as lgb

import warnings
warnings.filterwarnings("ignore")

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [None]:
# Preview the first rows of the raw training data.
# NOTE(review): in the visible notebook train_data is only assigned in a later cell
# (pd.read_csv('train.csv')), so that cell has to run before this one.
display(train_data.head())

Unnamed: 0,index,age,workclass,education,education-num,marital-status,occupation,relationship,race,sex,native-country,Y
0,322,21,Private,Some-college,10,Divorced,Adm-clerical,Own-child,White,Female,United-States,0
1,11968,29,Private,HS-grad,9,Divorced,Adm-clerical,Not-in-family,White,Female,United-States,0
2,10868,19,Private,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Female,United-States,0
3,3394,17,Private,HS-grad,9,Never-married,Craft-repair,Own-child,White,Male,United-States,0
4,15993,47,Private,Some-college,10,Married-civ-spouse,Craft-repair,Husband,White,Male,United-States,0


In [13]:
# Preview the first rows of the feature table.
# NOTE(review): in the visible notebook train_X is only built in a later cell,
# so that cell has to run before this one.
display(train_X.head())

Unnamed: 0,age,workclass,education,education-num,marital-status,occupation,relationship,race,sex
0,21,3,12,10,0,1,3,2,0
1,29,3,9,9,0,1,1,2,0
2,19,3,8,13,2,8,1,2,0
3,17,3,9,9,2,2,3,2,1
4,47,3,12,10,1,2,0,2,1


# 特徴量エンジニアリング





In [3]:
def train_lgb(input_x,
              input_y,
              params,
              list_nfold=[0,1,2,3,4],
              n_splits=5,
             ):
    """Train one LightGBM classifier per requested cross-validation fold.

    The data is split with a shuffled ``StratifiedKFold`` (``random_state=123``).
    For every fold number in ``list_nfold`` a ``lgb.LGBMClassifier`` is fitted
    with early stopping, pickled to ``model_lgb_fold{nfold}.pickle`` in the
    current working directory, and scored with accuracy on both its training
    and validation part. Progress and the final CV summary are printed.

    Args:
        input_x: pandas DataFrame of features.
        input_y: target values aligned row-by-row with ``input_x`` (a pandas
            Series or an array that supports integer-array indexing).
        params: dict of keyword arguments forwarded to ``lgb.LGBMClassifier``.
        list_nfold: fold numbers to train; each is a 0-based position in the
            list of ``n_splits`` splits, so it must be smaller than
            ``n_splits``. The default list is never mutated.
        n_splits: number of folds used by ``StratifiedKFold``.

    Returns:
        pandas DataFrame with the columns ``col`` (feature name),
        ``importance`` (mean feature importance over the trained folds) and
        ``imp_std`` (its standard deviation), sorted by ``importance`` in
        descending order.
    """
    # Custom evaluation metric: accuracy after thresholding the predictions at
    # 0.5. It does not depend on the fold, so it is defined once. The returned
    # tuple is (metric name, value, is_higher_better).
    def accuracy_metric(y_true, y_pred):
        y_pred_binary = (y_pred > 0.5).astype(int)
        accuracy = (y_true == y_pred_binary).mean()
        return 'accuracy', accuracy, True

    metrics = []
    fold_importances = []

    # Cross-validation splits (materialised so folds can be picked by number).
    cv = list(StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=123).split(input_x, input_y))
    for nfold in list_nfold:
        print("-"*20, nfold, "-"*20)

        # Build the train / validation sets for this fold.
        # StratifiedKFold yields *positional* indices, so pandas objects must
        # be sliced with .iloc; label-based .loc only matches when the index
        # happens to be the default 0..n-1 range.
        idx_tr, idx_va = cv[nfold]
        x_tr, x_va = input_x.iloc[idx_tr, :], input_x.iloc[idx_va, :]
        if hasattr(input_y, "iloc"):
            y_tr, y_va = input_y.iloc[idx_tr], input_y.iloc[idx_va]
        else:
            y_tr, y_va = input_y[idx_tr], input_y[idx_va]
        print(x_tr.shape, x_va.shape)

        # Model training.
        model = lgb.LGBMClassifier(**params)

        model.fit(x_tr,
                  y_tr,
                  eval_set=[(x_tr,y_tr), (x_va,y_va)],
                  eval_metric=accuracy_metric,  # use the custom accuracy metric
                  callbacks=[
                      # Stop when the validation scores have not improved for 100 rounds.
                      lgb.early_stopping(stopping_rounds=100, verbose=True),
                      # Log the evaluation results every 100 iterations.
                      lgb.log_evaluation(100),
                  ]
                 )

        # Save the fitted model of this fold.
        fname_lgb = "model_lgb_fold{}.pickle".format(nfold)
        with open(fname_lgb, "wb") as f:
            pickle.dump(model, f, protocol=4)

        # Predict and compute the accuracy on both parts of the fold.
        y_tr_pred = model.predict(x_tr)
        y_va_pred = model.predict(x_va)

        tr_accuracy = accuracy_score(y_tr, y_tr_pred)
        va_accuracy = accuracy_score(y_va, y_va_pred)

        metrics.append([nfold, tr_accuracy, va_accuracy])

        # Feature importance of this fold; all folds are concatenated once
        # after the loop instead of growing a DataFrame on every iteration.
        fold_importances.append(
            pd.DataFrame({"col":input_x.columns, "importance":model.feature_importances_, "nfold":nfold})
        )

    print("-"*20, "result", "-"*20)

    # Accuracy table: one row per fold -> [fold, train accuracy, validation accuracy].
    metrics = np.array(metrics)
    print(metrics)
    print("[cv] tr:{:.4f}+-{:.4f}, va:{:.4f}+-{:.4f}".format(
        metrics[:,1].mean(), metrics[:,1].std(),
        metrics[:,2].mean(), metrics[:,2].std(),
    ))

    # Importance table: mean and standard deviation per feature over the folds.
    imp = pd.concat(fold_importances)
    imp = imp.groupby("col")["importance"].agg(["mean", "std"]).reset_index(drop=False)
    imp.columns = ["col", "importance", "imp_std"]
    imp = imp.sort_values(by="importance", ascending=False)

    return imp

In [8]:
# Load the training data.
train_data = pd.read_csv('train.csv')

# Split into the target ("Y") and the explanatory variables.
# The "index" column is dropped together with the target so that it is not used as a feature.
train_y = train_data['Y']
train_X = train_data.drop(columns=['Y', 'index'])

# # Drop native-country (currently disabled)
# train_X = train_X.drop(columns='native-country')

# Convert string (object dtype) columns to integer codes with label encoding.
# Every fitted encoder is kept in `label_encoders`, keyed by column name, so the
# identical string -> integer mapping can later be applied to other data
# (for example a test set) via `label_encoders[col].transform(...)`.
categorical_features = train_X.select_dtypes(include=['object']).columns.tolist()
label_encoders = {}
for col in categorical_features:
    le = LabelEncoder()
    train_X[col] = le.fit_transform(train_X[col])
    label_encoders[col] = le
#---------------------------------- Code that is not finalised yet ----------------------------------




# モデル作成・評価

In [9]:
# LightGBM hyperparameters shared by every fold.
params = dict(
    objective="binary",
    metric="binary_logloss",
    boosting_type="gbdt",
    learning_rate=0.1925240065735636,
    num_leaves=136,
    max_depth=4,
    random_state=42,
)

# Run the training with 10-fold cross-validation, fitting every fold (0 through 9).
imp = train_lgb(train_X, train_y, params, list_nfold=list(range(10)), n_splits=10)

# Show the feature-importance summary returned by train_lgb.
display(imp)

-------------------- 0 --------------------
(9938, 11) (1105, 11)
[LightGBM] [Info] Number of positive: 2415, number of negative: 7523
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013259 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 302
[LightGBM] [Info] Number of data points in the train set: 9938, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243007 -> initscore=-1.136266
[LightGBM] [Info] Start training from score -1.136266
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.290828	training's accuracy: 0.866975	valid_1's binary_logloss: 0.36009	valid_1's accuracy: 0.831674
Did not meet early stopping. Best iteration is:
[47]	training's binary_logloss: 0.304704	training's accuracy: 0.860837	valid_1's binary_logloss: 0.352082	valid_1's accuracy: 0.831674


Unnamed: 0,col,importance,imp_std
1,age_education-num,181.2,42.29736
6,occupation,155.0,42.050235
0,age,150.4,32.70474
2,education,68.6,16.53414
3,education-num,67.7,9.274218
10,workclass,58.8,13.669105
8,relationship,53.7,11.353414
4,marital-status,53.1,9.374315
9,sex,13.6,4.087923
7,race,13.1,5.820462


# ハイパーパラメーター自動調整

In [15]:
pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.1-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.9-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [16]:
import optuna
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load the data and split it into the target ("Y") and the features;
# the "index" column is removed from the features as well.
train_data = pd.read_csv('train.csv')
train_y = train_data['Y']
train_X = train_data.drop(columns=['Y', 'index'])

# Interaction feature: age multiplied by the number of years of education.
train_X['age_education-num'] = train_X['age'].mul(train_X['education-num'])

# Label-encode the string (object dtype) columns and keep their names in
# `categorical_features`, which the objective function passes to LightGBM.
categorical_features = list(train_X.select_dtypes(include=['object']).columns)
for col in categorical_features:
    le = LabelEncoder()
    le.fit(train_X[col])
    train_X[col] = le.transform(train_X[col])


# Optuna objective function
def objective(trial):
    """Return the mean 5-fold cross-validated accuracy for one Optuna trial.

    The trial proposes ``learning_rate`` (log scale, 0.01-0.3), ``num_leaves``
    (20-150) and ``max_depth`` (3-15). A ``lgb.LGBMClassifier`` is fitted on
    each fold of a shuffled ``StratifiedKFold`` (``random_state=123``) over the
    module-level ``train_X`` / ``train_y``, with the columns listed in the
    module-level ``categorical_features`` declared as categorical.

    Args:
        trial: the ``optuna`` trial used to sample the hyperparameters.

    Returns:
        Mean validation accuracy over the 5 folds (higher is better).
    """
    # Hyperparameter search space.
    params = {
        "objective": "binary",
        "metric": "binary_logloss",
        "boosting_type": "gbdt",
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 20, 150),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "random_state": 42
    }

    # Cross-validation setup (5 folds).
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    accuracies = []

    for train_idx, valid_idx in skf.split(train_X, train_y):
        # split() yields positional indices, so select rows with .iloc;
        # label-based .loc only matches for a default 0..n-1 index.
        X_train, X_valid = train_X.iloc[train_idx], train_X.iloc[valid_idx]
        y_train, y_valid = train_y.iloc[train_idx], train_y.iloc[valid_idx]

        # Train the model (the label-encoded columns are declared as categorical).
        model = lgb.LGBMClassifier(**params)
        model.fit(X_train,
                  y_train,
                  eval_set=[(X_train,y_train), (X_valid,y_valid)],
                  callbacks=[
                      # Stop when the validation scores have not improved for 100 rounds.
                      lgb.early_stopping(stopping_rounds=100, verbose=True),
                      # Log the evaluation results every 100 iterations.
                      lgb.log_evaluation(100),
                  ],
                  categorical_feature=categorical_features
                 )

        # Predict on the validation part and record its accuracy.
        y_pred = model.predict(X_valid)
        acc = accuracy_score(y_valid, y_pred)
        accuracies.append(acc)

    # Return the mean accuracy over the 5 folds (the study decides whether it is minimised or maximised).
    return np.mean(accuracies)

# Create the study; the objective (accuracy) is maximised.
# The sampler is seeded so that repeated runs propose the same sequence of
# hyperparameters; TPESampler is also what Optuna uses when no sampler is given.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50)

# Report the best trial: its score and the hyperparameters that produced it.
best_trial = study.best_trial
print("Best trial:")
print("  Value: {:.4f}".format(best_trial.value))
print("  Params: ")
for key, value in best_trial.params.items():
    print("    {}: {}".format(key, value))


[I 2025-02-25 05:13:44,649] A new study created in memory with name: no-name-f2d350af-06f8-4a6f-ae4c-331a23b2fcca


[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000768 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.284253	valid_1's binary_logloss: 0.341805
Did not meet early stopping. Best iteration is:
[60]	training's binary_logloss: 0.300097	valid_1's binary_logloss: 0.340532
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000679 seconds.
You can se

[I 2025-02-25 05:13:45,939] Trial 0 finished with value: 0.8481392819887024 and parameters: {'learning_rate': 0.05911193313255364, 'num_leaves': 30, 'max_depth': 11}. Best is trial 0 with value: 0.8481392819887024.


[100]	training's binary_logloss: 0.286683	valid_1's binary_logloss: 0.345628
Did not meet early stopping. Best iteration is:
[65]	training's binary_logloss: 0.299662	valid_1's binary_logloss: 0.343609
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.227057	valid_1's binary_logloss: 0.38473
Did not meet early stopping. Best iteration is:
[25]	training's binary_logloss: 0.288668	valid_1's binary_l

[I 2025-02-25 05:13:47,544] Trial 1 finished with value: 0.8481390359596119 and parameters: {'learning_rate': 0.1246269675362577, 'num_leaves': 85, 'max_depth': 11}. Best is trial 0 with value: 0.8481392819887024.


[100]	training's binary_logloss: 0.230503	valid_1's binary_logloss: 0.372564
Did not meet early stopping. Best iteration is:
[27]	training's binary_logloss: 0.285952	valid_1's binary_logloss: 0.343882
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000670 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.377644	valid_1's binary_logloss: 0.3857
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.377644	valid_1's binary_l

[I 2025-02-25 05:13:48,454] Trial 2 finished with value: 0.8264057282133038 and parameters: {'learning_rate': 0.010987872977188326, 'num_leaves': 110, 'max_depth': 3}. Best is trial 0 with value: 0.8481392819887024.


[100]	training's binary_logloss: 0.376475	valid_1's binary_logloss: 0.389271
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.376475	valid_1's binary_logloss: 0.389271
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000659 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.304354	valid_1's binary_logloss: 0.340803
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.304354	valid_1's binar

[I 2025-02-25 05:13:49,681] Trial 3 finished with value: 0.8471432332158955 and parameters: {'learning_rate': 0.03409364537270148, 'num_leaves': 26, 'max_depth': 14}. Best is trial 0 with value: 0.8481392819887024.


[100]	training's binary_logloss: 0.30573	valid_1's binary_logloss: 0.344244
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.30573	valid_1's binary_logloss: 0.344244
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000708 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.217226	valid_1's binary_logloss: 0.402588
Did not meet early stopping. Best iteration is:
[17]	training's binary_logloss: 0.286071	valid_1's binary_l

[I 2025-02-25 05:13:50,999] Trial 4 finished with value: 0.8494971165390595 and parameters: {'learning_rate': 0.24266047321178422, 'num_leaves': 46, 'max_depth': 15}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.219802	valid_1's binary_logloss: 0.387706
Did not meet early stopping. Best iteration is:
[14]	training's binary_logloss: 0.294833	valid_1's binary_logloss: 0.344919
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000661 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.210315	valid_1's binary_logloss: 0.405685
Did not meet early stopping. Best iteration is:
[18]	training's binary_logloss: 0.288727	valid_1's binary_

[I 2025-02-25 05:13:52,773] Trial 5 finished with value: 0.8464187185492811 and parameters: {'learning_rate': 0.160900496725392, 'num_leaves': 106, 'max_depth': 12}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.211609	valid_1's binary_logloss: 0.394659
Did not meet early stopping. Best iteration is:
[22]	training's binary_logloss: 0.279239	valid_1's binary_logloss: 0.346801
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000743 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.242124	valid_1's binary_logloss: 0.36374
Did not meet early stopping. Best iteration is:
[38]	training's binary_logloss: 0.283377	valid_1's binary_l

[I 2025-02-25 05:13:54,250] Trial 6 finished with value: 0.8486826372350267 and parameters: {'learning_rate': 0.09983230397055064, 'num_leaves': 61, 'max_depth': 14}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.247239	valid_1's binary_logloss: 0.359016
Did not meet early stopping. Best iteration is:
[33]	training's binary_logloss: 0.291916	valid_1's binary_logloss: 0.343586
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000656 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.309351	valid_1's binary_logloss: 0.339823
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.309351	valid_1's binary

[I 2025-02-25 05:13:56,139] Trial 7 finished with value: 0.8476867524816134 and parameters: {'learning_rate': 0.03562561359207356, 'num_leaves': 98, 'max_depth': 5}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.308859	valid_1's binary_logloss: 0.34367
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.308859	valid_1's binary_logloss: 0.34367
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001051 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.327744	valid_1's binary_logloss: 0.343841
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.327744	valid_1's binary_

[I 2025-02-25 05:13:57,582] Trial 8 finished with value: 0.8457856446946286 and parameters: {'learning_rate': 0.03611743942466417, 'num_leaves': 66, 'max_depth': 3}. Best is trial 4 with value: 0.8494971165390595.


[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000983 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.348266	valid_1's binary_logloss: 0.372191
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.348266	valid_1's binary_logloss: 0.372191
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000723 seconds.
You can s

[I 2025-02-25 05:13:59,421] Trial 9 finished with value: 0.8415292594196337 and parameters: {'learning_rate': 0.011863458289352063, 'num_leaves': 86, 'max_depth': 11}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.348757	valid_1's binary_logloss: 0.372711
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.348757	valid_1's binary_logloss: 0.372711
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000666 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.216017	valid_1's binary_logloss: 0.412157
Did not meet early stopping. Best iteration is:
[13]	training's binary_logloss: 0.291025	valid_1's binary

[I 2025-02-25 05:14:00,770] Trial 10 finished with value: 0.8471435202498343 and parameters: {'learning_rate': 0.29970896538475816, 'num_leaves': 138, 'max_depth': 7}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.221352	valid_1's binary_logloss: 0.382454
Did not meet early stopping. Best iteration is:
[13]	training's binary_logloss: 0.291995	valid_1's binary_logloss: 0.34235
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000655 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.206032	valid_1's binary_logloss: 0.424824
Did not meet early stopping. Best iteration is:
[11]	training's binary_logloss: 0.293359	valid_1's binary_l

[I 2025-02-25 05:14:02,108] Trial 11 finished with value: 0.8459657789937083 and parameters: {'learning_rate': 0.29470909296898, 'num_leaves': 50, 'max_depth': 14}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.208022	valid_1's binary_logloss: 0.411804
Did not meet early stopping. Best iteration is:
[12]	training's binary_logloss: 0.291866	valid_1's binary_logloss: 0.344469
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000718 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.244112	valid_1's binary_logloss: 0.36601
Did not meet early stopping. Best iteration is:
[34]	training's binary_logloss: 0.28652	valid_1's binary_lo

[I 2025-02-25 05:14:03,537] Trial 12 finished with value: 0.8475959677472265 and parameters: {'learning_rate': 0.11011609525027724, 'num_leaves': 52, 'max_depth': 15}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.24757	valid_1's binary_logloss: 0.358766
Did not meet early stopping. Best iteration is:
[35]	training's binary_logloss: 0.287648	valid_1's binary_logloss: 0.343192
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001003 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.261249	valid_1's binary_logloss: 0.352883
Did not meet early stopping. Best iteration is:
[45]	training's binary_logloss: 0.291935	valid_1's binary_l

[I 2025-02-25 05:14:05,026] Trial 13 finished with value: 0.8483206874380826 and parameters: {'learning_rate': 0.07434076025875208, 'num_leaves': 60, 'max_depth': 8}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.266395	valid_1's binary_logloss: 0.349465
Did not meet early stopping. Best iteration is:
[47]	training's binary_logloss: 0.292988	valid_1's binary_logloss: 0.342589
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000720 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.236042	valid_1's binary_logloss: 0.377648
Did not meet early stopping. Best iteration is:
[18]	training's binary_logloss: 0.298464	valid_1's binary_

[I 2025-02-25 05:14:06,267] Trial 14 finished with value: 0.8484107340851981 and parameters: {'learning_rate': 0.18152033126077394, 'num_leaves': 38, 'max_depth': 13}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.238886	valid_1's binary_logloss: 0.365802
Did not meet early stopping. Best iteration is:
[20]	training's binary_logloss: 0.295723	valid_1's binary_logloss: 0.342823
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000672 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.241437	valid_1's binary_logloss: 0.3678
Did not meet early stopping. Best iteration is:
[37]	training's binary_logloss: 0.285421	valid_1's binary_lo

[I 2025-02-25 05:14:07,904] Trial 15 finished with value: 0.8464186775444329 and parameters: {'learning_rate': 0.09036780316357618, 'num_leaves': 73, 'max_depth': 15}. Best is trial 4 with value: 0.8494971165390595.


[100]	training's binary_logloss: 0.243779	valid_1's binary_logloss: 0.361654
Did not meet early stopping. Best iteration is:
[36]	training's binary_logloss: 0.289785	valid_1's binary_logloss: 0.342802
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.231905	valid_1's binary_logloss: 0.382794
Did not meet early stopping. Best iteration is:
[17]	training's binary_logloss: 0.297774	valid_1's binary_

[I 2025-02-25 05:14:09,768] Trial 16 finished with value: 0.8495879012734466 and parameters: {'learning_rate': 0.18979045555717303, 'num_leaves': 43, 'max_depth': 9}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.233479	valid_1's binary_logloss: 0.366542
Did not meet early stopping. Best iteration is:
[24]	training's binary_logloss: 0.285512	valid_1's binary_logloss: 0.342853
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001044 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.255951	valid_1's binary_logloss: 0.361537
Did not meet early stopping. Best iteration is:
[24]	training's binary_logloss: 0.300114	valid_1's binary_

[I 2025-02-25 05:14:11,384] Trial 17 finished with value: 0.8466000009841164 and parameters: {'learning_rate': 0.19107820568828612, 'num_leaves': 21, 'max_depth': 9}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.257722	valid_1's binary_logloss: 0.357444
Did not meet early stopping. Best iteration is:
[29]	training's binary_logloss: 0.297043	valid_1's binary_logloss: 0.340952
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001024 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.225162	valid_1's binary_logloss: 0.396232
Did not meet early stopping. Best iteration is:
[16]	training's binary_logloss: 0.292802	valid_1's binary_

[I 2025-02-25 05:14:12,779] Trial 18 finished with value: 0.8485918525006397 and parameters: {'learning_rate': 0.22823693733293468, 'num_leaves': 42, 'max_depth': 9}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.228804	valid_1's binary_logloss: 0.37762
Did not meet early stopping. Best iteration is:
[18]	training's binary_logloss: 0.290506	valid_1's binary_logloss: 0.342524
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000673 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.319651	valid_1's binary_logloss: 0.346634
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.319651	valid_1's binary_

[I 2025-02-25 05:14:14,224] Trial 19 finished with value: 0.8471435202498343 and parameters: {'learning_rate': 0.021724111852127654, 'num_leaves': 127, 'max_depth': 6}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.320475	valid_1's binary_logloss: 0.349507
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.320475	valid_1's binary_logloss: 0.349507
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000707 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.227328	valid_1's binary_logloss: 0.386988
Did not meet early stopping. Best iteration is:
[26]	training's binary_logloss: 0.283805	valid_1's binary

[I 2025-02-25 05:14:15,778] Trial 20 finished with value: 0.8476860143943419 and parameters: {'learning_rate': 0.14159260725632872, 'num_leaves': 73, 'max_depth': 10}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.228248	valid_1's binary_logloss: 0.373609
Did not meet early stopping. Best iteration is:
[24]	training's binary_logloss: 0.288848	valid_1's binary_logloss: 0.342843
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000664 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.252051	valid_1's binary_logloss: 0.359889
Did not meet early stopping. Best iteration is:
[34]	training's binary_logloss: 0.292992	valid_1's binary_

[I 2025-02-25 05:14:17,170] Trial 21 finished with value: 0.8485013137953432 and parameters: {'learning_rate': 0.10215240010647723, 'num_leaves': 45, 'max_depth': 13}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.256064	valid_1's binary_logloss: 0.356798
Did not meet early stopping. Best iteration is:
[31]	training's binary_logloss: 0.298458	valid_1's binary_logloss: 0.343455
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000653 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.214071	valid_1's binary_logloss: 0.40643
Did not meet early stopping. Best iteration is:
[16]	training's binary_logloss: 0.288153	valid_1's binary_l

[I 2025-02-25 05:14:18,619] Trial 22 finished with value: 0.8489539253121288 and parameters: {'learning_rate': 0.21150901022979582, 'num_leaves': 58, 'max_depth': 13}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.215867	valid_1's binary_logloss: 0.39415
Did not meet early stopping. Best iteration is:
[14]	training's binary_logloss: 0.29623	valid_1's binary_logloss: 0.346918
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000659 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.229992	valid_1's binary_logloss: 0.384067
Did not meet early stopping. Best iteration is:
[20]	training's binary_logloss: 0.288988	valid_1's binary_lo

[I 2025-02-25 05:14:19,791] Trial 23 finished with value: 0.8475053060273845 and parameters: {'learning_rate': 0.23518315047810742, 'num_leaves': 33, 'max_depth': 12}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.234215	valid_1's binary_logloss: 0.374812
Did not meet early stopping. Best iteration is:
[13]	training's binary_logloss: 0.304382	valid_1's binary_logloss: 0.347218
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000709 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.226344	valid_1's binary_logloss: 0.391094
Did not meet early stopping. Best iteration is:
[18]	training's binary_logloss: 0.289235	valid_1's binary_

[I 2025-02-25 05:14:21,090] Trial 24 finished with value: 0.8486825552253299 and parameters: {'learning_rate': 0.21557991361402637, 'num_leaves': 51, 'max_depth': 8}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.227839	valid_1's binary_logloss: 0.374653
Did not meet early stopping. Best iteration is:
[20]	training's binary_logloss: 0.285872	valid_1's binary_logloss: 0.343176
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000854 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.221112	valid_1's binary_logloss: 0.391125
Did not meet early stopping. Best iteration is:
[19]	training's binary_logloss: 0.294033	valid_1's binary_

[I 2025-02-25 05:14:23,198] Trial 25 finished with value: 0.8461468974091495 and parameters: {'learning_rate': 0.14822860701765284, 'num_leaves': 72, 'max_depth': 15}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.2224	valid_1's binary_logloss: 0.383401
Did not meet early stopping. Best iteration is:
[21]	training's binary_logloss: 0.291821	valid_1's binary_logloss: 0.345056
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001617 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.200363	valid_1's binary_logloss: 0.433176
Did not meet early stopping. Best iteration is:
[13]	training's binary_logloss: 0.285354	valid_1's binary_lo

[I 2025-02-25 05:14:25,566] Trial 26 finished with value: 0.8464186775444329 and parameters: {'learning_rate': 0.26224291342299616, 'num_leaves': 85, 'max_depth': 10}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.204504	valid_1's binary_logloss: 0.412014
Did not meet early stopping. Best iteration is:
[14]	training's binary_logloss: 0.281431	valid_1's binary_logloss: 0.345294
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000655 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.256555	valid_1's binary_logloss: 0.356211
Did not meet early stopping. Best iteration is:
[45]	training's binary_logloss: 0.288631	valid_1's binary_

[I 2025-02-25 05:14:27,068] Trial 27 finished with value: 0.8476864654476746 and parameters: {'learning_rate': 0.07772150056735148, 'num_leaves': 56, 'max_depth': 13}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.259544	valid_1's binary_logloss: 0.353908
Did not meet early stopping. Best iteration is:
[45]	training's binary_logloss: 0.290745	valid_1's binary_logloss: 0.34347
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000650 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.229408	valid_1's binary_logloss: 0.383238
Did not meet early stopping. Best iteration is:
[16]	training's binary_logloss: 0.300197	valid_1's binary_l

[I 2025-02-25 05:14:28,386] Trial 28 finished with value: 0.8472331158436173 and parameters: {'learning_rate': 0.17975032237150454, 'num_leaves': 46, 'max_depth': 12}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.231204	valid_1's binary_logloss: 0.375925
Did not meet early stopping. Best iteration is:
[25]	training's binary_logloss: 0.283033	valid_1's binary_logloss: 0.341715
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000691 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.28017	valid_1's binary_logloss: 0.343803
Did not meet early stopping. Best iteration is:
[62]	training's binary_logloss: 0.295404	valid_1's binary_l

[I 2025-02-25 05:14:29,755] Trial 29 finished with value: 0.8488635506262259 and parameters: {'learning_rate': 0.062370821972617514, 'num_leaves': 33, 'max_depth': 10}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.282329	valid_1's binary_logloss: 0.344854
Did not meet early stopping. Best iteration is:
[74]	training's binary_logloss: 0.291448	valid_1's binary_logloss: 0.342622
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000677 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.270798	valid_1's binary_logloss: 0.349772
Did not meet early stopping. Best iteration is:
[41]	training's binary_logloss: 0.296759	valid_1's binary_

[I 2025-02-25 05:14:30,846] Trial 30 finished with value: 0.8485917294860943 and parameters: {'learning_rate': 0.13437778739982514, 'num_leaves': 20, 'max_depth': 8}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.273893	valid_1's binary_logloss: 0.347868
Did not meet early stopping. Best iteration is:
[38]	training's binary_logloss: 0.300737	valid_1's binary_logloss: 0.340771
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000641 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.277862	valid_1's binary_logloss: 0.345283
Did not meet early stopping. Best iteration is:
[68]	training's binary_logloss: 0.290122	valid_1's binary_

[I 2025-02-25 05:14:32,246] Trial 31 finished with value: 0.8480488662979511 and parameters: {'learning_rate': 0.06607731684201829, 'num_leaves': 33, 'max_depth': 11}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.281618	valid_1's binary_logloss: 0.345434
Did not meet early stopping. Best iteration is:
[63]	training's binary_logloss: 0.294815	valid_1's binary_logloss: 0.343399
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000664 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.28491	valid_1's binary_logloss: 0.341866
Did not meet early stopping. Best iteration is:
[78]	training's binary_logloss: 0.293239	valid_1's binary_l

[I 2025-02-25 05:14:33,663] Trial 32 finished with value: 0.8493166542011927 and parameters: {'learning_rate': 0.049291617029741136, 'num_leaves': 37, 'max_depth': 10}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.287414	valid_1's binary_logloss: 0.343692
Did not meet early stopping. Best iteration is:
[72]	training's binary_logloss: 0.298241	valid_1's binary_logloss: 0.342976
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000661 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.290259	valid_1's binary_logloss: 0.341669
Did not meet early stopping. Best iteration is:
[82]	training's binary_logloss: 0.29711	valid_1's binary_l

[I 2025-02-25 05:14:35,059] Trial 33 finished with value: 0.8475054700467783 and parameters: {'learning_rate': 0.043086900512761046, 'num_leaves': 40, 'max_depth': 7}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.29264	valid_1's binary_logloss: 0.342892
Did not meet early stopping. Best iteration is:
[95]	training's binary_logloss: 0.294266	valid_1's binary_logloss: 0.342772
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001430 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.289297	valid_1's binary_logloss: 0.340396
Did not meet early stopping. Best iteration is:
[84]	training's binary_logloss: 0.294326	valid_1's binary_logloss: 0.339169
[LightGBM] [Info] Number of positive: 2147, num

[I 2025-02-25 05:14:36,881] Trial 34 finished with value: 0.8472337719211918 and parameters: {'learning_rate': 0.05137752371249003, 'num_leaves': 29, 'max_depth': 14}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.291333	valid_1's binary_logloss: 0.343306
Did not meet early stopping. Best iteration is:
[97]	training's binary_logloss: 0.292213	valid_1's binary_logloss: 0.34305
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001522 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.301062	valid_1's binary_logloss: 0.345152
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.301062	valid_1's binary_

[I 2025-02-25 05:14:39,319] Trial 35 finished with value: 0.8473243926361853 and parameters: {'learning_rate': 0.026067486631791283, 'num_leaves': 63, 'max_depth': 12}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.303046	valid_1's binary_logloss: 0.346351
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.303046	valid_1's binary_logloss: 0.346351
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000646 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.312423	valid_1's binary_logloss: 0.344911
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.312423	valid_1's binar

[I 2025-02-25 05:14:40,753] Trial 36 finished with value: 0.8460567687523373 and parameters: {'learning_rate': 0.023481465629705846, 'num_leaves': 40, 'max_depth': 10}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.313857	valid_1's binary_logloss: 0.347588
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.313857	valid_1's binary_logloss: 0.347588
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000721 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.26492	valid_1's binary_logloss: 0.351991
Did not meet early stopping. Best iteration is:
[39]	training's binary_logloss: 0.294943	valid_1's binary_

[I 2025-02-25 05:14:41,922] Trial 37 finished with value: 0.8477768811384259 and parameters: {'learning_rate': 0.11850447612838348, 'num_leaves': 26, 'max_depth': 13}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.268601	valid_1's binary_logloss: 0.348923
Did not meet early stopping. Best iteration is:
[45]	training's binary_logloss: 0.292582	valid_1's binary_logloss: 0.341608
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000756 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.299808	valid_1's binary_logloss: 0.337898
Did not meet early stopping. Best iteration is:
[98]	training's binary_logloss: 0.300184	valid_1's binary_

[I 2025-02-25 05:14:43,238] Trial 38 finished with value: 0.8483202773895986 and parameters: {'learning_rate': 0.052399818223522396, 'num_leaves': 79, 'max_depth': 5}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.299403	valid_1's binary_logloss: 0.342853
Did not meet early stopping. Best iteration is:
[96]	training's binary_logloss: 0.300311	valid_1's binary_logloss: 0.342686
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000722 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.215205	valid_1's binary_logloss: 0.39588
Did not meet early stopping. Best iteration is:
[22]	training's binary_logloss: 0.28029	valid_1's binary_lo

[I 2025-02-25 05:14:44,929] Trial 39 finished with value: 0.8456035421628254 and parameters: {'learning_rate': 0.16024619167438822, 'num_leaves': 95, 'max_depth': 11}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.216693	valid_1's binary_logloss: 0.387192
Did not meet early stopping. Best iteration is:
[21]	training's binary_logloss: 0.284371	valid_1's binary_logloss: 0.344239
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000706 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.325883	valid_1's binary_logloss: 0.354447
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.325883	valid_1's binary

[I 2025-02-25 05:14:46,422] Trial 40 finished with value: 0.8464189645783717 and parameters: {'learning_rate': 0.017087081822070155, 'num_leaves': 55, 'max_depth': 14}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.327003	valid_1's binary_logloss: 0.355952
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.327003	valid_1's binary_logloss: 0.355952
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.290069	valid_1's binary_logloss: 0.340459
Did not meet early stopping. Best iteration is:
[80]	training's binary_logloss: 0.297546	valid_1's binary

[I 2025-02-25 05:14:47,836] Trial 41 finished with value: 0.8481393229935508 and parameters: {'learning_rate': 0.043880519689023574, 'num_leaves': 35, 'max_depth': 10}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.292157	valid_1's binary_logloss: 0.343848
Did not meet early stopping. Best iteration is:
[79]	training's binary_logloss: 0.299913	valid_1's binary_logloss: 0.343538
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000673 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.309721	valid_1's binary_logloss: 0.341607
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.309721	valid_1's binary

[I 2025-02-25 05:14:49,116] Trial 42 finished with value: 0.8471433972352891 and parameters: {'learning_rate': 0.029456809549377894, 'num_leaves': 26, 'max_depth': 9}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.310738	valid_1's binary_logloss: 0.345866
Did not meet early stopping. Best iteration is:
[100]	training's binary_logloss: 0.310738	valid_1's binary_logloss: 0.345866
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001082 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.273448	valid_1's binary_logloss: 0.345301
Did not meet early stopping. Best iteration is:
[56]	training's binary_logloss: 0.294498	valid_1's binary

[I 2025-02-25 05:14:51,308] Trial 43 finished with value: 0.8484107750900467 and parameters: {'learning_rate': 0.059941998808238216, 'num_leaves': 45, 'max_depth': 11}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.276375	valid_1's binary_logloss: 0.345749
Did not meet early stopping. Best iteration is:
[65]	training's binary_logloss: 0.291646	valid_1's binary_logloss: 0.342525
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001182 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.260268	valid_1's binary_logloss: 0.354242
Did not meet early stopping. Best iteration is:
[46]	training's binary_logloss: 0.2877	valid_1's binary_lo

[I 2025-02-25 05:14:53,282] Trial 44 finished with value: 0.8483199903556595 and parameters: {'learning_rate': 0.08727931252880833, 'num_leaves': 48, 'max_depth': 8}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.263225	valid_1's binary_logloss: 0.352301
Did not meet early stopping. Best iteration is:
[47]	training's binary_logloss: 0.289452	valid_1's binary_logloss: 0.343733
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000745 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.206927	valid_1's binary_logloss: 0.419439
Did not meet early stopping. Best iteration is:
[12]	training's binary_logloss: 0.2911	valid_1's binary_lo

[I 2025-02-25 05:14:54,788] Trial 45 finished with value: 0.8484109391094403 and parameters: {'learning_rate': 0.26301078475814615, 'num_leaves': 67, 'max_depth': 9}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.209331	valid_1's binary_logloss: 0.39494
Did not meet early stopping. Best iteration is:
[11]	training's binary_logloss: 0.297487	valid_1's binary_logloss: 0.344831
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001026 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.231762	valid_1's binary_logloss: 0.379695
Did not meet early stopping. Best iteration is:
[17]	training's binary_logloss: 0.293549	valid_1's binary_l

[I 2025-02-25 05:14:56,123] Trial 46 finished with value: 0.8466903346651709 and parameters: {'learning_rate': 0.20328667162905317, 'num_leaves': 58, 'max_depth': 7}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.232806	valid_1's binary_logloss: 0.371306
Did not meet early stopping. Best iteration is:
[19]	training's binary_logloss: 0.29115	valid_1's binary_logloss: 0.344346
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000715 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.240522	valid_1's binary_logloss: 0.371595
Did not meet early stopping. Best iteration is:
[25]	training's binary_logloss: 0.289241	valid_1's binary_l

[I 2025-02-25 05:14:57,380] Trial 47 finished with value: 0.8482296156697569 and parameters: {'learning_rate': 0.16419637470689813, 'num_leaves': 37, 'max_depth': 15}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.244436	valid_1's binary_logloss: 0.363211
Did not meet early stopping. Best iteration is:
[26]	training's binary_logloss: 0.291098	valid_1's binary_logloss: 0.34271
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000667 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.271443	valid_1's binary_logloss: 0.345094
Did not meet early stopping. Best iteration is:
[83]	training's binary_logloss: 0.280401	valid_1's binary_l

[I 2025-02-25 05:14:59,695] Trial 48 finished with value: 0.8488633866068322 and parameters: {'learning_rate': 0.04234907734324637, 'num_leaves': 149, 'max_depth': 10}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.272933	valid_1's binary_logloss: 0.346493
Did not meet early stopping. Best iteration is:
[84]	training's binary_logloss: 0.281919	valid_1's binary_logloss: 0.344932
[LightGBM] [Info] Number of positive: 2147, number of negative: 6687
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000890 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 301
[LightGBM] [Info] Number of data points in the train set: 8834, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.243038 -> initscore=-1.136094
[LightGBM] [Info] Start training from score -1.136094
Training until validation scores don't improve for 100 rounds
[100]	training's binary_logloss: 0.233953	valid_1's binary_logloss: 0.378867
Did not meet early stopping. Best iteration is:
[18]	training's binary_logloss: 0.292661	valid_1's binary_

[I 2025-02-25 05:15:00,900] Trial 49 finished with value: 0.8494069468773988 and parameters: {'learning_rate': 0.24958108911753976, 'num_leaves': 29, 'max_depth': 12}. Best is trial 16 with value: 0.8495879012734466.


[100]	training's binary_logloss: 0.235471	valid_1's binary_logloss: 0.373537
Did not meet early stopping. Best iteration is:
[20]	training's binary_logloss: 0.292224	valid_1's binary_logloss: 0.343843
Best trial:
  Value: 0.8496
  Params: 
    learning_rate: 0.18979045555717303
    num_leaves: 43
    max_depth: 9




# 推論提出部分

In [11]:
def predict_lgb(input_x, list_nfold=[0, 1, 2, 3, 4]):
    """Predict 0/1 labels by averaging the outputs of the per-fold models.

    For every fold id in ``list_nfold`` the model stored in
    ``model_lgb_fold{id}.pickle`` (resolved against the current working
    directory) is loaded and applied to ``input_x``. The per-fold outputs are
    averaged row-wise and the mean is thresholded at 0.5.

    Args:
        input_x: Feature table handed unchanged to each model's ``predict``.
        list_nfold: Fold ids whose model files are loaded. The ids do not
            have to be contiguous or start at 0.

    Returns:
        1-D integer numpy array with one 0/1 label per row of ``input_x``.

    Raises:
        ValueError: If ``list_nfold`` is empty (there is nothing to average).
        FileNotFoundError: If a model file for a requested fold is missing.
    """
    if len(list_nfold) == 0:
        raise ValueError("list_nfold must contain at least one fold id")

    # One column per requested fold. Columns are addressed by position, not
    # by fold id, so a selection such as [1, 3] fits into a 2-column array.
    test_pred = np.zeros((len(input_x), len(list_nfold)))

    for col, nfold in enumerate(list_nfold):
        print('-'*20, nfold, '-'*20)
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # load model files that were produced by this project.
        with open("model_lgb_fold{}.pickle".format(nfold), "rb") as f:
            model = pickle.load(f)

        # Store this fold's predictions in its own column
        # (presumably positive-class probabilities -- confirm against the
        # training code that wrote the pickles).
        test_pred[:, col] = model.predict(input_x)

    # Average across folds, then cut at 0.5 to obtain the class label
    test_pred_mean = test_pred.mean(axis=1)
    final_test_preds = (test_pred_mean > 0.5).astype(int)
    print('Done.')

    return final_test_preds

In [10]:
# Untouched copy of the test set; its `index` column is needed later to
# build the submission file.
test_data_index = pd.read_csv('test.csv')
sample_sub = pd.read_csv('sample_submit.csv')

# Model input: the test set without its `index` column
test_data = pd.read_csv('test.csv').drop(columns='index')

# Convert every string (object dtype) column to integer codes (label encoding)
# NOTE(review): each encoder is fitted on the test data alone -- confirm that
# the resulting codes match the encoding the models were trained with.
categorical_features = list(test_data.select_dtypes(include=['object']).columns)
for col in categorical_features:
    le = LabelEncoder()
    test_data[col] = le.fit_transform(test_data[col])

# # Drop native-country
# test_data = test_data.drop(columns='native-country')

In [12]:
# Inference: ensemble the models saved for folds 0 through 9
test_pred = predict_lgb(test_data, list_nfold=list(range(10)))

-------------------- 0 --------------------
-------------------- 1 --------------------
-------------------- 2 --------------------
-------------------- 3 --------------------
-------------------- 4 --------------------
-------------------- 5 --------------------
-------------------- 6 --------------------
-------------------- 7 --------------------
-------------------- 8 --------------------
-------------------- 9 --------------------
Done.


In [13]:
# Pair every test-row id with its predicted label.
# Guard against a silent misalignment: concat(axis=1) would pad with NaN if
# the two pieces had different lengths.
assert len(test_data_index) == len(test_pred), "prediction count does not match test rows"
sub_data = pd.concat([test_data_index['index'], pd.DataFrame(test_pred)], axis=1)
sub_data.columns = ['index', 'Y']

# Write the submission without a header row.
# The previous version emulated this by hard-coding the first row's values
# (3625, 0) as column names and dropping the real first row, which wrote a
# wrong first line whenever the first prediction was not 0.
sub_data.to_csv("submission2月25日1回目.csv", index=False, header=False)