In [1]:
# get workshop code
import os
import sys
IN_COLAB = os.getenv("COLAB_RELEASE_TAG")
if IN_COLAB:
    !git clone https://github.com/rajaonsonella/crosstalk-q2-2025
    sys.path.append('./crosstalk-q2-2025')
else:
    sys.path.append('..')
!pip install -r crosstalk-q2-2025/requirements.txt

Cloning into 'crosstalk-q2-2025'...
remote: Enumerating objects: 369, done.[K
remote: Counting objects: 100% (121/121), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 369 (delta 74), reused 41 (delta 27), pack-reused 248 (from 1)[K
Receiving objects: 100% (369/369), 37.60 MiB | 39.33 MiB/s, done.
Resolving deltas: 100% (198/198), done.
Collecting catboost (from -r crosstalk-q2-2025/requirements.txt (line 5))
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting pympler (from -r crosstalk-q2-2025/requirements.txt (line 6))
  Downloading Pympler-1.1-py3-none-any.whl.metadata (3.6 kB)
Collecting rdkit (from -r crosstalk-q2-2025/requirements.txt (line 8))
  Downloading rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m24.5 MB/s[0m eta [36m0:0

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [3]:
# Download data from google drive
import gdown
import os

file_ids = {'test_inputs' : '1Gyv_ldUTi0Ymy6wVMfruAO0UraCQ70CR',
            'train': '11S5p0QgP1X9rOFiIjNSLydLenJwm7hle'}

for name, file_id in file_ids.items():
    filename = f'crosstalk_{name}.parquet'
    if not os.path.exists(filename):
        gdown.download(id=file_id, output=filename, quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=1Gyv_ldUTi0Ymy6wVMfruAO0UraCQ70CR
From (redirected): https://drive.google.com/uc?id=1Gyv_ldUTi0Ymy6wVMfruAO0UraCQ70CR&confirm=t&uuid=ee8140ca-a6f9-4a8f-a585-1f3a9c92dad7
To: /content/crosstalk_test_inputs.parquet
100%|██████████| 1.52G/1.52G [00:11<00:00, 137MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=11S5p0QgP1X9rOFiIjNSLydLenJwm7hle
From (redirected): https://drive.google.com/uc?id=11S5p0QgP1X9rOFiIjNSLydLenJwm7hle&confirm=t&uuid=21c76152-cacd-4eff-8862-bda31ccc3d4e
To: /content/crosstalk_train.parquet
100%|██████████| 1.97G/1.97G [00:19<00:00, 102MB/s]


In [4]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.2-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.7/242.7 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.2 colorlog-6.9.0 optuna-4.4.0


## Train with ECFP6

In [5]:
# === Imports ===
import numpy as np
import pandas as pd
import optuna
from lightgbm import LGBMClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from eval import BinaryEvaluator  # assumes you have this module

# === Optional Balanced Data Loader ===
def load_balanced_dataframe(path, x_col, y_col, ratio=2, balance=True):
    """
    Load a DataFrame from parquet and optionally balance it to a given negative:positive ratio.
    """
    df = pd.read_parquet(path)

    if not balance:
        return df

    pos = df[df[y_col] == 1]
    neg = df[df[y_col] == 0].sample(n=len(pos) * ratio, random_state=42)
    df_balanced = pd.concat([pos, neg]).sample(frac=1, random_state=42)  # shuffle

    return df_balanced

# === Load and Preprocess Data ===
balance_data = True  # Toggle this flag to use balanced or full dataset
data_path = "/content/crosstalk_train.parquet"
x_col = "ECFP6"
y_col = "DELLabel"

df_train = load_balanced_dataframe(path=data_path, x_col=x_col, y_col=y_col, ratio=2, balance=balance_data)
X_full = df_train[x_col].str.split(',', expand=True).astype(float).values
y_full = df_train[y_col].values

# === Optuna Objective Function ===
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 300),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 15, 150),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),  # min_samples_leaf
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.0),  # L1
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 1.0),  # L2
        "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 1.0),
        "max_bin": trial.suggest_int("max_bin", 127, 511),
        "boosting_type": "gbdt",
        "n_jobs": 1,
        "use_best_model": True,
        "random_state": 42,
    }

    auc_scores = []
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    for train_idx, val_idx in skf.split(X_full, y_full):
        X_train, X_val = X_full[train_idx], X_full[val_idx]
        y_train, y_val = y_full[train_idx], y_full[val_idx]

        model = LGBMClassifier(**params)
        model.fit(X_train, y_train)
        y_pred = model.predict_proba(X_val)[:, 1]
        auc = roc_auc_score(y_val, y_pred)
        auc_scores.append(auc)

    return np.mean(auc_scores)

# === Run Optuna ===
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)  # Increase for better results

# === Best Model Info ===
print("\nBest Trial for AUROC (5-fold CV):")
print(f"Mean AUROC: {study.best_value:.4f}")
print("Best Parameters:")
for key, val in study.best_params.items():
    print(f"  {key}: {val}")

# === Final Model Training ===
best_params = study.best_params
best_params.update({
    "boosting_type": "gbdt",
    "n_jobs": 1,
    "use_best_model": True,
    "random_state": 42
})

final_model = LGBMClassifier(**best_params)
final_model.fit(X_full, y_full)

# === Final Evaluation ===
evaluator = BinaryEvaluator(X_full, y_full)
metric_dict_cv = evaluator.CV_model(final_model)

print("\n📊 Final LightGBM Model Evaluation (CV):")
for metric_name, metric_value in metric_dict_cv['mean'].items():
    print(f'{metric_name:20s}: {metric_value:.4f}')


[I 2025-06-26 01:16:04,955] A new study created in memory with name: no-name-0872a77b-4955-46f4-a004-25da69f316e8


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.510368 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.571943 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.569959 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.570333 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.599566 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:17:19,575] Trial 0 finished with value: 0.9778672492956474 and parameters: {'n_estimators': 271, 'learning_rate': 0.18754822669986923, 'num_leaves': 129, 'max_depth': 15, 'min_child_samples': 50, 'subsample': 0.5393951809922277, 'colsample_bytree': 0.667797773988248, 'reg_alpha': 0.5862586421402817, 'reg_lambda': 0.022519343695244443, 'min_split_gain': 0.4970021329583617, 'max_bin': 468}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.590133 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.606319 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.589876 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.542648 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.620233 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:18:16,209] Trial 1 finished with value: 0.9186238391570202 and parameters: {'n_estimators': 171, 'learning_rate': 0.09418567438064175, 'num_leaves': 126, 'max_depth': 3, 'min_child_samples': 25, 'subsample': 0.5562052625079119, 'colsample_bytree': 0.8722030228897177, 'reg_alpha': 0.09058287751981886, 'reg_lambda': 0.8234868075494339, 'min_split_gain': 0.7604169907079606, 'max_bin': 491}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.435877 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.631981 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.601435 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.297908 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.590303 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:19:24,762] Trial 2 finished with value: 0.9760815959125682 and parameters: {'n_estimators': 160, 'learning_rate': 0.13920890688927862, 'num_leaves': 84, 'max_depth': 15, 'min_child_samples': 29, 'subsample': 0.5814842972158225, 'colsample_bytree': 0.990538071761458, 'reg_alpha': 0.44738850287158427, 'reg_lambda': 0.9257486346621717, 'min_split_gain': 0.7931952327065577, 'max_bin': 468}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.373403 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.418928 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.358628 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.566519 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.587039 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:20:29,322] Trial 3 finished with value: 0.9775474962480872 and parameters: {'n_estimators': 162, 'learning_rate': 0.2664620550396434, 'num_leaves': 133, 'max_depth': 15, 'min_child_samples': 24, 'subsample': 0.6779379552908898, 'colsample_bytree': 0.8631036670317462, 'reg_alpha': 0.7932182494987259, 'reg_lambda': 0.6429877456481133, 'min_split_gain': 0.46627936107683376, 'max_bin': 162}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.571264 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.600395 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.558851 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.556390 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.625759 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:21:25,565] Trial 4 finished with value: 0.9375390675210531 and parameters: {'n_estimators': 169, 'learning_rate': 0.18338050305447937, 'num_leaves': 81, 'max_depth': 3, 'min_child_samples': 8, 'subsample': 0.6755200583540419, 'colsample_bytree': 0.9638830451121443, 'reg_alpha': 0.11770484007526272, 'reg_lambda': 0.1331086481425361, 'min_split_gain': 0.7410585177890878, 'max_bin': 365}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.606360 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.620352 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.631559 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.606995 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.609561 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:22:18,033] Trial 5 finished with value: 0.9473846335185925 and parameters: {'n_estimators': 67, 'learning_rate': 0.2429771310198672, 'num_leaves': 17, 'max_depth': 15, 'min_child_samples': 6, 'subsample': 0.5603796302503121, 'colsample_bytree': 0.894984388588764, 'reg_alpha': 0.7501433765675583, 'reg_lambda': 0.2650217851457385, 'min_split_gain': 0.3366589541643261, 'max_bin': 308}. Best is trial 0 with value: 0.9778672492956474.






[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.604797 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.597567 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.598387 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.603470 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.596545 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:23:19,418] Trial 6 finished with value: 0.9546652141158548 and parameters: {'n_estimators': 226, 'learning_rate': 0.169707510342669, 'num_leaves': 148, 'max_depth': 4, 'min_child_samples': 34, 'subsample': 0.911082099552915, 'colsample_bytree': 0.6662613226210674, 'reg_alpha': 0.33879115142166705, 'reg_lambda': 0.5284806592529474, 'min_split_gain': 0.271489521160201, 'max_bin': 206}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.646777 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.616484 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.595938 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.611630 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.627399 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:24:31,483] Trial 7 finished with value: 0.9760550537126675 and parameters: {'n_estimators': 299, 'learning_rate': 0.2567956619258493, 'num_leaves': 52, 'max_depth': 12, 'min_child_samples': 47, 'subsample': 0.75378845378452, 'colsample_bytree': 0.9409276689319557, 'reg_alpha': 0.9269101207448208, 'reg_lambda': 0.3601756254054681, 'min_split_gain': 0.5518944587736478, 'max_bin': 294}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.636899 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.617268 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.602508 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.617609 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.648672 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:25:36,330] Trial 8 finished with value: 0.9747857583300265 and parameters: {'n_estimators': 167, 'learning_rate': 0.19169580090696664, 'num_leaves': 88, 'max_depth': 9, 'min_child_samples': 36, 'subsample': 0.7103783370631831, 'colsample_bytree': 0.9139069528816008, 'reg_alpha': 0.24756869698610517, 'reg_lambda': 0.7989935732125366, 'min_split_gain': 0.5024385874424508, 'max_bin': 179}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.610709 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.631931 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.607905 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.493532 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.486689 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:26:43,381] Trial 9 finished with value: 0.9740411711559812 and parameters: {'n_estimators': 231, 'learning_rate': 0.24098483294680584, 'num_leaves': 26, 'max_depth': 10, 'min_child_samples': 24, 'subsample': 0.8722294840697964, 'colsample_bytree': 0.6836984427883097, 'reg_alpha': 0.2761790786058489, 'reg_lambda': 0.011434490746517145, 'min_split_gain': 0.13552109321280137, 'max_bin': 188}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.609005 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.649220 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.609636 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.627628 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.633576 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:30:42,185] Trial 10 finished with value: 0.953560750615941 and parameters: {'n_estimators': 283, 'learning_rate': 0.030262561631749924, 'num_leaves': 113, 'max_depth': 7, 'min_child_samples': 50, 'subsample': 0.9888054059036203, 'colsample_bytree': 0.518846844658693, 'reg_alpha': 0.6383538037235481, 'reg_lambda': 0.0007635980121085684, 'min_split_gain': 0.9410798120132586, 'max_bin': 408}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.652205 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.616857 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.641211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.615506 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.628029 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:31:43,207] Trial 11 finished with value: 0.977404631495908 and parameters: {'n_estimators': 104, 'learning_rate': 0.28900557741522714, 'num_leaves': 144, 'max_depth': 12, 'min_child_samples': 16, 'subsample': 0.506057380188459, 'colsample_bytree': 0.7883415329417179, 'reg_alpha': 0.6820054698707683, 'reg_lambda': 0.5960483129747951, 'min_split_gain': 0.3959148832009285, 'max_bin': 266}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.649832 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.488954 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.628959 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.615609 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.639303 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:32:50,675] Trial 12 finished with value: 0.9760742097572214 and parameters: {'n_estimators': 240, 'learning_rate': 0.2942104539510968, 'num_leaves': 117, 'max_depth': 13, 'min_child_samples': 43, 'subsample': 0.6563770603692286, 'colsample_bytree': 0.7825528035151869, 'reg_alpha': 0.9735313888917482, 'reg_lambda': 0.6049980059828692, 'min_split_gain': 0.6037195950383827, 'max_bin': 135}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.491167 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.480187 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.474237 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.627909 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.594472 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:34:49,592] Trial 13 finished with value: 0.9757977088624523 and parameters: {'n_estimators': 120, 'learning_rate': 0.11785236649693336, 'num_leaves': 131, 'max_depth': 14, 'min_child_samples': 17, 'subsample': 0.7775876550060382, 'colsample_bytree': 0.5894128890927031, 'reg_alpha': 0.8083214163471965, 'reg_lambda': 0.35922872180308385, 'min_split_gain': 0.050608391449176504, 'max_bin': 412}. Best is trial 0 with value: 0.9778672492956474.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.637173 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.637958 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.654099 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.655690 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.662046 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:36:02,200] Trial 14 finished with value: 0.9791907832377363 and parameters: {'n_estimators': 207, 'learning_rate': 0.2178709194786927, 'num_leaves': 102, 'max_depth': 11, 'min_child_samples': 37, 'subsample': 0.6216419142277873, 'colsample_bytree': 0.8331881733465548, 'reg_alpha': 0.5685103951166917, 'reg_lambda': 0.6964617457045448, 'min_split_gain': 0.23192006716189195, 'max_bin': 362}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.661784 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.641285 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.642883 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.641612 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.652244 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:37:12,819] Trial 15 finished with value: 0.9751402716346724 and parameters: {'n_estimators': 259, 'learning_rate': 0.20002474675074142, 'num_leaves': 100, 'max_depth': 7, 'min_child_samples': 41, 'subsample': 0.6147214808775134, 'colsample_bytree': 0.7171323141005046, 'reg_alpha': 0.5188817439320302, 'reg_lambda': 0.7754261163615953, 'min_split_gain': 0.23015001838668359, 'max_bin': 432}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.673749 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.636327 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.662830 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.663278 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.666879 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:38:43,017] Trial 16 finished with value: 0.978121677417714 and parameters: {'n_estimators': 201, 'learning_rate': 0.22164919481237713, 'num_leaves': 56, 'max_depth': 11, 'min_child_samples': 40, 'subsample': 0.5208988445368454, 'colsample_bytree': 0.6185782917576876, 'reg_alpha': 0.552722619769245, 'reg_lambda': 0.4327284083660671, 'min_split_gain': 0.01485193403156096, 'max_bin': 365}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.658426 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.658035 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.645353 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.639807 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.658333 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:41:24,778] Trial 17 finished with value: 0.9786601241927599 and parameters: {'n_estimators': 203, 'learning_rate': 0.22044600339051257, 'num_leaves': 59, 'max_depth': 11, 'min_child_samples': 36, 'subsample': 0.5025063530429894, 'colsample_bytree': 0.5865364177465247, 'reg_alpha': 0.40952505108882925, 'reg_lambda': 0.45176314364086306, 'min_split_gain': 0.004004469214769983, 'max_bin': 357}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.657455 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.647386 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.664232 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.663032 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.643932 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:44:31,532] Trial 18 finished with value: 0.9664759501636684 and parameters: {'n_estimators': 206, 'learning_rate': 0.08020558645150287, 'num_leaves': 61, 'max_depth': 8, 'min_child_samples': 36, 'subsample': 0.615707336688092, 'colsample_bytree': 0.5208835047345993, 'reg_alpha': 0.4153040850869205, 'reg_lambda': 0.6967986372961679, 'min_split_gain': 0.1745872576673727, 'max_bin': 348}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.674662 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.566243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.597522 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.588438 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.602442 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:45:39,198] Trial 19 finished with value: 0.9753709159671752 and parameters: {'n_estimators': 202, 'learning_rate': 0.21777553492109938, 'num_leaves': 37, 'max_depth': 10, 'min_child_samples': 31, 'subsample': 0.8032656583839609, 'colsample_bytree': 0.8084650070676274, 'reg_alpha': 0.1915638496563129, 'reg_lambda': 0.46809098381225395, 'min_split_gain': 0.0988261362002758, 'max_bin': 243}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.617676 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.631820 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.604300 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.580523 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.640481 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:47:41,986] Trial 20 finished with value: 0.9596800219883477 and parameters: {'n_estimators': 124, 'learning_rate': 0.1589266018176683, 'num_leaves': 70, 'max_depth': 6, 'min_child_samples': 43, 'subsample': 0.6067843666710665, 'colsample_bytree': 0.577232119130257, 'reg_alpha': 0.37100502818678815, 'reg_lambda': 0.9268489356740525, 'min_split_gain': 0.2680720049130104, 'max_bin': 338}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.753066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.651610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.641211 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.641797 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.667607 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:49:22,137] Trial 21 finished with value: 0.9770709021247 and parameters: {'n_estimators': 200, 'learning_rate': 0.21565657481930173, 'num_leaves': 47, 'max_depth': 11, 'min_child_samples': 39, 'subsample': 0.5000110479466437, 'colsample_bytree': 0.6079767279455647, 'reg_alpha': 0.5370727366232624, 'reg_lambda': 0.44346441552131993, 'min_split_gain': 0.007076606621839906, 'max_bin': 385}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.576256 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.577403 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.652168 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.661250 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.646759 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:50:33,216] Trial 22 finished with value: 0.9790089992052359 and parameters: {'n_estimators': 193, 'learning_rate': 0.22359924232458306, 'num_leaves': 72, 'max_depth': 11, 'min_child_samples': 32, 'subsample': 0.5298232871695957, 'colsample_bytree': 0.740540332554847, 'reg_alpha': 0.45365573998977404, 'reg_lambda': 0.2857599840227201, 'min_split_gain': 0.007084674966286064, 'max_bin': 322}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.654221 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.659719 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.660522 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.669691 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.677713 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:51:38,808] Trial 23 finished with value: 0.9787610829770129 and parameters: {'n_estimators': 142, 'learning_rate': 0.26764433092631, 'num_leaves': 98, 'max_depth': 12, 'min_child_samples': 33, 'subsample': 0.594830275281484, 'colsample_bytree': 0.7469964595334898, 'reg_alpha': 0.4527099534649717, 'reg_lambda': 0.2703148704094304, 'min_split_gain': 0.17186944349508157, 'max_bin': 277}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.676717 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.649226 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.662579 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.642583 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.651718 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:52:45,836] Trial 24 finished with value: 0.9790365770281623 and parameters: {'n_estimators': 144, 'learning_rate': 0.26992015266455116, 'num_leaves': 99, 'max_depth': 13, 'min_child_samples': 31, 'subsample': 0.6399638206299125, 'colsample_bytree': 0.8261766060003678, 'reg_alpha': 0.46605954090002116, 'reg_lambda': 0.20353628048835926, 'min_split_gain': 0.1797455535223184, 'max_bin': 265}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.584436 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.573611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.558606 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.587883 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.661684 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:53:51,480] Trial 25 finished with value: 0.9784841382283688 and parameters: {'n_estimators': 141, 'learning_rate': 0.2779249712797444, 'num_leaves': 105, 'max_depth': 13, 'min_child_samples': 19, 'subsample': 0.6440646707070088, 'colsample_bytree': 0.8384680708686254, 'reg_alpha': 0.6600937908177114, 'reg_lambda': 0.13134262802493252, 'min_split_gain': 0.3537400835316646, 'max_bin': 229}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.651748 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.615600 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.579962 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.586328 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.679676 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:54:51,530] Trial 26 finished with value: 0.9751494570915435 and parameters: {'n_estimators': 97, 'learning_rate': 0.24232971657972854, 'num_leaves': 73, 'max_depth': 13, 'min_child_samples': 28, 'subsample': 0.7157510302968542, 'colsample_bytree': 0.816090603976128, 'reg_alpha': 0.316020981305978, 'reg_lambda': 0.1933443389122157, 'min_split_gain': 0.1114227856672406, 'max_bin': 331}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.598429 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.591738 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.587281 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.614967 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.533319 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:55:59,419] Trial 27 finished with value: 0.9748388242873945 and parameters: {'n_estimators': 187, 'learning_rate': 0.14497000876807153, 'num_leaves': 95, 'max_depth': 9, 'min_child_samples': 31, 'subsample': 0.7107852862850306, 'colsample_bytree': 0.733659232553604, 'reg_alpha': 0.602598693864617, 'reg_lambda': 0.3252721418653233, 'min_split_gain': 0.20553179593897988, 'max_bin': 269}. Best is trial 14 with value: 0.9791907832377363.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.575225 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.557324 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.566573 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.665331 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.687069 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125






[I 2025-06-26 01:57:16,923] Trial 28 finished with value: 0.9799027978896954 and parameters: {'n_estimators': 247, 'learning_rate': 0.24789392177852565, 'num_leaves': 112, 'max_depth': 10, 'min_child_samples': 20, 'subsample': 0.641309849738545, 'colsample_bytree': 0.850247357879059, 'reg_alpha': 0.019603379057849146, 'reg_lambda': 0.1413264905423108, 'min_split_gain': 0.08568409914441133, 'max_bin': 314}. Best is trial 28 with value: 0.9799027978896954.


[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.569282 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7161
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.665956 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7182
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.554971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7186
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.572164 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7188
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.535006 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7172
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125


[I 2025-06-26 01:58:30,714] Trial 29 finished with value: 0.979508100688745 and parameters: {'n_estimators': 256, 'learning_rate': 0.29540040318923133, 'num_leaves': 112, 'max_depth': 14, 'min_child_samples': 14, 'subsample': 0.6398025731733176, 'colsample_bytree': 0.840825286462959, 'reg_alpha': 0.19512299717681336, 'reg_lambda': 0.08916326359567894, 'min_split_gain': 0.3024787123732208, 'max_bin': 244}. Best is trial 28 with value: 0.9799027978896954.



Best Trial for AUROC (5-fold CV):
Mean AUROC: 0.9799
Best Parameters:
  n_estimators: 247
  learning_rate: 0.24789392177852565
  num_leaves: 112
  max_depth: 10
  min_child_samples: 20
  subsample: 0.641309849738545
  colsample_bytree: 0.850247357879059
  reg_alpha: 0.019603379057849146
  reg_lambda: 0.1413264905423108
  min_split_gain: 0.08568409914441133
  max_bin: 314
[LightGBM] [Info] Number of positive: 28778, number of negative: 57556
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 2.150030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7250
[LightGBM] [Info] Number of data points in the train set: 86334, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333333 -> initscore=-0.693147
[LightGBM] [Info] Start training from score -0.693147




[LightGBM] [Info] Number of positive: 23023, number of negative: 46044
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.675153 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7170
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333343 -> initscore=-0.693104
[LightGBM] [Info] Start training from score -0.693104




[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.683646 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7183
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.716709 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7185
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169








[LightGBM] [Info] Number of positive: 23022, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.696154 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7177
[LightGBM] [Info] Number of data points in the train set: 69067, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333329 -> initscore=-0.693169
[LightGBM] [Info] Start training from score -0.693169




[LightGBM] [Info] Number of positive: 23023, number of negative: 46045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.703059 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7181
[LightGBM] [Info] Number of data points in the train set: 69068, number of used features: 2048
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333338 -> initscore=-0.693125
[LightGBM] [Info] Start training from score -0.693125





📊 Final LightGBM Model Evaluation (CV):
accuracy            : 0.9348
balanced_accuracy   : 0.9240
roc_auc             : 0.9794
precision           : 0.9110
recall              : 0.8915
mrr                 : 0.0016
precision_at_k_5    : 1.0000
hits_at_k_5         : 0.0009
precision_at_k_10   : 1.0000
hits_at_k_10        : 0.0017
precision_at_k_30   : 1.0000
hits_at_k_30        : 0.0052


## Train with AutoGluon

In [6]:
from dataset import basic_dataloader
team_name = 'nky'

In [7]:
%%time
X_test = basic_dataloader('/content/crosstalk_test_inputs.parquet', x_col="ECFP6", y_col = None, max_to_load = None, chunk_size = 20000)

Loading chunks:   0%|          | 0/17 [00:00<?, ?it/s]

CPU times: user 2min 51s, sys: 9.94 s, total: 3min 1s
Wall time: 2min 59s


In [8]:
yp = final_model.predict_proba(X_test)[:,1]





In [9]:
import pyarrow.parquet as pq
pf = pq.ParquetFile('/content/crosstalk_test_inputs.parquet')

In [10]:
pf.metadata

<pyarrow._parquet.FileMetaData object at 0x7bcc8115ccc0>
  created_by: parquet-cpp-arrow version 14.0.2
  num_columns: 12
  num_rows: 339258
  num_row_groups: 1
  format_version: 2.6
  serialized_size: 59684

In [11]:
preds = pf.read(columns=['RandomID']).to_pandas()
preds['DELLabel'] = yp
display(preds)

Unnamed: 0,RandomID,DELLabel
0,ID_0,0.045122
1,ID_1,0.017088
2,ID_2,0.000332
3,ID_3,0.005461
4,ID_4,0.022786
...,...,...
339253,ID_339253,0.000516
339254,ID_339254,0.000301
339255,ID_339255,0.003515
339256,ID_339256,0.019079




In [None]:
preds.to_csv(f'{team_name}.csv', index=False)