# Features engineer

In [5]:
import pandas as pd
import numpy as np

def quantile_normalize_across(df_list, meta_cols):
    """
    跨多個 DataFrame 做真正的 quantile normalization。
    每個 feature column 分別以所有樣本的 rank 平均值取代。
    """

    # 取出所有 features
    feature_names = [c for c in df_list[0].columns if c not in meta_cols]
    n_features = len(feature_names)

    # 檢查形狀一致
    n_rows = [len(df) for df in df_list]
    if len(set(n_rows)) != 1:
        raise ValueError(f"❌ All DataFrames must have the same number of rows. Got: {n_rows}")

    n = n_rows[0]
    print(f"🧩 Performing quantile normalization on {len(df_list)} datasets, each with {n} rows and {n_features} features")

    # 準備 feature 矩陣列表
    features = [df[feature_names].to_numpy() for df in df_list]

    # 建立一個空間保存平均排序值
    mean_sorted_all = np.zeros((n, n_features))

    # 對每個 feature column 做 rank-based 平均
    for j in range(n_features):
        # 取所有 datasets 的該 feature
        vals = np.vstack([f[:, j] for f in features])
        # 對每個 dataset 內做排序
        sorted_each = np.sort(vals, axis=1)
        # 對 rank 取平均
        mean_sorted_all[:, j] = np.mean(sorted_each, axis=0)

    # 定義函式：把平均 rank 值放回原位置
    def apply_quantile_norm(X, mean_sorted_all):
        X_norm = np.zeros_like(X)
        for j in range(X.shape[1]):
            ranks = np.argsort(np.argsort(X[:, j]))
            X_norm[:, j] = mean_sorted_all[ranks, j]
        return X_norm

    # 套用到所有 DataFrame
    normalized = [apply_quantile_norm(X, mean_sorted_all) for X in features]

    # 回存
    dfs_qn = []
    for df, norm_values in zip(df_list, normalized):
        df_qn = pd.concat([
            df[meta_cols].reset_index(drop=True),
            pd.DataFrame(norm_values, columns=feature_names)
        ], axis=1)
        dfs_qn.append(df_qn)

    return dfs_qn


In [6]:
X1 = pd.read_csv("../preprocessed_data/CAGE-merged/4. features selection/X1_all_rank_features_pruned.tsv", sep="\t")
X2 = pd.read_csv("../preprocessed_data/CAGE-merged/4. features selection/X2_all_rank_features_pruned.tsv", sep="\t")

meta_cols = ["gene_name","chr","gene_start","gene_end","TSS_start","TSS_end","strand","gex","gex_rank"]

X1_qn, X2_qn = quantile_normalize_across([X1, X2], meta_cols)

# 儲存結果
X1_qn.to_csv("../preprocessed_data/CAGE-merged/4. features selection/X1_all_rank_features_pruned_qn.tsv", sep="\t", index=False)
X2_qn.to_csv("../preprocessed_data/CAGE-merged/4. features selection/X2_all_rank_features_pruned_qn.tsv", sep="\t", index=False)


🧩 Performing quantile normalization on 2 datasets, each with 16284 rows and 277 features


In [9]:
X2_qn["DNase_gene_z_std"].sort_values()

8043     0.000000
3951     0.000000
5433     0.000000
2617     0.000000
4531     0.000000
           ...   
15661    5.572742
13669    5.928350
9566     6.239169
3391     6.800296
8552     9.069464
Name: DNase_gene_z_std, Length: 16284, dtype: float64

In [10]:
X1_qn["DNase_gene_z_std"].sort_values()


7208     0.000000
4569     0.000000
1220     0.000000
14020    0.000000
14015    0.000000
           ...   
6926     5.572742
14936    5.928350
5802     6.239169
15046    6.800296
998      9.069464
Name: DNase_gene_z_std, Length: 16284, dtype: float64

In [22]:
import os
import json
import random
import pickle   # ✅ 新增
import pandas as pd
import numpy as np
import lightgbm as lgb
from scipy.stats import spearmanr
from datetime import datetime

# ============================================================
#                  GLOBAL REPRODUCIBILITY
# ============================================================
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
print(f"🔒 Global seed set to {SEED}")

# ============================================================
#                  EXPERIMENT CONFIGURATION
# ============================================================
EXPERIMENT_DIR = "../results/lgbm/model_setting/v6"
TRAIN_PATH = "../preprocessed_data/CAGE-merged/4. features selection/X1_all_rank_features_pruned.tsv"
VAL_PATH   = "../preprocessed_data/CAGE-merged/4. features selection/X2_all_rank_features_pruned.tsv"

META_COLS = ["gene_name", "chr", "gene_start", "gene_end",
             "TSS_start", "TSS_end", "strand", "gex", "gex_rank"]
TARGET_COL = "gex_rank"

os.makedirs(EXPERIMENT_DIR, exist_ok=True)
LOG_PATH = os.path.join(EXPERIMENT_DIR, "log.txt")

# ✅ 如果已有舊的 log.txt，直接覆蓋（清空）
with open(LOG_PATH, "w") as f:
    f.write("")  # 或寫入 header，如 "==== New Experiment ====\n"

def log(msg):
    """同時印出並寫入 log.txt"""
    print(msg)
    with open(LOG_PATH, "a") as f:
        f.write(f"{msg}\n")


log(f"🚀 Experiment started at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
log(f"Experiment directory: {EXPERIMENT_DIR}")

# ============================================================
#                  DATA LOADING
# ============================================================
df_train_full = pd.read_csv(TRAIN_PATH, sep="\t")
df_val_full   = pd.read_csv(VAL_PATH, sep="\t")

feature_cols = [c for c in df_train_full.columns if c not in META_COLS]
log(f"Feature count: {len(feature_cols)}")

# ============================================================
#                  K-FOLD DEFINITION
# ============================================================
chromosomes = [f"chr{i}" for i in range(2, 23)]
folds = [chromosomes[i::5] for i in range(5)]

log("🧩 Chromosome folds:")
for i, fset in enumerate(folds):
    log(f"Fold {i+1}: {fset}")

# ============================================================
#                  LIGHTGBM PARAMETERS
# ============================================================
params = {
    "objective": "regression",          # 或 "regression_l1" 若你的目標值是 rank-based
    "metric": "rmse",                   # 或 "mae" 若你想對 outlier 更 robust
    "boosting_type": "gbdt",            # 或 "dart" 可測試 dropout-like boosting
    "learning_rate": 0.02,              # 稍微小一點，穩定泛化
    "num_leaves": 30,                   # 提升模型容量（原本16太保守）
    "max_depth": -1,                    # 讓 LightGBM 自動決定深度
    "feature_fraction": 0.85,           # 稍微降低 feature 子抽樣比例
    "bagging_fraction": 0.7,            # 稍微減少樣本抽樣比例以提升泛化
    "bagging_freq": 3,
    "lambda_l1": 0.2,                   # L1 正則化 (防止過擬合)
    "lambda_l2": 1.0,                   # L2 正則化
    "min_gain_to_split": 0.01,          # 防止生成微小分裂
    "min_data_in_leaf": 10,             # 每個 leaf 最少樣本數
    "num_boost_round": 2000,            # 訓練輪數，會配合 early stopping
    "early_stopping_round": 100,
    "seed": SEED,
    "verbosity": -1,
    "deterministic": True,
    "force_row_wise": True
}


# ============================================================
#                  SAVE CONFIG
# ============================================================
config = {
    "train_path": TRAIN_PATH,
    "val_path": VAL_PATH,
    "seed": SEED,
    "params": params,
    "folds": folds,
    "target_col": TARGET_COL,
}
with open(os.path.join(EXPERIMENT_DIR, "config.json"), "w") as f:
    json.dump(config, f, indent=4)

# ============================================================
#                  TRAINING
# ============================================================
results = []
preds_all = []

preds_all = []

for fold_idx, val_chrs in enumerate(folds):
    log(f"\n🚀 Fold {fold_idx+1}/{len(folds)} | Val chromosomes: {val_chrs}")
    fold_dir = os.path.join(EXPERIMENT_DIR, f"fold_{fold_idx+1}")
    os.makedirs(fold_dir, exist_ok=True)

    # Split
    train_chrs = [c for c in chromosomes if c not in val_chrs]
    df_train = df_train_full[df_train_full["chr"].isin(train_chrs)].copy()
    df_val   = df_val_full[df_val_full["chr"].isin(val_chrs)].copy()

    # ✅ 保留原始 index，方便之後對齊
    df_val = df_val.reset_index().rename(columns={"index": "orig_idx"})

    X_train, y_train = df_train[feature_cols], df_train[TARGET_COL]
    X_val, y_val     = df_val[feature_cols], df_val[TARGET_COL]

    dtrain = lgb.Dataset(X_train, label=y_train)
    dval   = lgb.Dataset(X_val, label=y_val, reference=dtrain)

    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dtrain, dval],
        valid_names=["train", "val"],
        num_boost_round=2000,
        callbacks=[
            lgb.early_stopping(stopping_rounds=100),
            lgb.log_evaluation(period=200)
        ],
    )

    # Save model (.pkl)
    model_path = os.path.join(fold_dir, "model.pkl")
    with open(model_path, "wb") as f:
        pickle.dump(model, f)
    log(f"💾 Model saved: {model_path}")

    # Predict
    df_val["predicted_gex_rank"] = model.predict(X_val, num_iteration=model.best_iteration)
    df_val["fold_id"] = fold_idx + 1  # optional: fold 來源

    preds_all.append(df_val[["orig_idx", "gene_name", "predicted_gex_rank", "fold_id"]])

    # Evaluate
    overall_spearman = spearmanr(df_val[TARGET_COL], df_val["predicted_gex_rank"])[0]
    log(f"📈 Fold {fold_idx+1} Overall Spearman ρ = {overall_spearman:.4f}")
    log("📊 Per-Chromosome Spearman:")
    chr_corrs = []
    for chrom, subdf in df_val.groupby("chr"):
        if len(subdf) < 2:
            continue
        rho = spearmanr(subdf[TARGET_COL], subdf["predicted_gex_rank"])[0]
        chr_corrs.append({"chr": chrom, "spearman": rho})
        log(f"   {chrom:<6s}: ρ = {rho:.4f}")

    results.append({
        "fold": fold_idx + 1,
        "val_chr": val_chrs,
        "overall_spearman": overall_spearman,
        "per_chr": chr_corrs
    })

# ============================================================
#                  SAVE PREDICTIONS & SUMMARY
# ============================================================

# ============================================================
# 🔁 合併並依照原始順序還原
# ============================================================
df_preds_all = pd.concat(preds_all, ignore_index=True)
df_preds_all = df_preds_all.sort_values("orig_idx").reset_index(drop=True)

# 驗證對齊
assert len(df_preds_all) == len(df_val_full)
assert (df_preds_all["gene_name"].values == df_val_full["gene_name"].values).all(), \
    "❌ Prediction order mismatch with df_val_full!"

# ✅ 輸出只有 gene_name + predicted_gex_rank
df_preds_all[["gene_name", "predicted_gex_rank"]].to_csv(
    os.path.join(EXPERIMENT_DIR, "predict_val.tsv"), sep="\t", index=False
)
log("💾 Final predictions (aligned) saved (predict_val.tsv)")

# ✅ 全域 Spearman：用原始 df_val_full 的真值 vs 你的預測
overall_val_spearman = spearmanr(
    df_val_full["gex_rank"], 
    df_preds_all["predicted_gex_rank"]
)[0]
log(f"🌍 Overall validation Spearman (aligned) = {overall_val_spearman:.4f}")




summary_fold = pd.DataFrame([
    {"fold": r["fold"], "val_chr": ",".join(r["val_chr"]), "overall_spearman": r["overall_spearman"]}
    for r in results
])

chr_rows = []
for r in results:
    for c in r["per_chr"]:
        chr_rows.append({"fold": r["fold"], "chr": c["chr"], "spearman": c["spearman"]})
summary_chr = pd.DataFrame(chr_rows)

chr_mean = summary_chr.groupby("chr", as_index=False)["spearman"].mean()
chr_mean.rename(columns={"spearman": "mean_spearman"}, inplace=True)

# summary_output = summary_fold.merge(chr_mean, how="cross")
# summary_output.to_csv(os.path.join(EXPERIMENT_DIR, "summary.tsv"), sep="\t", index=False)

# log("💾 Summary saved (summary.tsv)")
log(f"Mean Spearman (across folds): {summary_fold['overall_spearman'].mean():.4f}")
log(f"Overall mean of per-chromosome means: {chr_mean['mean_spearman'].mean():.4f}")

log(f"\n✅ Experiment finished at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


🔒 Global seed set to 42
🚀 Experiment started at 2025-10-14 00:27:38
Experiment directory: ../results/lgbm/model_setting/v6
Feature count: 277
🧩 Chromosome folds:
Fold 1: ['chr2', 'chr7', 'chr12', 'chr17', 'chr22']
Fold 2: ['chr3', 'chr8', 'chr13', 'chr18']
Fold 3: ['chr4', 'chr9', 'chr14', 'chr19']
Fold 4: ['chr5', 'chr10', 'chr15', 'chr20']
Fold 5: ['chr6', 'chr11', 'chr16', 'chr21']

🚀 Fold 1/5 | Val chromosomes: ['chr2', 'chr7', 'chr12', 'chr17', 'chr22']
Training until validation scores don't improve for 100 rounds
[200]	train's rmse: 0.125484	val's rmse: 0.177744
[400]	train's rmse: 0.109478	val's rmse: 0.176585
Early stopping, best iteration is:
[356]	train's rmse: 0.112666	val's rmse: 0.176074
💾 Model saved: ../results/lgbm/model_setting/v6/fold_1/model.pkl
📈 Fold 1 Overall Spearman ρ = 0.7613
📊 Per-Chromosome Spearman:
   chr12 : ρ = 0.7767
   chr17 : ρ = 0.7536
   chr2  : ρ = 0.7537
   chr22 : ρ = 0.7444
   chr7  : ρ = 0.7695

🚀 Fold 2/5 | Val chromosomes: ['chr3', 'chr8', 'ch

In [None]:
import optuna
import lightgbm as lgb
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
import warnings, json, os, random
warnings.filterwarnings("ignore")

# ============================================================
#                  CONFIGURATION
# ============================================================
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

TRAIN_PATH = "../preprocessed_data/CAGE-merged/4. features selection/X1_all_rank_features_pruned.tsv"
VAL_PATH   = "../preprocessed_data/CAGE-merged/4. features selection/X2_all_rank_features_pruned.tsv"

META_COLS = ["gene_name", "chr", "gene_start", "gene_end",
             "TSS_start", "TSS_end", "strand", "gex", "gex_rank"]
TARGET_COL = "gex_rank"

# ============================================================
#                  LOAD DATA
# ============================================================
df_train = pd.read_csv(TRAIN_PATH, sep="\t")
df_val   = pd.read_csv(VAL_PATH, sep="\t")

# 合併訓練 + 驗證作為整體 pool（我們會從中抽 chr）
df_all = pd.concat([df_train, df_val], ignore_index=True)
chromosomes = sorted(df_all["chr"].unique())

print(f"🧬 Total chromosomes in dataset: {chromosomes}")
print(f"Total samples: {len(df_all):,}")

feature_cols = [c for c in df_all.columns if c not in META_COLS]

# ============================================================
#                  OBJECTIVE FUNCTION
# ============================================================
def objective(trial):
    """Optuna optimization objective for LightGBM hyperparameters"""

    # 1️⃣ 隨機抽取 4 個 chromosome 當 validation
    # 為每個 trial 建立獨立的隨機種子（可重現）
    rng = random.Random(SEED)
    val_chrs = rng.sample(chromosomes, 4)
    train_chrs = [c for c in chromosomes if c not in val_chrs]
    df_train_fold = df_all[df_all["chr"].isin(train_chrs)].copy()
    df_val_fold   = df_all[df_all["chr"].isin(val_chrs)].copy()

    X_train, y_train = df_train_fold[feature_cols], df_train_fold[TARGET_COL]
    X_val, y_val     = df_val_fold[feature_cols], df_val_fold[TARGET_COL]

    # 2️⃣ 定義超參數搜尋空間
    params = {
        "objective": "regression",
        "metric": "rmse",
        "boosting_type": trial.suggest_categorical("boosting_type", ["gbdt", "dart"]),
        "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.05, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 16, 128, step=8),
        "max_depth": trial.suggest_int("max_depth", -1, 12),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.5, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "lambda_l1": trial.suggest_float("lambda_l1", 0.0, 1.0),
        "lambda_l2": trial.suggest_float("lambda_l2", 0.0, 2.0),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0.0, 0.05),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 10, 100),
        "verbosity": -1,
        "seed": SEED,
        "deterministic": True,
        "force_row_wise": True,
    }

    # 3️⃣ 訓練模型
    dtrain = lgb.Dataset(X_train, label=y_train)
    dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)

    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dval],
        num_boost_round=3000,
        callbacks=[lgb.early_stopping(stopping_rounds=100, verbose=False)],
    )

    # 4️⃣ 評估 Spearman correlation
    y_pred = model.predict(X_val, num_iteration=model.best_iteration)
    rho_overall = spearmanr(y_val, y_pred)[0]

    # 🔍 也可計算每個 chr 的平均 Spearman（可開啟）
    per_chr_rho = []
    for chrom, subdf in df_val_fold.assign(pred=y_pred).groupby("chr"):
        if len(subdf) > 2:
            per_chr_rho.append(spearmanr(subdf["gex_rank"], subdf["pred"])[0])
    mean_rho = np.nanmean(per_chr_rho)

    # 你可以切換回傳哪個：
    return mean_rho if not np.isnan(mean_rho) else rho_overall


# ============================================================
#                  RUN OPTIMIZATION
# ============================================================
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED)
)
study.optimize(objective, n_trials=300, show_progress_bar=True)

# ============================================================
#                  SHOW BEST RESULT
# ============================================================
print("✅ Best Spearman:", study.best_value)
print("🏆 Best Parameters:")
for k, v in study.best_params.items():
    print(f"   {k}: {v}")

# ============================================================
#                  SAVE RESULT
# ============================================================
SAVE_DIR = "../results/lgbm/optuna_qn_random4chr"
os.makedirs(SAVE_DIR, exist_ok=True)

study.trials_dataframe().to_csv(os.path.join(SAVE_DIR, "trials_log.csv"), index=False)
with open(os.path.join(SAVE_DIR, "best_params.json"), "w") as f:
    json.dump(study.best_params, f, indent=4)

print(f"💾 Saved best params and log to {SAVE_DIR}")


[I 2025-10-14 00:35:10,754] A new study created in memory with name: no-name-1a41df6d-1e5d-461d-8503-301a45b7d576


🧬 Total chromosomes in dataset: ['chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr2', 'chr20', 'chr21', 'chr22', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9']
Total samples: 32,568


Best trial: 0. Best value: 0.715367:   0%|          | 1/300 [00:20<1:42:05, 20.49s/it]

[I 2025-10-14 00:35:31,242] Trial 0 finished with value: 0.7153666975380131 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.026975154833351143, 'num_leaves': 80, 'max_depth': 1, 'feature_fraction': 0.662397808134481, 'bagging_fraction': 0.5290418060840998, 'bagging_freq': 7, 'lambda_l1': 0.6011150117432088, 'lambda_l2': 1.416145155592091, 'min_gain_to_split': 0.0010292247147901223, 'min_data_in_leaf': 98}. Best is trial 0 with value: 0.7153666975380131.


Best trial: 1. Best value: 0.754737:   1%|          | 2/300 [00:34<1:22:38, 16.64s/it]

[I 2025-10-14 00:35:45,184] Trial 1 finished with value: 0.7547368544923448 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.007599674150654906, 'num_leaves': 32, 'max_depth': 3, 'feature_fraction': 0.8099025726528951, 'bagging_fraction': 0.7159725093210578, 'bagging_freq': 3, 'lambda_l1': 0.6118528947223795, 'lambda_l2': 0.27898772130408367, 'min_gain_to_split': 0.014607232426760909, 'min_data_in_leaf': 43}. Best is trial 1 with value: 0.7547368544923448.


Best trial: 2. Best value: 0.75688:   1%|          | 3/300 [02:45<5:41:11, 68.93s/it] 

[I 2025-10-14 00:37:56,335] Trial 2 finished with value: 0.7568799198332631 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.007918515779559376, 'num_leaves': 72, 'max_depth': 7, 'feature_fraction': 0.6185801650879991, 'bagging_fraction': 0.8037724259507192, 'bagging_freq': 2, 'lambda_l1': 0.06505159298527952, 'lambda_l2': 1.8977710745066665, 'min_gain_to_split': 0.04828160165372797, 'min_data_in_leaf': 83}. Best is trial 2 with value: 0.7568799198332631.


Best trial: 2. Best value: 0.75688:   1%|▏         | 4/300 [03:02<3:59:11, 48.48s/it]

[I 2025-10-14 00:38:13,482] Trial 3 finished with value: 0.754927019994781 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.024165903162442326, 'num_leaves': 64, 'max_depth': 0, 'feature_fraction': 0.798070764044508, 'bagging_fraction': 0.5171942605576092, 'bagging_freq': 7, 'lambda_l1': 0.2587799816000169, 'lambda_l2': 1.325044568707964, 'min_gain_to_split': 0.015585553804470548, 'min_data_in_leaf': 57}. Best is trial 2 with value: 0.7568799198332631.


Best trial: 2. Best value: 0.75688:   2%|▏         | 5/300 [03:12<2:50:33, 34.69s/it]

[I 2025-10-14 00:38:23,717] Trial 4 finished with value: 0.7568458261056382 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.046618106758907395, 'num_leaves': 104, 'max_depth': 12, 'feature_fraction': 0.9579309401710595, 'bagging_fraction': 0.7989499894055425, 'bagging_freq': 7, 'lambda_l1': 0.0884925020519195, 'lambda_l2': 0.3919657248382904, 'min_gain_to_split': 0.002261364445526903, 'min_data_in_leaf': 39}. Best is trial 2 with value: 0.7568799198332631.


Best trial: 2. Best value: 0.75688:   2%|▏         | 6/300 [03:18<2:01:46, 24.85s/it]

[I 2025-10-14 00:38:29,467] Trial 5 finished with value: 0.7538414894889531 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.033706023053513806, 'num_leaves': 56, 'max_depth': 2, 'feature_fraction': 0.8170784332632994, 'bagging_fraction': 0.5704621124873813, 'bagging_freq': 6, 'lambda_l1': 0.07455064367977082, 'lambda_l2': 1.9737738732010346, 'min_gain_to_split': 0.038612238464832874, 'min_data_in_leaf': 28}. Best is trial 2 with value: 0.7568799198332631.


Best trial: 6. Best value: 0.757612:   2%|▏         | 7/300 [05:55<5:32:37, 68.11s/it]

[I 2025-10-14 00:41:06,649] Trial 6 finished with value: 0.7576116717105731 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.025458179729092763, 'num_leaves': 96, 'max_depth': 9, 'feature_fraction': 0.6296178606936361, 'bagging_fraction': 0.6792328642721364, 'bagging_freq': 1, 'lambda_l1': 0.8631034258755935, 'lambda_l2': 1.2465962536551158, 'min_gain_to_split': 0.01654490124263246, 'min_data_in_leaf': 15}. Best is trial 6 with value: 0.7576116717105731.


Best trial: 7. Best value: 0.759086:   3%|▎         | 8/300 [08:15<7:21:40, 90.76s/it]

[I 2025-10-14 00:43:25,885] Trial 7 finished with value: 0.7590859646080659 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.026827251621760122, 'num_leaves': 88, 'max_depth': 11, 'feature_fraction': 0.7888859700647797, 'bagging_fraction': 0.5597971229691509, 'bagging_freq': 5, 'lambda_l1': 0.7607850486168974, 'lambda_l2': 1.1225543951389925, 'min_gain_to_split': 0.03854835899772805, 'min_data_in_leaf': 54}. Best is trial 7 with value: 0.7590859646080659.


Best trial: 8. Best value: 0.759894:   3%|▎         | 9/300 [08:51<5:56:57, 73.60s/it]

[I 2025-10-14 00:44:01,761] Trial 8 finished with value: 0.7598936681288462 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.005301382389479432, 'num_leaves': 24, 'max_depth': -1, 'feature_fraction': 0.8545641645055122, 'bagging_fraction': 0.6571779905381634, 'bagging_freq': 4, 'lambda_l1': 0.907566473926093, 'lambda_l2': 0.4985844582977499, 'min_gain_to_split': 0.02051914615178149, 'min_data_in_leaf': 78}. Best is trial 8 with value: 0.7598936681288462.


Best trial: 8. Best value: 0.759894:   3%|▎         | 10/300 [09:09<4:33:46, 56.64s/it]

[I 2025-10-14 00:44:20,438] Trial 9 finished with value: 0.7593280898570334 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.009743645106784238, 'num_leaves': 32, 'max_depth': 12, 'feature_fraction': 0.9232481518257668, 'bagging_fraction': 0.8167018782552118, 'bagging_freq': 7, 'lambda_l1': 0.8036720768991145, 'lambda_l2': 0.3731401177720717, 'min_gain_to_split': 0.044627949924498894, 'min_data_in_leaf': 59}. Best is trial 8 with value: 0.7598936681288462.


Best trial: 8. Best value: 0.759894:   4%|▎         | 11/300 [09:39<3:53:08, 48.40s/it]

[I 2025-10-14 00:44:50,159] Trial 10 finished with value: 0.7591914091195334 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.0050695240451904405, 'num_leaves': 128, 'max_depth': 5, 'feature_fraction': 0.886067844878718, 'bagging_fraction': 0.9538323976412588, 'bagging_freq': 4, 'lambda_l1': 0.9766771359497862, 'lambda_l2': 0.6695903524464729, 'min_gain_to_split': 0.027585755040565922, 'min_data_in_leaf': 76}. Best is trial 8 with value: 0.7598936681288462.


Best trial: 8. Best value: 0.759894:   4%|▍         | 12/300 [10:00<3:12:17, 40.06s/it]

[I 2025-10-14 00:45:11,143] Trial 11 finished with value: 0.7593655983871537 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01125464824042569, 'num_leaves': 16, 'max_depth': -1, 'feature_fraction': 0.9773778853851628, 'bagging_fraction': 0.9132335190929217, 'bagging_freq': 4, 'lambda_l1': 0.7571262865901494, 'lambda_l2': 0.013906060398697961, 'min_gain_to_split': 0.027207276645573885, 'min_data_in_leaf': 72}. Best is trial 8 with value: 0.7598936681288462.


Best trial: 12. Best value: 0.760453:   4%|▍         | 13/300 [10:21<2:44:50, 34.46s/it]

[I 2025-10-14 00:45:32,718] Trial 12 finished with value: 0.760453201009108 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.014265138162796039, 'num_leaves': 16, 'max_depth': -1, 'feature_fraction': 0.9930899415141673, 'bagging_fraction': 0.9980082658737177, 'bagging_freq': 4, 'lambda_l1': 0.9942460885963612, 'lambda_l2': 0.0036895999588275365, 'min_gain_to_split': 0.026482461330550773, 'min_data_in_leaf': 76}. Best is trial 12 with value: 0.760453201009108.


Best trial: 12. Best value: 0.760453:   5%|▍         | 14/300 [10:42<2:23:50, 30.18s/it]

[I 2025-10-14 00:45:52,991] Trial 13 finished with value: 0.7598767350382971 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.015705631302127234, 'num_leaves': 16, 'max_depth': 4, 'feature_fraction': 0.7213785203554233, 'bagging_fraction': 0.9954169742310286, 'bagging_freq': 3, 'lambda_l1': 0.999472629618036, 'lambda_l2': 0.8405826438580081, 'min_gain_to_split': 0.021855932197366482, 'min_data_in_leaf': 95}. Best is trial 12 with value: 0.760453201009108.


Best trial: 12. Best value: 0.760453:   5%|▌         | 15/300 [11:22<2:38:28, 33.36s/it]

[I 2025-10-14 00:46:33,738] Trial 14 finished with value: 0.7602902482563878 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.005686413917135672, 'num_leaves': 32, 'max_depth': -1, 'feature_fraction': 0.869376242509013, 'bagging_fraction': 0.637877442871202, 'bagging_freq': 5, 'lambda_l1': 0.4488431862525139, 'lambda_l2': 0.015090055453537143, 'min_gain_to_split': 0.03214934217888981, 'min_data_in_leaf': 83}. Best is trial 12 with value: 0.760453201009108.


Best trial: 12. Best value: 0.760453:   5%|▌         | 16/300 [11:29<1:59:22, 25.22s/it]

[I 2025-10-14 00:46:40,054] Trial 15 finished with value: 0.7276967330121342 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.015264612277621286, 'num_leaves': 48, 'max_depth': 1, 'feature_fraction': 0.9913368583750904, 'bagging_fraction': 0.6112877943387895, 'bagging_freq': 5, 'lambda_l1': 0.3583902847905419, 'lambda_l2': 0.009524516083712754, 'min_gain_to_split': 0.033272062979396264, 'min_data_in_leaf': 89}. Best is trial 12 with value: 0.760453201009108.


Best trial: 16. Best value: 0.760968:   6%|▌         | 17/300 [11:53<1:57:52, 24.99s/it]

[I 2025-10-14 00:47:04,508] Trial 16 finished with value: 0.7609683276044146 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01274279283313665, 'num_leaves': 40, 'max_depth': 7, 'feature_fraction': 0.9147220349249673, 'bagging_fraction': 0.8982393444710525, 'bagging_freq': 5, 'lambda_l1': 0.4225618679760095, 'lambda_l2': 0.15104596120767105, 'min_gain_to_split': 0.035816665085451005, 'min_data_in_leaf': 66}. Best is trial 16 with value: 0.7609683276044146.


Best trial: 16. Best value: 0.760968:   6%|▌         | 18/300 [12:17<1:55:00, 24.47s/it]

[I 2025-10-14 00:47:27,761] Trial 17 finished with value: 0.7595041520288267 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.012592618071407708, 'num_leaves': 48, 'max_depth': 7, 'feature_fraction': 0.9189991401785472, 'bagging_fraction': 0.8897141011558393, 'bagging_freq': 3, 'lambda_l1': 0.2712301803544349, 'lambda_l2': 0.8251920172231035, 'min_gain_to_split': 0.039416588880006695, 'min_data_in_leaf': 67}. Best is trial 16 with value: 0.7609683276044146.


Best trial: 16. Best value: 0.760968:   6%|▋         | 19/300 [14:05<3:53:23, 49.84s/it]

[I 2025-10-14 00:49:16,690] Trial 18 finished with value: 0.7596389632714364 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.020184742917546027, 'num_leaves': 40, 'max_depth': 7, 'feature_fraction': 0.937583206930923, 'bagging_fraction': 0.8708212734909406, 'bagging_freq': 6, 'lambda_l1': 0.6398425719810309, 'lambda_l2': 0.2514317631054029, 'min_gain_to_split': 0.03217215850135967, 'min_data_in_leaf': 66}. Best is trial 16 with value: 0.7609683276044146.


Best trial: 19. Best value: 0.761328:   7%|▋         | 20/300 [14:25<3:09:57, 40.71s/it]

[I 2025-10-14 00:49:36,122] Trial 19 finished with value: 0.7613275431703359 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01775834373149064, 'num_leaves': 16, 'max_depth': 9, 'feature_fraction': 0.9935183677374444, 'bagging_fraction': 0.989597490830107, 'bagging_freq': 6, 'lambda_l1': 0.4941352886042697, 'lambda_l2': 1.6436056894989055, 'min_gain_to_split': 0.007962205769490298, 'min_data_in_leaf': 52}. Best is trial 19 with value: 0.7613275431703359.


Best trial: 19. Best value: 0.761328:   7%|▋         | 21/300 [14:36<2:28:13, 31.88s/it]

[I 2025-10-14 00:49:47,404] Trial 20 finished with value: 0.7600121282258745 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.020153398904821884, 'num_leaves': 48, 'max_depth': 9, 'feature_fraction': 0.8997667449755823, 'bagging_fraction': 0.8530376566790268, 'bagging_freq': 6, 'lambda_l1': 0.48065163540761624, 'lambda_l2': 1.494175198821833, 'min_gain_to_split': 0.011306331624924584, 'min_data_in_leaf': 46}. Best is trial 19 with value: 0.7613275431703359.


Best trial: 19. Best value: 0.761328:   7%|▋         | 22/300 [14:56<2:10:25, 28.15s/it]

[I 2025-10-14 00:50:06,864] Trial 21 finished with value: 0.7604935679296743 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01337875727548767, 'num_leaves': 16, 'max_depth': 9, 'feature_fraction': 0.9865003697330635, 'bagging_fraction': 0.9881521895502142, 'bagging_freq': 5, 'lambda_l1': 0.373448794062361, 'lambda_l2': 1.7194044659119503, 'min_gain_to_split': 0.005878949780088707, 'min_data_in_leaf': 64}. Best is trial 19 with value: 0.7613275431703359.


Best trial: 22. Best value: 0.7621:   8%|▊         | 23/300 [15:10<1:50:43, 23.98s/it]  

[I 2025-10-14 00:50:21,133] Trial 22 finished with value: 0.762100468229771 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.019330336865129066, 'num_leaves': 24, 'max_depth': 9, 'feature_fraction': 0.9542436217867867, 'bagging_fraction': 0.9436377946664403, 'bagging_freq': 5, 'lambda_l1': 0.36124534636116445, 'lambda_l2': 1.6583770380773004, 'min_gain_to_split': 0.0063587406995280924, 'min_data_in_leaf': 51}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:   8%|▊         | 24/300 [15:25<1:38:16, 21.36s/it]

[I 2025-10-14 00:50:36,383] Trial 23 finished with value: 0.7592112405032605 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.018572484403460657, 'num_leaves': 32, 'max_depth': 10, 'feature_fraction': 0.947914947663845, 'bagging_fraction': 0.937961594526056, 'bagging_freq': 6, 'lambda_l1': 0.19269415482820434, 'lambda_l2': 1.6452566030807139, 'min_gain_to_split': 0.007998509730298281, 'min_data_in_leaf': 31}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:   8%|▊         | 25/300 [15:46<1:37:17, 21.23s/it]

[I 2025-10-14 00:50:57,299] Trial 24 finished with value: 0.7601937852643205 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.010777682949532538, 'num_leaves': 24, 'max_depth': 6, 'feature_fraction': 0.8572537641150577, 'bagging_fraction': 0.9505328898582166, 'bagging_freq': 5, 'lambda_l1': 0.537291697882116, 'lambda_l2': 1.6728532040004969, 'min_gain_to_split': 0.006252870753846059, 'min_data_in_leaf': 48}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:   9%|▊         | 26/300 [16:02<1:30:19, 19.78s/it]

[I 2025-10-14 00:51:13,696] Trial 25 finished with value: 0.7611337504655737 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01806935903936326, 'num_leaves': 40, 'max_depth': 8, 'feature_fraction': 0.9485195622505924, 'bagging_fraction': 0.7502780425469886, 'bagging_freq': 6, 'lambda_l1': 0.4106154249850188, 'lambda_l2': 1.0064918811674979, 'min_gain_to_split': 0.009863425806631292, 'min_data_in_leaf': 35}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:   9%|▉         | 27/300 [18:22<4:12:57, 55.60s/it]

[I 2025-10-14 00:53:32,853] Trial 26 finished with value: 0.7596126275187389 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.01795563917880385, 'num_leaves': 64, 'max_depth': 8, 'feature_fraction': 0.9555400184202256, 'bagging_fraction': 0.7484885472869306, 'bagging_freq': 6, 'lambda_l1': 0.29279813539154476, 'lambda_l2': 1.0126996279702, 'min_gain_to_split': 0.010243768325796408, 'min_data_in_leaf': 21}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:   9%|▉         | 28/300 [18:31<3:09:32, 41.81s/it]

[I 2025-10-14 00:53:42,503] Trial 27 finished with value: 0.7583311101880391 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.03501179300037419, 'num_leaves': 24, 'max_depth': 10, 'feature_fraction': 0.7624610977947756, 'bagging_fraction': 0.8443772759136309, 'bagging_freq': 6, 'lambda_l1': 0.5494129617777894, 'lambda_l2': 1.8072282385748806, 'min_gain_to_split': 0.0030505032086491405, 'min_data_in_leaf': 35}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  10%|▉         | 29/300 [18:43<2:27:33, 32.67s/it]

[I 2025-10-14 00:53:53,844] Trial 28 finished with value: 0.7590513484782127 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.023281403590623737, 'num_leaves': 40, 'max_depth': 10, 'feature_fraction': 0.9614578802212921, 'bagging_fraction': 0.7599883664125442, 'bagging_freq': 7, 'lambda_l1': 0.1900247300127622, 'lambda_l2': 1.54608357011459, 'min_gain_to_split': 0.012031929099969987, 'min_data_in_leaf': 51}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  10%|█         | 30/300 [20:00<3:27:32, 46.12s/it]

[I 2025-10-14 00:55:11,348] Trial 29 finished with value: 0.7591103473099263 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.030509910066258347, 'num_leaves': 56, 'max_depth': 5, 'feature_fraction': 0.7042256138489096, 'bagging_fraction': 0.933584698134126, 'bagging_freq': 6, 'lambda_l1': 0.6558174689667873, 'lambda_l2': 1.3626598691559415, 'min_gain_to_split': 0.00020250541477712276, 'min_data_in_leaf': 25}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  10%|█         | 31/300 [20:15<2:44:40, 36.73s/it]

[I 2025-10-14 00:55:26,170] Trial 30 finished with value: 0.7615471326770987 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.017171399313968858, 'num_leaves': 24, 'max_depth': 8, 'feature_fraction': 0.8363859891028941, 'bagging_fraction': 0.752990449188799, 'bagging_freq': 7, 'lambda_l1': 0.3759661346554937, 'lambda_l2': 1.1680630353352655, 'min_gain_to_split': 0.019089053990118898, 'min_data_in_leaf': 40}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  11%|█         | 32/300 [20:27<2:10:54, 29.31s/it]

[I 2025-10-14 00:55:38,159] Trial 31 finished with value: 0.7608408125099564 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.017493259507801277, 'num_leaves': 24, 'max_depth': 8, 'feature_fraction': 0.836385085565067, 'bagging_fraction': 0.7178586448322043, 'bagging_freq': 7, 'lambda_l1': 0.36992242914184603, 'lambda_l2': 1.189660968147026, 'min_gain_to_split': 0.018999737099007208, 'min_data_in_leaf': 41}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  11%|█         | 33/300 [20:41<1:50:03, 24.73s/it]

[I 2025-10-14 00:55:52,214] Trial 32 finished with value: 0.7619014535429264 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.0219981623967029, 'num_leaves': 32, 'max_depth': 8, 'feature_fraction': 0.9020936174881491, 'bagging_fraction': 0.7751522745057279, 'bagging_freq': 7, 'lambda_l1': 0.5371375320693823, 'lambda_l2': 1.0084183890062257, 'min_gain_to_split': 0.013950674461177117, 'min_data_in_leaf': 35}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  11%|█▏        | 34/300 [20:57<1:38:07, 22.13s/it]

[I 2025-10-14 00:56:08,283] Trial 33 finished with value: 0.760799106518271 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.02153024617748958, 'num_leaves': 24, 'max_depth': 11, 'feature_fraction': 0.8796984474593005, 'bagging_fraction': 0.785124803408521, 'bagging_freq': 7, 'lambda_l1': 0.5458250676367319, 'lambda_l2': 1.4879253069007703, 'min_gain_to_split': 0.013430578042297464, 'min_data_in_leaf': 44}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  12%|█▏        | 35/300 [21:04<1:18:16, 17.72s/it]

[I 2025-10-14 00:56:15,720] Trial 34 finished with value: 0.7597109922749221 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.028474839241416177, 'num_leaves': 16, 'max_depth': 6, 'feature_fraction': 0.8942803318850722, 'bagging_fraction': 0.7058298748638637, 'bagging_freq': 7, 'lambda_l1': 0.693238642152388, 'lambda_l2': 0.8353900100150675, 'min_gain_to_split': 0.005457059553475887, 'min_data_in_leaf': 51}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  12%|█▏        | 36/300 [21:14<1:07:19, 15.30s/it]

[I 2025-10-14 00:56:25,373] Trial 35 finished with value: 0.759480016643378 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.02305175030217851, 'num_leaves': 32, 'max_depth': 8, 'feature_fraction': 0.8250758196319833, 'bagging_fraction': 0.8341466080979195, 'bagging_freq': 7, 'lambda_l1': 0.49880640613588834, 'lambda_l2': 1.831324064324646, 'min_gain_to_split': 0.016656688429711655, 'min_data_in_leaf': 37}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  12%|█▏        | 37/300 [21:23<58:27, 13.33s/it]  

[I 2025-10-14 00:56:34,115] Trial 36 finished with value: 0.7587228420512024 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.041395123319983435, 'num_leaves': 72, 'max_depth': 9, 'feature_fraction': 0.7622874227066636, 'bagging_fraction': 0.9711638705037025, 'bagging_freq': 7, 'lambda_l1': 0.5906488179307272, 'lambda_l2': 1.3036636125260197, 'min_gain_to_split': 0.022984470217314465, 'min_data_in_leaf': 56}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  13%|█▎        | 38/300 [24:41<4:59:52, 68.67s/it]

[I 2025-10-14 00:59:51,916] Trial 37 finished with value: 0.7588250865486704 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.009272669977682137, 'num_leaves': 112, 'max_depth': 11, 'feature_fraction': 0.9251066841363667, 'bagging_fraction': 0.7736215718707787, 'bagging_freq': 6, 'lambda_l1': 0.30981928483654764, 'lambda_l2': 1.1352465355739734, 'min_gain_to_split': 0.017651815394500656, 'min_data_in_leaf': 11}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  13%|█▎        | 39/300 [24:52<3:43:42, 51.43s/it]

[I 2025-10-14 01:00:03,103] Trial 38 finished with value: 0.7590244341456753 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01595312975066358, 'num_leaves': 32, 'max_depth': 6, 'feature_fraction': 0.9681734161104436, 'bagging_fraction': 0.7204101759058876, 'bagging_freq': 7, 'lambda_l1': 0.17115272280380295, 'lambda_l2': 1.4083275884408906, 'min_gain_to_split': 0.013418958001795218, 'min_data_in_leaf': 60}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  13%|█▎        | 40/300 [24:59<2:45:43, 38.24s/it]

[I 2025-10-14 01:00:10,578] Trial 39 finished with value: 0.7586490914298272 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.03437174281536693, 'num_leaves': 56, 'max_depth': 10, 'feature_fraction': 0.8415939625445528, 'bagging_fraction': 0.6835017257938918, 'bagging_freq': 6, 'lambda_l1': 0.010279375331658125, 'lambda_l2': 1.9172420312737375, 'min_gain_to_split': 0.00786077725449393, 'min_data_in_leaf': 31}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  14%|█▎        | 41/300 [26:28<3:50:47, 53.46s/it]

[I 2025-10-14 01:01:39,559] Trial 40 finished with value: 0.7611947525955696 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.02574708023761235, 'num_leaves': 24, 'max_depth': 8, 'feature_fraction': 0.7977010635086582, 'bagging_fraction': 0.8123483369984765, 'bagging_freq': 5, 'lambda_l1': 0.4612438313712995, 'lambda_l2': 1.995431495038654, 'min_gain_to_split': 0.0036657881211382545, 'min_data_in_leaf': 21}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  14%|█▍        | 42/300 [27:58<4:36:32, 64.31s/it]

[I 2025-10-14 01:03:09,177] Trial 41 finished with value: 0.7608800269906042 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.025414841352572416, 'num_leaves': 24, 'max_depth': 8, 'feature_fraction': 0.7836792722412405, 'bagging_fraction': 0.8190483042590506, 'bagging_freq': 5, 'lambda_l1': 0.4770104276298137, 'lambda_l2': 1.9868626009475707, 'min_gain_to_split': 0.002945361135011426, 'min_data_in_leaf': 20}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  14%|█▍        | 43/300 [29:13<4:49:55, 67.68s/it]

[I 2025-10-14 01:04:24,736] Trial 42 finished with value: 0.7584650151723554 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.021025300032590146, 'num_leaves': 16, 'max_depth': 9, 'feature_fraction': 0.8061337051043137, 'bagging_fraction': 0.7945579935422357, 'bagging_freq': 2, 'lambda_l1': 0.3366785267185486, 'lambda_l2': 1.5913102531396717, 'min_gain_to_split': 0.003996802171782244, 'min_data_in_leaf': 41}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  15%|█▍        | 44/300 [30:54<5:30:45, 77.52s/it]

[I 2025-10-14 01:06:05,214] Trial 43 finished with value: 0.7598940186205422 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.029651908338129546, 'num_leaves': 32, 'max_depth': 7, 'feature_fraction': 0.734863371756646, 'bagging_fraction': 0.8688834898134513, 'bagging_freq': 5, 'lambda_l1': 0.5849959525414471, 'lambda_l2': 1.7706111354891128, 'min_gain_to_split': 0.015351562010678183, 'min_data_in_leaf': 26}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  15%|█▌        | 45/300 [32:46<6:14:02, 88.01s/it]

[I 2025-10-14 01:07:57,687] Trial 44 finished with value: 0.7588634777357246 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.02378042679052614, 'num_leaves': 24, 'max_depth': 12, 'feature_fraction': 0.9040193318710618, 'bagging_fraction': 0.9206482669465309, 'bagging_freq': 6, 'lambda_l1': 0.39755501316466973, 'lambda_l2': 0.6508018359766699, 'min_gain_to_split': 0.00813222330104052, 'min_data_in_leaf': 21}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  15%|█▌        | 46/300 [33:59<5:52:25, 83.25s/it]

[I 2025-10-14 01:09:09,832] Trial 45 finished with value: 0.7557872121404545 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.015804618810685202, 'num_leaves': 16, 'max_depth': 9, 'feature_fraction': 0.7722479751453118, 'bagging_fraction': 0.5021328242984184, 'bagging_freq': 4, 'lambda_l1': 0.23666576690520383, 'lambda_l2': 1.2704660578540043, 'min_gain_to_split': 0.0004730841550610442, 'min_data_in_leaf': 50}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  16%|█▌        | 47/300 [34:16<4:27:47, 63.51s/it]

[I 2025-10-14 01:09:27,283] Trial 46 finished with value: 0.7570682893819947 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.019245823754223665, 'num_leaves': 80, 'max_depth': 11, 'feature_fraction': 0.999172986241833, 'bagging_fraction': 0.73634556207071, 'bagging_freq': 7, 'lambda_l1': 0.44158572782190625, 'lambda_l2': 1.0843796497168108, 'min_gain_to_split': 0.01393341979423883, 'min_data_in_leaf': 16}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  16%|█▌        | 48/300 [36:11<5:31:09, 78.85s/it]

[I 2025-10-14 01:11:21,917] Trial 47 finished with value: 0.7606515896379109 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.02763595515005643, 'num_leaves': 40, 'max_depth': 7, 'feature_fraction': 0.8699063001372084, 'bagging_fraction': 0.9670370087837056, 'bagging_freq': 1, 'lambda_l1': 0.5294114239801778, 'lambda_l2': 1.8993281326260711, 'min_gain_to_split': 0.024140380129267617, 'min_data_in_leaf': 31}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  16%|█▋        | 49/300 [36:15<3:56:44, 56.59s/it]

[I 2025-10-14 01:11:26,584] Trial 48 finished with value: 0.7574707919716597 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.041566567844272505, 'num_leaves': 24, 'max_depth': 3, 'feature_fraction': 0.8151133365238067, 'bagging_fraction': 0.6974872730091233, 'bagging_freq': 5, 'lambda_l1': 0.4607470013592149, 'lambda_l2': 0.8936382030464106, 'min_gain_to_split': 0.009305623633210521, 'min_data_in_leaf': 54}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  17%|█▋        | 50/300 [36:35<3:09:34, 45.50s/it]

[I 2025-10-14 01:11:46,187] Trial 49 finished with value: 0.7577198377937355 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.014408209964549239, 'num_leaves': 32, 'max_depth': 5, 'feature_fraction': 0.9680207991432254, 'bagging_fraction': 0.6553949168985925, 'bagging_freq': 4, 'lambda_l1': 0.6891570775447955, 'lambda_l2': 0.6218752559700783, 'min_gain_to_split': 0.019800145045052675, 'min_data_in_leaf': 45}. Best is trial 22 with value: 0.762100468229771.


Best trial: 22. Best value: 0.7621:  17%|█▋        | 51/300 [36:54<2:36:26, 37.70s/it]

[I 2025-10-14 01:12:05,680] Trial 50 finished with value: 0.7618955638754228 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.016463169705379355, 'num_leaves': 48, 'max_depth': 8, 'feature_fraction': 0.6805431395273667, 'bagging_fraction': 0.8172934724939541, 'bagging_freq': 6, 'lambda_l1': 0.3432648625199339, 'lambda_l2': 0.7427446502932107, 'min_gain_to_split': 0.04871605980860741, 'min_data_in_leaf': 39}. Best is trial 22 with value: 0.762100468229771.


Best trial: 51. Best value: 0.762159:  17%|█▋        | 52/300 [37:13<2:12:34, 32.08s/it]

[I 2025-10-14 01:12:24,647] Trial 51 finished with value: 0.7621592404227595 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.016676974956976915, 'num_leaves': 48, 'max_depth': 8, 'feature_fraction': 0.64561553423692, 'bagging_fraction': 0.8113835038425429, 'bagging_freq': 6, 'lambda_l1': 0.3316673054635859, 'lambda_l2': 0.8969317795206216, 'min_gain_to_split': 0.04923442843722911, 'min_data_in_leaf': 38}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  18%|█▊        | 53/300 [37:27<1:48:45, 26.42s/it]

[I 2025-10-14 01:12:37,862] Trial 52 finished with value: 0.7595209345983386 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.017231380864997377, 'num_leaves': 64, 'max_depth': 10, 'feature_fraction': 0.6040159924904394, 'bagging_fraction': 0.7827715615249958, 'bagging_freq': 6, 'lambda_l1': 0.2358727512839272, 'lambda_l2': 0.936770294692029, 'min_gain_to_split': 0.04956706669011591, 'min_data_in_leaf': 37}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  18%|█▊        | 54/300 [37:48<1:42:31, 25.00s/it]

[I 2025-10-14 01:12:59,569] Trial 53 finished with value: 0.7616901745016831 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.011921341398236404, 'num_leaves': 48, 'max_depth': 9, 'feature_fraction': 0.656766765318074, 'bagging_fraction': 0.8942251204186741, 'bagging_freq': 7, 'lambda_l1': 0.33104102597332724, 'lambda_l2': 0.7305037502801699, 'min_gain_to_split': 0.045098272498152035, 'min_data_in_leaf': 43}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  18%|█▊        | 55/300 [38:11<1:39:10, 24.29s/it]

[I 2025-10-14 01:13:22,180] Trial 54 finished with value: 0.7609910225575284 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.012220434647356414, 'num_leaves': 48, 'max_depth': 7, 'feature_fraction': 0.6516708729713426, 'bagging_fraction': 0.8875475465734309, 'bagging_freq': 7, 'lambda_l1': 0.33190090720114923, 'lambda_l2': 0.7519976341190785, 'min_gain_to_split': 0.04609499319806543, 'min_data_in_leaf': 41}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  19%|█▊        | 56/300 [38:30<1:32:33, 22.76s/it]

[I 2025-10-14 01:13:41,372] Trial 55 finished with value: 0.7604553137974255 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.014269098087626806, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.6804909229824567, 'bagging_fraction': 0.8230273753359725, 'bagging_freq': 7, 'lambda_l1': 0.27965685238285387, 'lambda_l2': 0.764889636716016, 'min_gain_to_split': 0.04252262400640299, 'min_data_in_leaf': 33}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  19%|█▉        | 57/300 [39:04<1:45:47, 26.12s/it]

[I 2025-10-14 01:14:15,336] Trial 56 finished with value: 0.7600887921907821 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.007797665540129114, 'num_leaves': 48, 'max_depth': 6, 'feature_fraction': 0.6557984683936069, 'bagging_fraction': 0.9077469431651026, 'bagging_freq': 7, 'lambda_l1': 0.39672576888634536, 'lambda_l2': 0.5745095991133349, 'min_gain_to_split': 0.04715840627365013, 'min_data_in_leaf': 47}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  19%|█▉        | 58/300 [39:28<1:42:40, 25.46s/it]

[I 2025-10-14 01:14:39,250] Trial 57 finished with value: 0.7604799531467726 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.011099466751137129, 'num_leaves': 64, 'max_depth': 9, 'feature_fraction': 0.627246843359069, 'bagging_fraction': 0.8689768361857546, 'bagging_freq': 7, 'lambda_l1': 0.11856461141688593, 'lambda_l2': 0.5002888804995314, 'min_gain_to_split': 0.04285667928777643, 'min_data_in_leaf': 38}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  20%|█▉        | 59/300 [39:54<1:42:44, 25.58s/it]

[I 2025-10-14 01:15:05,104] Trial 58 finished with value: 0.7603483647813438 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.00953183653718513, 'num_leaves': 40, 'max_depth': 10, 'feature_fraction': 0.6761948754175291, 'bagging_fraction': 0.7951619726080157, 'bagging_freq': 6, 'lambda_l1': 0.3173731748056722, 'lambda_l2': 0.9463776649065496, 'min_gain_to_split': 0.04422610119419062, 'min_data_in_leaf': 28}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  20%|██        | 60/300 [40:12<1:33:07, 23.28s/it]

[I 2025-10-14 01:15:23,032] Trial 59 finished with value: 0.7602807907414493 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.016166623894423474, 'num_leaves': 40, 'max_depth': 7, 'feature_fraction': 0.6457626847276784, 'bagging_fraction': 0.765698272980443, 'bagging_freq': 7, 'lambda_l1': 0.2376567913490551, 'lambda_l2': 0.7270489212420109, 'min_gain_to_split': 0.04916400691750974, 'min_data_in_leaf': 59}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  20%|██        | 61/300 [40:37<1:34:34, 23.74s/it]

[I 2025-10-14 01:15:47,849] Trial 60 finished with value: 0.7614732026786157 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.013666304884424977, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.6905706074773964, 'bagging_fraction': 0.7340540135370015, 'bagging_freq': 6, 'lambda_l1': 0.36341929549514385, 'lambda_l2': 1.059785184262888, 'min_gain_to_split': 0.04688111176201612, 'min_data_in_leaf': 43}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  21%|██        | 62/300 [40:56<1:29:36, 22.59s/it]

[I 2025-10-14 01:16:07,747] Trial 61 finished with value: 0.7618435191594226 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.011822530831957032, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.6955145798540214, 'bagging_fraction': 0.734500220768864, 'bagging_freq': 6, 'lambda_l1': 0.3518050553627748, 'lambda_l2': 1.0832007487381339, 'min_gain_to_split': 0.04755183406746644, 'min_data_in_leaf': 43}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  21%|██        | 63/300 [41:19<1:29:36, 22.68s/it]

[I 2025-10-14 01:16:30,653] Trial 62 finished with value: 0.7616881706680119 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.010233060183769724, 'num_leaves': 48, 'max_depth': 9, 'feature_fraction': 0.6352334476769957, 'bagging_fraction': 0.8457357811345867, 'bagging_freq': 6, 'lambda_l1': 0.4161751693850546, 'lambda_l2': 1.1677228391218724, 'min_gain_to_split': 0.044736927867985256, 'min_data_in_leaf': 48}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  21%|██▏       | 64/300 [41:53<1:42:29, 26.06s/it]

[I 2025-10-14 01:17:04,574] Trial 63 finished with value: 0.7606368197590825 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.008597732255276025, 'num_leaves': 48, 'max_depth': 9, 'feature_fraction': 0.6374979308989358, 'bagging_fraction': 0.8448428143734342, 'bagging_freq': 6, 'lambda_l1': 0.42579305100079057, 'lambda_l2': 0.8814925950702148, 'min_gain_to_split': 0.03999003139863472, 'min_data_in_leaf': 48}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  22%|██▏       | 65/300 [42:22<1:44:55, 26.79s/it]

[I 2025-10-14 01:17:33,083] Trial 64 finished with value: 0.7608208065090623 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.010304834691253291, 'num_leaves': 64, 'max_depth': 10, 'feature_fraction': 0.6122062982507189, 'bagging_fraction': 0.853056305360039, 'bagging_freq': 5, 'lambda_l1': 0.34489137505886486, 'lambda_l2': 0.4690946122913286, 'min_gain_to_split': 0.04494645111380501, 'min_data_in_leaf': 44}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  22%|██▏       | 66/300 [43:15<2:15:09, 34.66s/it]

[I 2025-10-14 01:18:26,087] Trial 65 finished with value: 0.760351991101786 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.006709390148715474, 'num_leaves': 56, 'max_depth': 9, 'feature_fraction': 0.709426139213865, 'bagging_fraction': 0.8061375372851749, 'bagging_freq': 5, 'lambda_l1': 0.26471556348072156, 'lambda_l2': 1.2394605334564122, 'min_gain_to_split': 0.048240178584485836, 'min_data_in_leaf': 35}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  22%|██▏       | 67/300 [43:34<1:56:29, 30.00s/it]

[I 2025-10-14 01:18:45,224] Trial 66 finished with value: 0.7607126103712939 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01184434350106876, 'num_leaves': 48, 'max_depth': 7, 'feature_fraction': 0.670771059073354, 'bagging_fraction': 0.8782689261167677, 'bagging_freq': 6, 'lambda_l1': 0.2987483785990552, 'lambda_l2': 1.0641254892026484, 'min_gain_to_split': 0.04095057859985331, 'min_data_in_leaf': 53}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  23%|██▎       | 68/300 [44:15<2:08:31, 33.24s/it]

[I 2025-10-14 01:19:26,028] Trial 67 finished with value: 0.7608035817367209 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.00835800397410646, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.7330979602482803, 'bagging_fraction': 0.9313433127941226, 'bagging_freq': 6, 'lambda_l1': 0.3988293410332871, 'lambda_l2': 0.9672668899571703, 'min_gain_to_split': 0.0371591610866824, 'min_data_in_leaf': 62}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  23%|██▎       | 69/300 [44:38<1:55:54, 30.11s/it]

[I 2025-10-14 01:19:48,818] Trial 68 finished with value: 0.7603284643638799 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.013238669619297207, 'num_leaves': 80, 'max_depth': 9, 'feature_fraction': 0.624103731632951, 'bagging_fraction': 0.8291788259358721, 'bagging_freq': 6, 'lambda_l1': 0.5179117162793069, 'lambda_l2': 0.8128271412686264, 'min_gain_to_split': 0.0422732112430644, 'min_data_in_leaf': 48}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  23%|██▎       | 70/300 [45:14<2:02:17, 31.90s/it]

[I 2025-10-14 01:20:24,915] Trial 69 finished with value: 0.7609060312973701 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.010343591880747027, 'num_leaves': 48, 'max_depth': 10, 'feature_fraction': 0.6383836828129978, 'bagging_fraction': 0.8986071326457579, 'bagging_freq': 5, 'lambda_l1': 0.4305040627259523, 'lambda_l2': 0.7058641698428003, 'min_gain_to_split': 0.029784219581411153, 'min_data_in_leaf': 39}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  24%|██▎       | 71/300 [45:29<1:43:20, 27.08s/it]

[I 2025-10-14 01:20:40,729] Trial 70 finished with value: 0.7618084382555501 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.019613795337830767, 'num_leaves': 40, 'max_depth': 0, 'feature_fraction': 0.6611962519942557, 'bagging_fraction': 0.8560273365131689, 'bagging_freq': 6, 'lambda_l1': 0.15588628598089563, 'lambda_l2': 1.2023089163903342, 'min_gain_to_split': 0.04994150197388448, 'min_data_in_leaf': 56}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  24%|██▍       | 72/300 [45:40<1:23:40, 22.02s/it]

[I 2025-10-14 01:20:50,953] Trial 71 finished with value: 0.7524626247070274 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.021611885796176036, 'num_leaves': 40, 'max_depth': 2, 'feature_fraction': 0.6613012613563571, 'bagging_fraction': 0.852085569706051, 'bagging_freq': 6, 'lambda_l1': 0.10584234641476181, 'lambda_l2': 1.2062148312147867, 'min_gain_to_split': 0.04812544399249517, 'min_data_in_leaf': 50}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  24%|██▍       | 73/300 [45:54<1:14:20, 19.65s/it]

[I 2025-10-14 01:21:05,070] Trial 72 finished with value: 0.759128599940814 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01945353603842571, 'num_leaves': 40, 'max_depth': 4, 'feature_fraction': 0.6891504317353031, 'bagging_fraction': 0.8358204835062232, 'bagging_freq': 6, 'lambda_l1': 0.17970875012062457, 'lambda_l2': 1.0224691869194622, 'min_gain_to_split': 0.049891313102270116, 'min_data_in_leaf': 55}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  25%|██▍       | 74/300 [46:22<1:23:13, 22.10s/it]

[I 2025-10-14 01:21:32,876] Trial 73 finished with value: 0.7607768992287472 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.011738784607647823, 'num_leaves': 48, 'max_depth': 0, 'feature_fraction': 0.665088979047533, 'bagging_fraction': 0.7730108395239759, 'bagging_freq': 6, 'lambda_l1': 0.15122010969949198, 'lambda_l2': 1.118310261789937, 'min_gain_to_split': 0.04505560481958989, 'min_data_in_leaf': 70}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  25%|██▌       | 75/300 [46:59<1:40:23, 26.77s/it]

[I 2025-10-14 01:22:10,548] Trial 74 finished with value: 0.7609463255305602 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.007190544629659596, 'num_leaves': 32, 'max_depth': 11, 'feature_fraction': 0.7088485463472082, 'bagging_fraction': 0.8603513780703592, 'bagging_freq': 7, 'lambda_l1': 0.20492665618372444, 'lambda_l2': 0.791973902945219, 'min_gain_to_split': 0.04624016177369343, 'min_data_in_leaf': 43}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  25%|██▌       | 76/300 [47:22<1:35:15, 25.51s/it]

[I 2025-10-14 01:22:33,133] Trial 75 finished with value: 0.7601534987598524 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.014693529596473267, 'num_leaves': 48, 'max_depth': 6, 'feature_fraction': 0.6112028285478591, 'bagging_fraction': 0.5718240649889108, 'bagging_freq': 5, 'lambda_l1': 0.040313097258755384, 'lambda_l2': 0.8958433651119682, 'min_gain_to_split': 0.04402482464348455, 'min_data_in_leaf': 33}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  26%|██▌       | 77/300 [47:35<1:21:06, 21.82s/it]

[I 2025-10-14 01:22:46,343] Trial 76 finished with value: 0.7612235826533047 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.02239693262931345, 'num_leaves': 72, 'max_depth': 0, 'feature_fraction': 0.69545128119041, 'bagging_fraction': 0.8831718325536444, 'bagging_freq': 6, 'lambda_l1': 0.57202663187511, 'lambda_l2': 0.5804130764361555, 'min_gain_to_split': 0.047218977838734084, 'min_data_in_leaf': 56}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  26%|██▌       | 78/300 [47:48<1:11:17, 19.27s/it]

[I 2025-10-14 01:22:59,645] Trial 77 finished with value: 0.7596948630749998 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.02033907880280858, 'num_leaves': 40, 'max_depth': 8, 'feature_fraction': 0.6387122746923412, 'bagging_fraction': 0.9493118867538867, 'bagging_freq': 7, 'lambda_l1': 0.3752384018910711, 'lambda_l2': 1.3423864121466123, 'min_gain_to_split': 0.04836774589526382, 'min_data_in_leaf': 41}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  26%|██▋       | 79/300 [48:05<1:07:32, 18.34s/it]

[I 2025-10-14 01:23:15,818] Trial 78 finished with value: 0.7606358624943993 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.018604924155140496, 'num_leaves': 32, 'max_depth': 7, 'feature_fraction': 0.7222926598462829, 'bagging_fraction': 0.8006663237453906, 'bagging_freq': 6, 'lambda_l1': 0.29575360663702666, 'lambda_l2': 1.435184412094379, 'min_gain_to_split': 0.045679901497831514, 'min_data_in_leaf': 47}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  27%|██▋       | 80/300 [48:26<1:10:52, 19.33s/it]

[I 2025-10-14 01:23:37,465] Trial 79 finished with value: 0.7615899077198097 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.016760315914754582, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.6789581897860588, 'bagging_fraction': 0.9151272221047465, 'bagging_freq': 4, 'lambda_l1': 0.21710955639829718, 'lambda_l2': 1.1515274304049974, 'min_gain_to_split': 0.035237400775344305, 'min_data_in_leaf': 36}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  27%|██▋       | 81/300 [48:52<1:17:25, 21.21s/it]

[I 2025-10-14 01:24:03,066] Trial 80 finished with value: 0.7601930058502402 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.012786300088648507, 'num_leaves': 32, 'max_depth': 9, 'feature_fraction': 0.647731471959575, 'bagging_fraction': 0.8129512143984616, 'bagging_freq': 5, 'lambda_l1': 0.31802112354151607, 'lambda_l2': 0.8647486694989084, 'min_gain_to_split': 0.048242195203865794, 'min_data_in_leaf': 50}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  27%|██▋       | 82/300 [49:14<1:18:38, 21.64s/it]

[I 2025-10-14 01:24:25,715] Trial 81 finished with value: 0.7608407646332809 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.015075558220039753, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.6817236877876203, 'bagging_fraction': 0.9067750264586761, 'bagging_freq': 3, 'lambda_l1': 0.3504183808449183, 'lambda_l2': 1.1637487145207126, 'min_gain_to_split': 0.034098842759837475, 'min_data_in_leaf': 35}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  28%|██▊       | 83/300 [49:33<1:15:06, 20.77s/it]

[I 2025-10-14 01:24:44,443] Trial 82 finished with value: 0.7593129133441865 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.0168280005450169, 'num_leaves': 64, 'max_depth': 9, 'feature_fraction': 0.6659032707361686, 'bagging_fraction': 0.9291831741867088, 'bagging_freq': 4, 'lambda_l1': 0.13290763675595593, 'lambda_l2': 0.9935064233088466, 'min_gain_to_split': 0.03736723768765781, 'min_data_in_leaf': 58}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  28%|██▊       | 84/300 [49:50<1:10:10, 19.49s/it]

[I 2025-10-14 01:25:00,961] Trial 83 finished with value: 0.7606660897572006 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01923395093955259, 'num_leaves': 48, 'max_depth': 8, 'feature_fraction': 0.933439178849793, 'bagging_fraction': 0.7383608663024314, 'bagging_freq': 4, 'lambda_l1': 0.22429257801818375, 'lambda_l2': 1.2890773863571303, 'min_gain_to_split': 0.04310065235078357, 'min_data_in_leaf': 45}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  28%|██▊       | 85/300 [50:08<1:08:32, 19.13s/it]

[I 2025-10-14 01:25:19,239] Trial 84 finished with value: 0.7614478974096933 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.016433170523120006, 'num_leaves': 40, 'max_depth': 10, 'feature_fraction': 0.6325338948850173, 'bagging_fraction': 0.9204085560314782, 'bagging_freq': 4, 'lambda_l1': 0.26799496174672743, 'lambda_l2': 1.101659844109944, 'min_gain_to_split': 0.041498090541582724, 'min_data_in_leaf': 38}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  29%|██▊       | 86/300 [50:21<1:01:42, 17.30s/it]

[I 2025-10-14 01:25:32,274] Trial 85 finished with value: 0.7599283278448334 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.024920705824609146, 'num_leaves': 120, 'max_depth': 7, 'feature_fraction': 0.700241852054732, 'bagging_fraction': 0.9747162239637027, 'bagging_freq': 6, 'lambda_l1': 0.3876164831157937, 'lambda_l2': 1.2173974702656594, 'min_gain_to_split': 0.029539806727059102, 'min_data_in_leaf': 30}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  29%|██▉       | 87/300 [50:37<59:52, 16.87s/it]  

[I 2025-10-14 01:25:48,135] Trial 86 finished with value: 0.7593067503347515 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.017941964736391865, 'num_leaves': 96, 'max_depth': 9, 'feature_fraction': 0.6769359008547936, 'bagging_fraction': 0.8964686017115661, 'bagging_freq': 7, 'lambda_l1': 0.47814622446256627, 'lambda_l2': 0.929213518652291, 'min_gain_to_split': 0.043820136472459005, 'min_data_in_leaf': 42}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  29%|██▉       | 88/300 [50:56<1:02:25, 17.67s/it]

[I 2025-10-14 01:26:07,665] Trial 87 finished with value: 0.7585355206728965 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.013747021987260128, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.6593634397764168, 'bagging_fraction': 0.9431500523392892, 'bagging_freq': 3, 'lambda_l1': 0.07362627757803147, 'lambda_l2': 1.0457584997954565, 'min_gain_to_split': 0.04936183664523047, 'min_data_in_leaf': 33}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  30%|██▉       | 89/300 [51:14<1:02:01, 17.64s/it]

[I 2025-10-14 01:26:25,241] Trial 88 finished with value: 0.7604084668001279 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.015406480364543052, 'num_leaves': 72, 'max_depth': 9, 'feature_fraction': 0.6164791199087754, 'bagging_fraction': 0.7848375355603618, 'bagging_freq': 5, 'lambda_l1': 0.2062375264809334, 'lambda_l2': 1.3756414543746684, 'min_gain_to_split': 0.046965349383691825, 'min_data_in_leaf': 28}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  30%|███       | 90/300 [51:38<1:08:44, 19.64s/it]

[I 2025-10-14 01:26:49,547] Trial 89 finished with value: 0.7620660300787647 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.010532084125382323, 'num_leaves': 48, 'max_depth': 7, 'feature_fraction': 0.6496371605271029, 'bagging_fraction': 0.8635744240434222, 'bagging_freq': 4, 'lambda_l1': 0.2520104923049463, 'lambda_l2': 1.1566091146449424, 'min_gain_to_split': 0.03463584309551778, 'min_data_in_leaf': 39}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  30%|███       | 91/300 [52:04<1:15:11, 21.59s/it]

[I 2025-10-14 01:27:15,675] Trial 90 finished with value: 0.7609937537902032 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.01057809014595667, 'num_leaves': 48, 'max_depth': 7, 'feature_fraction': 0.6034394668698537, 'bagging_fraction': 0.8420166404092713, 'bagging_freq': 6, 'lambda_l1': 0.8989697942275252, 'lambda_l2': 0.2921688244038514, 'min_gain_to_split': 0.03967732297331558, 'min_data_in_leaf': 52}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  31%|███       | 92/300 [52:37<1:26:43, 25.02s/it]

[I 2025-10-14 01:27:48,699] Trial 91 finished with value: 0.7609043917219404 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.009218662800054971, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.649687350370924, 'bagging_fraction': 0.8749971320546935, 'bagging_freq': 4, 'lambda_l1': 0.25488938344888656, 'lambda_l2': 1.14455919196666, 'min_gain_to_split': 0.026962662358966508, 'min_data_in_leaf': 37}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  31%|███       | 93/300 [53:02<1:25:54, 24.90s/it]

[I 2025-10-14 01:28:13,335] Trial 92 finished with value: 0.7616122880178413 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.011077375595214631, 'num_leaves': 48, 'max_depth': 8, 'feature_fraction': 0.6857483172425681, 'bagging_fraction': 0.8659134227885471, 'bagging_freq': 4, 'lambda_l1': 0.1520585486981164, 'lambda_l2': 0.9992470013451342, 'min_gain_to_split': 0.034248870320518696, 'min_data_in_leaf': 100}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  31%|███▏      | 94/300 [53:34<1:32:27, 26.93s/it]

[I 2025-10-14 01:28:44,999] Trial 93 finished with value: 0.7614188612993527 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.008924568215042316, 'num_leaves': 40, 'max_depth': 6, 'feature_fraction': 0.6271881466848735, 'bagging_fraction': 0.8588940973236744, 'bagging_freq': 4, 'lambda_l1': 0.32690112414222294, 'lambda_l2': 0.9780527313513077, 'min_gain_to_split': 0.0304864618135234, 'min_data_in_leaf': 86}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  32%|███▏      | 95/300 [53:59<1:30:27, 26.47s/it]

[I 2025-10-14 01:29:10,407] Trial 94 finished with value: 0.7613036615250509 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.009831687044165805, 'num_leaves': 48, 'max_depth': 9, 'feature_fraction': 0.6441863475595248, 'bagging_fraction': 0.8265302861283227, 'bagging_freq': 4, 'lambda_l1': 0.16340535875474296, 'lambda_l2': 0.6932061989739878, 'min_gain_to_split': 0.045727047696183215, 'min_data_in_leaf': 45}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  32%|███▏      | 96/300 [54:23<1:27:22, 25.70s/it]

[I 2025-10-14 01:29:34,291] Trial 95 finished with value: 0.7599233956610302 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.012168972321776071, 'num_leaves': 40, 'max_depth': 7, 'feature_fraction': 0.6881415941793173, 'bagging_fraction': 0.8646558760304995, 'bagging_freq': 5, 'lambda_l1': 0.35280028770026856, 'lambda_l2': 0.7891428238150765, 'min_gain_to_split': 0.025289080409151836, 'min_data_in_leaf': 100}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  32%|███▏      | 97/300 [54:54<1:32:11, 27.25s/it]

[I 2025-10-14 01:30:05,153] Trial 96 finished with value: 0.7610183388463397 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.011403444538536214, 'num_leaves': 48, 'max_depth': 8, 'feature_fraction': 0.7431477294391291, 'bagging_fraction': 0.841175643939648, 'bagging_freq': 3, 'lambda_l1': 0.416720613416464, 'lambda_l2': 1.0235686588593267, 'min_gain_to_split': 0.022532717789403547, 'min_data_in_leaf': 78}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  33%|███▎      | 98/300 [55:13<1:23:26, 24.78s/it]

[I 2025-10-14 01:30:24,189] Trial 97 finished with value: 0.7597216110061804 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.012937790149538261, 'num_leaves': 32, 'max_depth': 7, 'feature_fraction': 0.7150242560678907, 'bagging_fraction': 0.8142833469751074, 'bagging_freq': 7, 'lambda_l1': 0.2896362933590562, 'lambda_l2': 1.092550261716062, 'min_gain_to_split': 0.038284898364409126, 'min_data_in_leaf': 91}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  33%|███▎      | 99/300 [55:37<1:22:13, 24.54s/it]

[I 2025-10-14 01:30:48,178] Trial 98 finished with value: 0.7614139060099773 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.010941840038414774, 'num_leaves': 40, 'max_depth': 8, 'feature_fraction': 0.9114477449339269, 'bagging_fraction': 0.890794019394081, 'bagging_freq': 6, 'lambda_l1': 0.6344933403454395, 'lambda_l2': 0.8623221699957047, 'min_gain_to_split': 0.04767361832911838, 'min_data_in_leaf': 40}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  33%|███▎      | 100/300 [55:58<1:18:04, 23.42s/it]

[I 2025-10-14 01:31:08,977] Trial 99 finished with value: 0.7595851027573848 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.00979878686717069, 'num_leaves': 48, 'max_depth': 5, 'feature_fraction': 0.9822101404389042, 'bagging_fraction': 0.8255199417490174, 'bagging_freq': 7, 'lambda_l1': 0.09694180522367396, 'lambda_l2': 1.2468810729703173, 'min_gain_to_split': 0.021244867953621904, 'min_data_in_leaf': 49}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  34%|███▎      | 101/300 [56:15<1:11:53, 21.68s/it]

[I 2025-10-14 01:31:26,577] Trial 100 finished with value: 0.7598880821319791 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.020418737773959868, 'num_leaves': 56, 'max_depth': 10, 'feature_fraction': 0.6680244801584468, 'bagging_fraction': 0.7583367966156462, 'bagging_freq': 6, 'lambda_l1': 0.5014042067115447, 'lambda_l2': 1.4953688019133293, 'min_gain_to_split': 0.04982807355913396, 'min_data_in_leaf': 94}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  34%|███▍      | 102/300 [56:24<58:29, 17.73s/it]  

[I 2025-10-14 01:31:35,094] Trial 101 finished with value: 0.7280635958371411 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.018839805543435294, 'num_leaves': 64, 'max_depth': 1, 'feature_fraction': 0.6572177347450824, 'bagging_fraction': 0.915037060818346, 'bagging_freq': 4, 'lambda_l1': 0.22092425633488214, 'lambda_l2': 0.9169123759620048, 'min_gain_to_split': 0.034593698265414835, 'min_data_in_leaf': 33}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  34%|███▍      | 103/300 [56:41<57:13, 17.43s/it]

[I 2025-10-14 01:31:51,827] Trial 102 finished with value: 0.7601532700666224 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.02193413718820164, 'num_leaves': 56, 'max_depth': 9, 'feature_fraction': 0.676600882167819, 'bagging_fraction': 0.8829468490180127, 'bagging_freq': 4, 'lambda_l1': 0.25448651204552847, 'lambda_l2': 1.2025301212328598, 'min_gain_to_split': 0.03585763233313759, 'min_data_in_leaf': 25}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  35%|███▍      | 104/300 [57:16<1:14:05, 22.68s/it]

[I 2025-10-14 01:32:26,763] Trial 103 finished with value: 0.7610474983730541 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.010065841757366691, 'num_leaves': 48, 'max_depth': 8, 'feature_fraction': 0.6542228579802569, 'bagging_fraction': 0.9056343353980862, 'bagging_freq': 4, 'lambda_l1': 0.13994490070806542, 'lambda_l2': 1.1515109731208606, 'min_gain_to_split': 0.028060688265487306, 'min_data_in_leaf': 37}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  35%|███▌      | 105/300 [57:35<1:10:27, 21.68s/it]

[I 2025-10-14 01:32:46,098] Trial 104 finished with value: 0.7602682891122123 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.017069032932674318, 'num_leaves': 56, 'max_depth': 8, 'feature_fraction': 0.6847891811808821, 'bagging_fraction': 0.960178979824138, 'bagging_freq': 4, 'lambda_l1': 0.31077356992604677, 'lambda_l2': 0.9695239423047737, 'min_gain_to_split': 0.03134017365623487, 'min_data_in_leaf': 46}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  35%|███▌      | 106/300 [57:57<1:10:37, 21.84s/it]

[I 2025-10-14 01:33:08,318] Trial 105 finished with value: 0.7609564684949319 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.013925419239824243, 'num_leaves': 48, 'max_depth': 9, 'feature_fraction': 0.6973150774776838, 'bagging_fraction': 0.8518498721902544, 'bagging_freq': 4, 'lambda_l1': 0.19830255618064008, 'lambda_l2': 1.05105118670828, 'min_gain_to_split': 0.03575120782279206, 'min_data_in_leaf': 43}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  36%|███▌      | 107/300 [58:22<1:13:42, 22.92s/it]

[I 2025-10-14 01:33:33,750] Trial 106 finished with value: 0.7616324455732074 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.011380912109251774, 'num_leaves': 40, 'max_depth': 8, 'feature_fraction': 0.6732592392963085, 'bagging_fraction': 0.87525179306552, 'bagging_freq': 2, 'lambda_l1': 0.4530631403698401, 'lambda_l2': 0.7438102799549421, 'min_gain_to_split': 0.046563397040983494, 'min_data_in_leaf': 35}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  36%|███▌      | 108/300 [58:49<1:16:55, 24.04s/it]

[I 2025-10-14 01:34:00,409] Trial 107 finished with value: 0.7600669083043216 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.011255330601491937, 'num_leaves': 40, 'max_depth': 7, 'feature_fraction': 0.6715578863226342, 'bagging_fraction': 0.7919960002042916, 'bagging_freq': 2, 'lambda_l1': 0.4099937405833109, 'lambda_l2': 0.7511645906447656, 'min_gain_to_split': 0.048738688855700166, 'min_data_in_leaf': 40}. Best is trial 51 with value: 0.7621592404227595.


Best trial: 51. Best value: 0.762159:  36%|███▋      | 109/300 [59:12<1:15:14, 23.63s/it]

[I 2025-10-14 01:34:23,098] Trial 108 finished with value: 0.7609322849977035 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.012321990614785103, 'num_leaves': 32, 'max_depth': 9, 'feature_fraction': 0.64269625356524, 'bagging_fraction': 0.8068054604901553, 'bagging_freq': 5, 'lambda_l1': 0.4530805756246279, 'lambda_l2': 0.6520963458216852, 'min_gain_to_split': 0.04642556150570481, 'min_data_in_leaf': 39}. Best is trial 51 with value: 0.7621592404227595.


In [None]:
import os
import json
import random
import pickle
import pandas as pd
import numpy as np
import lightgbm as lgb
from scipy.stats import spearmanr
from datetime import datetime

# ============================================================
#                  GLOBAL CONFIG
# ============================================================
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
print(f"🔒 Global seed set to {SEED}")

# ============================================================
#                  PATHS
# ============================================================
EXPERIMENT_DIR = "../results/lgbm/leave_one_chr"
TRAIN_PATH = "../preprocessed_data/CAGE-merged/4. features selection/X1_all_rank_features_pruned.tsv"
VAL_PATH   = "../preprocessed_data/CAGE-merged/4. features selection/X2_all_rank_features_pruned.tsv"

META_COLS = ["gene_name", "chr", "gene_start", "gene_end",
             "TSS_start", "TSS_end", "strand", "gex", "gex_rank"]
TARGET_COL = "gex_rank"

os.makedirs(EXPERIMENT_DIR, exist_ok=True)
LOG_PATH = os.path.join(EXPERIMENT_DIR, "log.txt")
with open(LOG_PATH, "w") as f:
    f.write("==== Leave-One-Chromosome-Out Experiment ====\n")

def log(msg):
    print(msg)
    with open(LOG_PATH, "a") as f:
        f.write(f"{msg}\n")

# ============================================================
#                  DATA LOADING
# ============================================================
df_train_full = pd.read_csv(TRAIN_PATH, sep="\t")
df_val_full   = pd.read_csv(VAL_PATH, sep="\t")

feature_cols = [c for c in df_train_full.columns if c not in META_COLS]
chromosomes = [f"chr{i}" for i in range(2, 23)]
log(f"📚 Loaded data with {len(chromosomes)} chromosomes.")
log(f"Feature count: {len(feature_cols)}")

# ============================================================
#                  LIGHTGBM PARAMS
# ============================================================
params = {
    "objective": "regression",
    "metric": "rmse",
    "verbosity": -1,
    "boosting_type": "gbdt",
    "learning_rate": 0.05,
    "num_leaves": 16,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 3,
    "seed": SEED,
    "deterministic": True,
    "force_row_wise": True,
}

# ============================================================
#                  EXPERIMENT LOOP
# ============================================================
overall_results = []
preds_all = []

for test_chr in chromosomes:
    log(f"\n🧪 Outer Loop → Test chromosome: {test_chr}")
    outer_dir = os.path.join(EXPERIMENT_DIR, f"test_{test_chr}")
    os.makedirs(outer_dir, exist_ok=True)

    # 剩下的 20 個 chromosome 用於 inner CV
    inner_chrs = [c for c in chromosomes if c != test_chr]
    folds = [inner_chrs[i::5] for i in range(5)]
    log(f"Inner folds (5-fold over 20 chr): {folds}")

    test_df = df_val_full[df_val_full["chr"] == test_chr].copy()
    test_X = test_df[feature_cols]
    fold_preds = []

    for fold_idx, val_chrs in enumerate(folds):
        log(f"\n🚀 Fold {fold_idx+1}/5 | Val chromosomes: {val_chrs}")
        fold_dir = os.path.join(outer_dir, f"fold_{fold_idx+1}")
        os.makedirs(fold_dir, exist_ok=True)

        train_chrs = [c for c in inner_chrs if c not in val_chrs]
        df_train = df_train_full[df_train_full["chr"].isin(train_chrs)]
        df_val = df_train_full[df_train_full["chr"].isin(val_chrs)]

        X_train, y_train = df_train[feature_cols], df_train[TARGET_COL]
        X_val, y_val = df_val[feature_cols], df_val[TARGET_COL]

        dtrain = lgb.Dataset(X_train, label=y_train)
        dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)

        model = lgb.train(
            params,
            dtrain,
            valid_sets=[dtrain, dval],
            valid_names=["train", "val"],
            num_boost_round=2000,
            callbacks=[
                lgb.early_stopping(stopping_rounds=100),
                lgb.log_evaluation(period=200)
            ],
        )

        model_path = os.path.join(fold_dir, "model.pkl")
        with open(model_path, "wb") as f:
            pickle.dump(model, f)
        log(f"💾 Model saved: {model_path}")

        val_pred = model.predict(X_val, num_iteration=model.best_iteration)
        val_spearman = spearmanr(y_val, val_pred)[0]
        log(f"📈 Fold {fold_idx+1} inner val Spearman = {val_spearman:.4f}")

        # 對 test_chr 預測
        test_pred = model.predict(test_X, num_iteration=model.best_iteration)
        fold_preds.append(test_pred)

    # =======================================================
    # 平均五個模型對 test_chr 的預測
    # =======================================================
    mean_pred = np.mean(np.vstack(fold_preds), axis=0)
    test_df["predicted_gex_rank"] = mean_pred
    test_df["test_chr"] = test_chr
    preds_all.append(test_df[["gene_name", "chr", "predicted_gex_rank"]])

    # 計算 Spearman
    test_spearman = spearmanr(test_df["gex_rank"], test_df["predicted_gex_rank"])[0]
    overall_results.append({"test_chr": test_chr, "spearman": test_spearman})
    log(f"🌍 Test chromosome {test_chr}: Spearman ρ = {test_spearman:.4f}")

# ============================================================
#                  SUMMARY
# ============================================================
df_preds_all = pd.concat(preds_all, ignore_index=True)
df_preds_all.to_csv(os.path.join(EXPERIMENT_DIR, "predict_all.tsv"), sep="\t", index=False)

df_summary = pd.DataFrame(overall_results)
df_summary.to_csv(os.path.join(EXPERIMENT_DIR, "summary.tsv"), sep="\t", index=False)

mean_rho = df_summary["spearman"].mean()
log(f"\n✅ Overall mean Spearman across 21 test chromosomes: {mean_rho:.4f}")
log(f"Experiment completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


🔒 Global seed set to 42
📚 Loaded data with 21 chromosomes.
Feature count: 277

🧪 Outer Loop → Test chromosome: chr2
Inner folds (5-fold over 20 chr): [['chr3', 'chr8', 'chr13', 'chr18'], ['chr4', 'chr9', 'chr14', 'chr19'], ['chr5', 'chr10', 'chr15', 'chr20'], ['chr6', 'chr11', 'chr16', 'chr21'], ['chr7', 'chr12', 'chr17', 'chr22']]

🚀 Fold 1/5 | Val chromosomes: ['chr3', 'chr8', 'chr13', 'chr18']
Training until validation scores don't improve for 100 rounds
[200]	train's rmse: 0.123313	val's rmse: 0.145375
Early stopping, best iteration is:
[156]	train's rmse: 0.127639	val's rmse: 0.145251
💾 Model saved: ../results/lgbm/leave_one_chr/test_chr2/fold_1/model.pkl
📈 Fold 1 inner val Spearman = 0.7779

🚀 Fold 2/5 | Val chromosomes: ['chr4', 'chr9', 'chr14', 'chr19']
Training until validation scores don't improve for 100 rounds
[200]	train's rmse: 0.121314	val's rmse: 0.14863
[400]	train's rmse: 0.103246	val's rmse: 0.148088
Early stopping, best iteration is:
[428]	train's rmse: 0.101137	val