In [None]:
import polars as pl

# Convert each CSV input to Parquet (one source/destination pair per entry).
for csv_path, parquet_path in (
    ("FILE_PATH", "FILE_PATH"),
    ("FILE_PATH", "FILE_PATH"),
):
    pl.read_csv(csv_path).write_parquet(parquet_path)

In [None]:
import optuna
import numpy as np
import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
import polars as pl

# Load the training data: every column except "ID" and the "飆股" label
# becomes a feature; the "飆股" column is the target.
df_train = pl.read_parquet("FILE_PATH")
feature_frame = df_train.drop(["ID", "飆股"])
X = feature_frame.to_pandas()
y = df_train["飆股"].to_numpy()

# Fixed decision threshold (taken from v4 as an example), stored as plain
# text in a file.
with open("FILE_PATH", "r") as f:
    threshold_text = f.read()
fixed_threshold = float(threshold_text.strip())

# Centre of the search space (your v4 parameters as an example); the
# objective samples each hyperparameter from a window around these values.
center_params = dict(
    learning_rate=0.0425,
    num_leaves=117,
    max_depth=10,
    min_child_samples=174,
    subsample=0.93,
    colsample_bytree=0.77,
    reg_alpha=0.0004,
    reg_lambda=0.295,
)

def objective(trial):
    """Score one Optuna trial by the out-of-fold F1 of a LightGBM classifier.

    Hyperparameters are sampled from a narrow window around the module-level
    ``center_params``. One model is trained per stratified fold with early
    stopping on that fold's validation split; the out-of-fold probabilities
    are then thresholded at the module-level ``fixed_threshold`` and compared
    against ``y``.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The F1 score of the thresholded out-of-fold predictions over all of
        ``y``.
    """
    center = center_params

    params = {
        "learning_rate": trial.suggest_float(
            "learning_rate",
            center["learning_rate"] * 0.8,
            center["learning_rate"] * 1.2,
        ),
        "num_leaves": trial.suggest_int(
            "num_leaves",
            center["num_leaves"] - 20,
            center["num_leaves"] + 20,
        ),
        "max_depth": trial.suggest_int(
            "max_depth",
            center["max_depth"] - 2,
            center["max_depth"] + 2,
        ),
        "min_child_samples": trial.suggest_int(
            "min_child_samples",
            center["min_child_samples"] - 30,
            center["min_child_samples"] + 30,
        ),
        # Fractions may not exceed 1.0, so cap the upper bound in case the
        # centre value is moved close to 1.
        "subsample": trial.suggest_float(
            "subsample",
            center["subsample"] - 0.05,
            min(1.0, center["subsample"] + 0.05),
        ),
        "colsample_bytree": trial.suggest_float(
            "colsample_bytree",
            center["colsample_bytree"] - 0.05,
            min(1.0, center["colsample_bytree"] + 0.05),
        ),
        "reg_alpha": trial.suggest_float(
            "reg_alpha",
            center["reg_alpha"] * 0.5,
            center["reg_alpha"] * 2.0,
            log=True,
        ),
        "reg_lambda": trial.suggest_float(
            "reg_lambda",
            center["reg_lambda"] * 0.5,
            center["reg_lambda"] * 2.0,
            log=True,
        ),
        # LightGBM ignores `subsample` (bagging_fraction) unless
        # `subsample_freq` (bagging_freq) is non-zero; without this the
        # `subsample` dimension of the search has no effect on the model.
        # NOTE: the final model trained from the saved best parameters must
        # set subsample_freq=1 as well to reproduce the tuned behaviour.
        "subsample_freq": 1,
        "n_estimators": 1000,
        "random_state": 42,
        "is_unbalance": True,
        "verbosity": -1,
        "boosting_type": "gbdt",
        "objective": "binary",
    }

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    # Every row lands in exactly one validation fold, so this array is fully
    # overwritten by the end of the loop.
    oof_preds = np.zeros(len(X))

    for train_idx, val_idx in skf.split(X, y):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        model = lgb.LGBMClassifier(**params)
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            eval_metric="binary_logloss",
            callbacks=[
                # Stop once validation log-loss has not improved for 50 rounds.
                lgb.early_stopping(50),
                # Period 0 disables per-iteration evaluation logging.
                lgb.log_evaluation(0),
            ],
        )

        # Probability of the positive class for the held-out rows.
        oof_preds[val_idx] = model.predict_proba(X_val)[:, 1]

    y_pred_binary = (oof_preds > fixed_threshold).astype(int)
    return f1_score(y, y_pred_binary)

# Run the Optuna search, maximising the value returned by `objective`.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

# Report the best trial; floats are shown with six decimals, everything else
# is printed as-is.
print("最佳 F1 分數：", study.best_value)
print("最佳參數組合：")
for name, value in study.best_params.items():
    if isinstance(value, float):
        print(f"  {name}: {value:.6f}")
    else:
        print(f"  {name}: {value}")


In [None]:
import json

# Print the best score and the winning hyperparameters at full precision.
best_params = study.best_params
print("最佳 F1 score：", study.best_value)
print("最佳參數組合：")
for name in best_params:
    print(f"  {name}: {best_params[name]}")

# Persist the winning hyperparameters as indented JSON.
serialized = json.dumps(best_params, indent=2)
with open("best_params_v2.json", "w") as out_file:
    out_file.write(serialized)