In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input mount and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/playground-series-s5e8/sample_submission.csv
/kaggle/input/playground-series-s5e8/train.csv
/kaggle/input/playground-series-s5e8/test.csv
/kaggle/input/bank-marketing-dataset-full/bank-full.csv


In [2]:
# === Prep: データ読み込み・前処理・CV・LGBMパラメータ定義 ===
import numpy as np, pandas as pd, lightgbm as lgb
from sklearn.model_selection import StratifiedKFold

# --- Load ---
# Competition files; the "id" column becomes the index so it is not used as a feature.
PATH = "/kaggle/input/playground-series-s5e8/"
train = pd.read_csv(PATH + "train.csv", index_col="id")
test  = pd.read_csv(PATH + "test.csv",  index_col="id")

# Target column name and the categorical / numeric feature column lists
# used throughout the rest of this cell and by the LightGBM cell.
TARGET = "y"
CATS = ['job','marital','education','default','housing','loan','contact','month','poutcome']
NUMS = ['age','balance','day','duration','campaign','pdays','previous']

# --- 軽いwinsorize（任意：過度な外れ値を弱めて安定化） ---
def winsorize(df, cols, lo=0.005, hi=0.995):
    """Return a copy of ``df`` with each column in ``cols`` clipped to its own quantiles.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is copied, never modified in place.
    cols : iterable of str
        Columns to clip.
    lo, hi : float
        Lower / upper quantile levels; values outside
        ``[quantile(lo), quantile(hi)]`` of the column are clipped to that bound.

    Returns
    -------
    pandas.DataFrame
        The copied frame with the clipped columns.
    """
    clipped = df.copy()
    for name in cols:
        column = clipped[name]
        lower_bound = column.quantile(lo)
        upper_bound = column.quantile(hi)
        clipped[name] = column.clip(lower_bound, upper_bound)
    return clipped
# Clip train with its own 0.5% / 99.5% quantiles, then clip test with the SAME
# train-derived bounds. Computing separate quantiles on test would map one raw
# value to different clipped values in train and test, so the model would see
# inconsistent features at inference time.
# The bounds are taken before train is overwritten so they describe the raw data.
_win_lo = train[NUMS].quantile(0.005)
_win_hi = train[NUMS].quantile(0.995)
train[NUMS] = winsorize(train, NUMS, 0.005, 0.995)[NUMS]
# axis=1 aligns the per-column bound Series with the frame's columns.
test[NUMS]  = test[NUMS].clip(lower=_win_lo, upper=_win_hi, axis=1)

# --- Align categorical levels over train+test, then cast to category dtype ---
# Both frames get the same category list, built from the string form of the
# values seen in either frame.
for c in CATS:
    tr_str = train[c].astype(str)
    te_str = test[c].astype(str)
    cats = pd.Categorical(pd.concat([tr_str, te_str], axis=0)).categories
    train[c] = pd.Categorical(tr_str, categories=cats)
    test[c] = pd.Categorical(te_str, categories=cats)

# --- Features / target ---
y = train[TARGET].astype(int).values     # kept as a numpy array so that y[idx] inside fit_predict indexes by position
X_full = train.drop(columns=[TARGET])
X_test = test.copy()

# --- CV folds ---
# The split is materialised once as a list so every model is trained on the same folds.
N_SPLITS = 5
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
folds = list(skf.split(X_full, y))

# --- Class-imbalance handling ---
# scale_pos_weight = (#negatives / #positives); max(pos, 1) guards against division by zero.
neg = int((y == 0).sum())
pos = int((y == 1).sum())
spw = neg / max(pos, 1)
print(f"scale_pos_weight = {spw:.4f}  (neg={neg}, pos={pos})")
print("Prep done. shapes:", X_full.shape, X_test.shape)

# --- LightGBM parameters (A=GBDT / B=GOSS / C=DART) ---
# Settings shared by model A (GBDT) and model C (DART).
base_common = dict(
    objective="binary",
    metric="auc",
    learning_rate=0.03,
    num_leaves=192,
    max_depth=-1,
    min_data_in_leaf=24,
    lambda_l1=0.5,
    lambda_l2=3.0,
    feature_fraction=0.70,
    bagging_fraction=0.80,
    bagging_freq=1,
    max_bin=8192,
    scale_pos_weight=spw,
    force_row_wise=True,
    seed=42,
    max_cat_to_onehot=12,
    max_cat_threshold=128,
)

# Model A: plain GBDT on top of the shared settings.
params_A = dict(boosting="gbdt", **base_common)

# Model B: GOSS. Author's note: GOSS does not use bagging (the bagging keys would be
# ignored if given), so they are left out here to reduce warnings.
# This dict is written out in full rather than derived from base_common; it also uses
# smaller num_leaves / max_bin and different min_data_in_leaf / lambda_l2 values.
params_B = dict(
    boosting="goss",
    objective="binary",
    metric="auc",
    learning_rate=0.03,
    num_leaves=128,
    max_depth=-1,
    min_data_in_leaf=32,
    lambda_l1=0.5,
    lambda_l2=2.0,
    feature_fraction=0.70,
    max_bin=4096,
    scale_pos_weight=spw,
    force_row_wise=True,
    seed=42,
    max_cat_to_onehot=12,
    max_cat_threshold=128,
)

# Model C: DART (kept for diversity; intended to be blended with a small weight).
params_C = base_common.copy()
params_C.update({
    "boosting": "dart",
    "learning_rate": 0.05,   # overrides the 0.03 from base_common
    "drop_rate": 0.10,
    "skip_drop": 0.5,
    "max_drop": 50,
    "xgboost_dart_mode": True,
    "uniform_drop": False,
})
# Defaults for early stopping and the maximum number of boosting rounds (consumed by the Hotfix cell).
EARLY = 400
MAX_ROUNDS = 40000

# The external bank-full data is not mixed in for now (keeps the Hotfix cell off that code path).
ORIG_FRAC = 0.0

scale_pos_weight = 7.2884  (neg=659512, pos=90488)
Prep done. shapes: (750000, 16) (250000, 16)


In [3]:
# === Hotfix: fit_predict 定義 + 学習実行 + ブレンド & 提出 ===
import gc, numpy as np, pandas as pd, lightgbm as lgb
from sklearn.metrics import roc_auc_score, log_loss

# Recommended: do not mix in the external bank-full data at first
# (distribution shift often hurts). Falls back to 0.0 when the prep cell did not define it.
ORIG_FRAC = float(globals().get("ORIG_FRAC", 0.0))

# Drop any categorical_column entry left in the existing param dicts
# (declaring the categoricals on the Dataset is sufficient).
for _pname in ("params_A", "params_B", "params_C"):
    if _pname not in globals():
        continue
    globals()[_pname].pop("categorical_column", None)
    # One-hot encode small categoricals (tends to give a slight gain).
    globals()[_pname].update(max_cat_to_onehot=12, max_cat_threshold=128)

# Defaults for early stopping / round cap / fold count when the prep cell did not set them.
globals().setdefault("EARLY", 400)
globals().setdefault("MAX_ROUNDS", 40000)
globals().setdefault("N_SPLITS", 5)

# Required objects must already exist; fail here with a clear message otherwise.
assert all(_req in globals() for _req in ("X_full", "X_test", "y")), "X_full/X_test/y が未定義です。前セルを実行してください。"
assert "folds" in globals(), "folds が未定義です。CV 分割のセルを実行してください。"
assert "CATS" in globals(), "CATS が未定義です。前処理セルを実行してください。"

def fit_predict(params, X, y, X_test, folds, early_stopping=400, label="MODEL",
                max_rounds=None, cat_features=None):
    """Train LightGBM over the given CV folds and return OOF / test predictions.

    No external data is mixed into the training folds.

    Parameters
    ----------
    params : dict
        LightGBM parameters passed to ``lgb.train``.
    X : pandas.DataFrame
        Training features; rows are selected positionally with ``.iloc``.
    y : array-like
        Training labels. Converted to a numpy array so that the positional
        fold indices always select the matching rows (a pandas Series with a
        non-default index would otherwise be indexed by label).
    X_test : pandas.DataFrame
        Test features.
    folds : iterable of (train_idx, valid_idx)
        Positional index pairs, e.g. ``list(StratifiedKFold(...).split(X, y))``.
    early_stopping : int
        Patience (rounds) handed to ``lgb.early_stopping``.
    label : str
        Prefix used in the printed progress lines.
    max_rounds : int, optional
        Upper bound on boosting rounds; defaults to the module-level ``MAX_ROUNDS``.
    cat_features : list of str, optional
        Categorical feature names; defaults to the module-level ``CATS``.

    Returns
    -------
    (oof, pred) : tuple of numpy.ndarray
        Out-of-fold predictions for ``X`` and the fold-averaged predictions for ``X_test``.
    """
    import gc
    from sklearn.metrics import roc_auc_score

    # Resolve defaults at call time so existing callers keep using the notebook globals.
    if max_rounds is None:
        max_rounds = MAX_ROUNDS
    if cat_features is None:
        cat_features = CATS

    y = np.asarray(y)
    folds = list(folds)  # len(folds) is needed below; also accepts a generator

    oof  = np.zeros(len(X), dtype=float)
    pred = np.zeros(len(X_test), dtype=float)

    for i, (tr_idx, va_idx) in enumerate(folds, 1):
        X_tr, y_tr = X.iloc[tr_idx], y[tr_idx]
        X_va, y_va = X.iloc[va_idx], y[va_idx]

        dtr = lgb.Dataset(X_tr, label=y_tr, categorical_feature=cat_features, free_raw_data=False)
        dvl = lgb.Dataset(X_va, label=y_va, categorical_feature=cat_features, free_raw_data=False)

        # The training set is listed as a valid set too so that train AUC is logged next to valid AUC.
        model = lgb.train(
            params=params,
            train_set=dtr,
            num_boost_round=max_rounds,
            valid_sets=[dtr, dvl],
            valid_names=["train","valid"],
            callbacks=[lgb.early_stopping(early_stopping),
                       lgb.log_evaluation(1000)],
        )

        p_va = model.predict(X_va,   num_iteration=model.best_iteration)
        p_te = model.predict(X_test, num_iteration=model.best_iteration)

        oof[va_idx] = p_va
        pred       += p_te / len(folds)   # equal-weight average over folds

        auc = roc_auc_score(y_va, p_va)
        print(f"[{label} Fold {i}] AUC={auc:.6f}  BestIter={model.best_iteration}")

        # Free the per-fold objects before the next fold to limit peak memory.
        del model, dtr, dvl, X_tr, X_va
        gc.collect()

    print(f"[{label}] OOF AUC={roc_auc_score(y, oof):.6f}")
    return oof, pred


# === Seed Bagging: 同じ設定でseedだけ変えて平均化 ===
from sklearn.metrics import roc_auc_score

def fit_predict_seedbag(params, X_full, y, X_test, folds, EARLY, label="A-GBDT",
                        seeds=(42, 77, 2024)):
    """Run ``fit_predict`` once per seed and average the OOF / test predictions.

    Parameters
    ----------
    params : dict
        Base LightGBM parameters; copied per seed, never modified.
    X_full, y, X_test, folds
        Forwarded unchanged to ``fit_predict``.
    EARLY : int
        Early-stopping patience forwarded to ``fit_predict``.
    label : str
        Prefix for the printed lines; each run is tagged ``{label}-s{seed}``.
    seeds : iterable of int
        Random seeds to bag over; must not be empty.

    Returns
    -------
    (oof_mean, pred_mean) : tuple of numpy.ndarray
        Element-wise mean over seeds of the OOF and test predictions.

    Raises
    ------
    ValueError
        If ``seeds`` is empty (the mean would otherwise be NaN).
    """
    seeds = tuple(seeds)
    if not seeds:
        raise ValueError("seeds must contain at least one value")
    # The same folds are reused for every seed; a generator would be exhausted after the first run.
    folds = list(folds)

    oofs, preds = [], []
    for sd in seeds:
        p = params.copy()
        # Swap the random seeds so bagging / feature_fraction sampling differs per run.
        p["seed"] = sd
        p["bagging_seed"] = sd
        p["feature_fraction_seed"] = sd

        oof_i, pred_i = fit_predict(p, X_full, y, X_test, folds, EARLY, f"{label}-s{sd}")
        oofs.append(oof_i)
        preds.append(pred_i)

    oof_mean  = np.mean(oofs, axis=0)
    pred_mean = np.mean(preds, axis=0)
    try:
        print(f"[{label} seedbag] OOF AUC={roc_auc_score(y, oof_mean):.6f}")
    except ValueError as err:
        # Scoring is informational only (e.g. y holds a single class): report it, keep the predictions.
        print(f"[{label} seedbag] OOF AUC unavailable: {err}")
    return oof_mean, pred_mean
# --- Set up the models (A and B are required; C-DART is currently disabled below) ---
models = []
assert "params_A" in globals() and "params_B" in globals(), "params_A / params_B が未定義です。"
models.append(("A-GBDT", params_A))
models.append(("B-GOSS", params_B))
# Disabled: uncomment the two lines below to add the DART model when params_C is defined.
#if "params_C" in globals():
#    models.append(("C-DART", params_C))

# --- Training ---
# One fit_predict run per model; OOF / test predictions and labels are collected in parallel lists.
oofs, preds, labels = [], [], []
for lbl, prm in models:
    oof_m, pred_m = fit_predict(prm, X_full, y, X_test, folds, EARLY, lbl)
    oofs.append(oof_m); preds.append(pred_m); labels.append(lbl)

# --- OOF で最適重み探索（2本 or 3本に対応） ---
def best_weights(oofs, y, labels):
    """Grid-search blend weights (summing to 1) that maximise OOF AUC.

    Supports exactly two or three models. With two models the first weight is
    scanned over 51 evenly spaced values in [0, 1]; with three models the first
    weight takes 26 values in [0, 1] and the second 26 values in [0, 1 - first],
    the third weight being the remainder.

    Parameters
    ----------
    oofs : sequence of numpy.ndarray
        Out-of-fold predictions, one array per model.
    y : array-like
        True labels used for scoring.
    labels : sequence of str
        Model names; accepted for interface symmetry, not used in the search.

    Returns
    -------
    (best_auc, weights) : (float, tuple)
        Best AUC found and the weight tuple that produced it (first best wins on ties).

    Raises
    ------
    ValueError
        If the number of models is neither 2 nor 3.
    """
    n_models = len(oofs)
    if n_models == 2:
        grid = ((w0, 1-w0) for w0 in np.linspace(0, 1, 51))
    elif n_models == 3:
        grid = ((wA, wB, 1.0 - wA - wB)
                for wA in np.linspace(0, 1, 26)
                for wB in np.linspace(0, 1-wA, 26))
    else:
        raise ValueError("この簡易探索は 2 or 3 本モデルのみ対応です。")

    top_auc, top_weights = -1.0, None
    for cand in grid:
        mix = cand[0]*oofs[0] + cand[1]*oofs[1]
        if n_models == 3:
            mix = mix + cand[2]*oofs[2]
        score = roc_auc_score(y, mix)
        if score > top_auc:
            top_auc, top_weights = score, cand
    return top_auc, top_weights

# Search the blend weights on the OOF predictions.
best_auc, w = best_weights(oofs, y, labels)
print("\n[Blend search] best OOF AUC=", best_auc, "  weights=", dict(zip(labels, w)))

# --- Blend & submit ---
# Weighted sum of the per-model OOF / test predictions using the weights found above.
blend_oof  = np.zeros_like(oofs[0])
blend_pred = np.zeros_like(preds[0])
for wi, o in zip(w, oofs):
    blend_oof += wi * o
for wi, p in zip(w, preds):
    blend_pred += wi * p

print("="*60)
print("Per-model OOF:")
for lbl, o in zip(labels, oofs):
    print(f"  {lbl:7s} AUC={roc_auc_score(y, o):.6f}  LL={log_loss(y, np.clip(o,1e-15,1-1e-15)):.6f}")
print(f"BLEND   AUC={roc_auc_score(y, blend_oof):.6f}  LL={log_loss(y, np.clip(blend_oof,1e-15,1-1e-15)):.6f}")

sub = pd.DataFrame({"id": X_test.index, "y": blend_pred})
sub.to_csv("/kaggle/working/submission.csv", index=False)
print("Saved: /kaggle/working/submission.csv  shape:", sub.shape)

# Also persist the LightGBM blend under its own file names. submission.csv is
# overwritten by the later XGBoost cell, and the final blender cell looks for
# lgb_blend_pred.csv, so without this the LightGBM predictions are lost on a
# fresh Restart & Run All. Rows are in X_test (test.csv) order.
pd.DataFrame({"lgb_oof": blend_oof}).to_csv("/kaggle/working/lgb_blend_oof.csv", index=False)
pd.DataFrame({"lgb_pred": blend_pred}).to_csv("/kaggle/working/lgb_blend_pred.csv", index=False)
print("Saved: /kaggle/working/lgb_blend_oof.csv / lgb_blend_pred.csv")

[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] Total Bins 6967
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120652 -> initscore=-1.986273
[LightGBM] [Info] Start training from score -1.986273
Training until validation scores don't improve for 400 rounds
[1000]	train's auc: 0.985154	valid's auc: 0.973268
[2000]	train's auc: 0.991972	valid's auc: 0.973832
Early stopping, best iteration is:
[2366]	train's auc: 0.993565	valid's auc: 0.973868
[A-GBDT Fold 1] AUC=0.973868  BestIter=2366
[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] Total Bins 6947
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120652 -> initscore=-1.986273
[LightGBM] [Info] Start training from score -1.986273
Training until validation scores don't

In [4]:
# ================================
# XGBoost depthwise + lossguide（外部 bank-full 併用可）フル版
# ================================
import os, gc, numpy as np, pandas as pd, xgboost as xgb
from itertools import combinations
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, log_loss

# ---------- 1) Load data ----------
# NOTE: this re-reads the CSVs and rebinds `train` / `test`, so the clipping and
# category casting applied to the earlier frames of the same name do not carry over here.
PATH = "/kaggle/input/playground-series-s5e8/"
train = pd.read_csv(PATH + "train.csv", index_col="id")
test  = pd.read_csv(PATH + "test.csv",  index_col="id")

TARGET = "y"
NUMS = ['age','balance','day','duration','campaign','pdays','previous']
CATS = ['job','marital','education','default','housing','loan','contact','month','poutcome']

# External data (optional): use the first candidate path that exists.
orig = None
for p in [
    "/kaggle/input/bank-marketing-dataset-full/bank-full.csv",
    "/kaggle/input/bank-marketing-datasets/bank-full.csv",
    "/kaggle/input/uci-bank-marketing-dataset/bank-full.csv",
    "/kaggle/input/bank-marketing-dataset/bank-full.csv",
]:
    if os.path.exists(p):
        o = pd.read_csv(p, delimiter=";")
        # Map the yes/no target to 1/0.
        o["y"] = o["y"].map({"yes":1, "no":0}).astype(int)
        # Keep only the needed columns (safe even if the external file has extra ones).
        keep = [c for c in (NUMS+CATS+[TARGET]) if c in o.columns]
        orig = o[keep].copy()
        break
USE_ORIG = orig is not None
if USE_ORIG:
    # `p` still holds the path at which the loop stopped.
    print(f"Found external: {p}  shape={orig.shape}")
else:
    print("No external bank-full found. Proceeding without it.")

# Cast the categorical columns to str (needed for the pairwise encoding below).
for df in (train, test) + ((orig,) if USE_ORIG else ()):
    for c in CATS:
        df[c] = df[c].astype(str)

# ---------- 2) ペア特徴（全ペア）を factorize で一括作成 ----------
def build_pairwise_codes(train, test, orig, use_orig, num_cols, cat_cols):
    """Add one integer-coded interaction column for every pair of input columns.

    For each pair ``(a, b)`` taken from ``num_cols + cat_cols`` the two values are
    joined as ``"<a>_<b>"`` strings and factorized jointly over train, test and
    (when ``use_orig`` is true) orig, so identical value pairs get the same int32
    code in every frame. The new column is named ``"a-b"``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames to extend; they are not modified.
    orig : pandas.DataFrame or None
        Optional extra frame; only read when ``use_orig`` is true.
    use_orig : bool
        Whether ``orig`` takes part in the encoding.
    num_cols, cat_cols : list of str
        Column names to pair up.

    Returns
    -------
    (train2, test2, orig2, PAIR_COLS)
        Copies of the inputs with the pair columns appended (``orig2`` is None
        when ``use_orig`` is false) and the list of new column names.
    """
    sources = [train, test] + ([orig] if use_orig else [])
    # Row offsets that cut the jointly factorized codes back into per-frame pieces.
    cut_points = np.cumsum([len(frame) for frame in sources])[:-1]
    per_frame = [{} for _ in sources]
    PAIR_COLS = []

    for a, b in combinations(num_cols + cat_cols, 2):
        name = f"{a}-{b}"
        joined = [
            frame[a].astype(str).values + "_" + frame[b].astype(str).values
            for frame in sources
        ]
        codes = pd.factorize(np.concatenate(joined))[0].astype("int32")
        for store, piece in zip(per_frame, np.split(codes, cut_points)):
            store[name] = piece
        PAIR_COLS.append(name)

    # Attach all new columns at once, then copy to get de-fragmented frames.
    widened = [
        pd.concat([frame, pd.DataFrame(store, index=frame.index)], axis=1).copy()
        for frame, store in zip(sources, per_frame)
    ]
    orig_out = widened[2] if use_orig else None
    return widened[0], widened[1], orig_out, PAIR_COLS

# Build the pairwise code columns for all frames in one pass.
train2, test2, orig2, PAIR_COLS = build_pairwise_codes(train, test, orig, USE_ORIG, NUMS, CATS)
# Every column of the widened train frame except the target is a model feature.
FEATURES = [c for c in train2.columns if c != TARGET]
print(f"train/test shapes: {train2.shape} {test2.shape}")

# ---------- 3) Count-encoding を一括適用 ----------
def count_encode_bulk(tr, va, te, cols, use_log1p=True):
    """Replace ``cols`` with count-encoded ``CE_<col>`` columns in all three frames.

    Value counts are taken from ``tr`` only and then looked up in ``tr``, ``va``
    and ``te``; values never seen in ``tr`` get a count of 0.

    Parameters
    ----------
    tr, va, te : pandas.DataFrame
        Training / validation / test frames; not modified.
    cols : list of str
        Columns to encode. They are dropped from the outputs.
    use_log1p : bool
        If true, ``log1p`` is applied to the counts.

    Returns
    -------
    (tr_out, va_out, te_out) : tuple of pandas.DataFrame
        The remaining original columns followed by the float32 ``CE_<col>`` columns.
    """
    frames = (tr, va, te)
    encoded = [{} for _ in frames]

    for col in cols:
        freq = tr[col].value_counts()
        for frame, store in zip(frames, encoded):
            counts = frame[col].map(freq).fillna(0)
            if use_log1p:
                counts = np.log1p(counts)
            store[f"CE_{col}"] = counts.astype("float32").values

    return tuple(
        pd.concat([frame.drop(columns=cols, errors="ignore"),
                   pd.DataFrame(store, index=frame.index)], axis=1)
        for frame, store in zip(frames, encoded)
    )

# ---------- 4) CV settings ----------
# NOTE: the original comment here described this as "10-fold"; the value actually used is 6.
N_FOLDS = 6            # number of stratified folds used by run_cv
SEED    = 42
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

# ---------- 5) XGBoost parameters (depthwise and lossguide) ----------
# NOTE(review): with two eval metrics, XGBoost is documented to use the LAST one
# ("logloss" here) for early stopping — confirm this is intended, given that the
# reported scores are AUC.
XGB_PARAMS = dict(
    n_estimators=20000,
    learning_rate=0.010,
    objective="binary:logistic",
    eval_metric=["auc","logloss"],
    max_depth=7,
    min_child_weight=8,
    gamma=1.0,
    subsample=0.85,
    colsample_bytree=0.50,
    reg_lambda=4.0,
    reg_alpha=3.0,
    max_bin=1024,             # finer split resolution
    random_state=SEED,
    n_jobs=-1,
    tree_method="hist",
    enable_categorical=False,
    early_stopping_rounds=1500
)

# Copy the depthwise settings minus the keys that are overridden below,
# so dict() does not receive the same keyword twice.
XGB_PARAMS_LOSSGUIDE = dict(
    **{k:v for k,v in XGB_PARAMS.items()
       if k not in ["max_depth","min_child_weight","gamma"]},
    grow_policy="lossguide",
    max_depth=0,                # no fixed depth under lossguide
    max_leaves=192,             # more capacity (tune within roughly 128-256 if needed)
    min_child_weight=14,        # somewhat stronger than the depthwise setting
    gamma=0.5
)

# ---------- 6) 学習関数 ----------
def run_cv(params, name="XGB"):
    """Cross-validate an XGBClassifier and return OOF / test predictions.

    Relies on module-level state: ``train2``, ``test2``, ``orig2``, ``USE_ORIG``,
    ``FEATURES``, ``PAIR_COLS``, ``CATS``, ``TARGET``, ``skf`` and
    ``count_encode_bulk``.

    For every fold the external rows (when available) are appended to the
    training part only, the pair and categorical columns are count-encoded with
    counts taken from that training part, and a fresh model is fitted with the
    validation part as ``eval_set``.

    Parameters
    ----------
    params : dict
        Keyword arguments for ``xgb.XGBClassifier``.
    name : str
        Prefix for the printed per-fold line.

    Returns
    -------
    (oof, pred) : tuple of numpy.ndarray
        Out-of-fold predictions for ``train2`` and fold-averaged predictions for ``test2``.
    """
    oof = np.zeros(len(train2), dtype=float)
    pred = np.zeros(len(test2), dtype=float)

    for fold, (tr_idx, va_idx) in enumerate(skf.split(train2, train2[TARGET]), 1):
        tr_df = train2.iloc[tr_idx].copy()
        va_df = train2.iloc[va_idx].copy()
        te_df = test2.copy()

        # Append the auxiliary (external) rows to the training part only.
        if USE_ORIG and orig2 is not None:
            add_cols = [c for c in FEATURES if c in orig2.columns]
            X_tr = pd.concat([tr_df[add_cols], orig2[add_cols]], ignore_index=True)
            y_tr = np.concatenate([tr_df[TARGET].values, orig2[TARGET].values])
        else:
            add_cols = FEATURES
            X_tr = tr_df[add_cols]
            y_tr = tr_df[TARGET].values

        X_va = va_df[add_cols]; y_va = va_df[TARGET].values
        X_te = te_df[add_cols]

        # Count-encode (PAIR_COLS + CATS) using counts from this fold's training rows.
        ce_cols = [c for c in (PAIR_COLS + CATS) if c in add_cols]
        X_tr, X_va, X_te = count_encode_bulk(X_tr, X_va, X_te, ce_cols, use_log1p=True)

        model = xgb.XGBClassifier(**params)
        model.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], verbose=1000)

        # Predict with the trees up to the best iteration when early stopping
        # recorded one; otherwise use the full model. No blanket try/except, so
        # a genuine prediction error is not silently replaced by a different result.
        best_it = getattr(model, "best_iteration", None)
        if best_it is not None:
            it_range = (0, best_it + 1)
            p_va = model.predict_proba(X_va, iteration_range=it_range)[:, 1]
            p_te = model.predict_proba(X_te, iteration_range=it_range)[:, 1]
        else:
            p_va = model.predict_proba(X_va)[:, 1]
            p_te = model.predict_proba(X_te)[:, 1]

        oof[va_idx] = p_va
        pred += p_te / skf.n_splits

        auc = roc_auc_score(y_va, p_va)
        # Clip explicitly instead of passing `eps=`: that keyword was deprecated in
        # scikit-learn 1.3 and removed in 1.5, where it raises a TypeError.
        ll = log_loss(y_va, np.clip(np.asarray(p_va, dtype=float), 1e-15, 1 - 1e-15))
        print(f"[{name} Fold {fold}] AUC={auc:.6f}  LogLoss={ll:.6f}  best_it={best_it}")

        # Free the per-fold objects before the next fold to limit peak memory.
        del model, tr_df, va_df, te_df, X_tr, X_va, X_te
        gc.collect()

    return oof, pred

# ---------- 7) Train both XGBoost variants, then average ----------
print(f"========== Seed {SEED} / {N_FOLDS}-fold ==========")
oof_A, pred_A = run_cv(XGB_PARAMS,            name="XGB-depthwise")
oof_B, pred_B = run_cv(XGB_PARAMS_LOSSGUIDE,  name="XGB-lossguide")

# Equal-weight average of the two variants.
oof_blend  = 0.5*oof_A + 0.5*oof_B
pred_blend = 0.5*pred_A + 0.5*pred_B

print("="*60)
print("OOF AUC (depthwise) =", roc_auc_score(train2[TARGET], oof_A))
print("OOF AUC (lossguide) =", roc_auc_score(train2[TARGET], oof_B))
print("OOF AUC (blend)     =", roc_auc_score(train2[TARGET], oof_blend))

# ---------- 8) Save ----------
pd.DataFrame({"xgb_oof": oof_blend}).to_csv("/kaggle/working/xgb_oof.csv", index=False)
pd.DataFrame({"xgb_pred": pred_blend}).to_csv("/kaggle/working/xgb_pred.csv", index=False)
pd.DataFrame({"id": test2.index, "y": pred_blend}).to_csv("/kaggle/working/submission.csv", index=False)
print("Saved: submission.csv / xgb_oof.csv / xgb_pred.csv")

# Also keep each variant on its own. The final blender cell weights depthwise and
# lossguide separately and looks for xgb_depthwise_pred.csv / xgb_lossguide_pred.csv;
# saving only the 50/50 average would leave it without inputs on a fresh
# Restart & Run All. Rows are in test2 (test.csv) order.
pd.DataFrame({"xgb_oof": oof_A}).to_csv("/kaggle/working/xgb_depthwise_oof.csv", index=False)
pd.DataFrame({"xgb_pred": pred_A}).to_csv("/kaggle/working/xgb_depthwise_pred.csv", index=False)
pd.DataFrame({"xgb_oof": oof_B}).to_csv("/kaggle/working/xgb_lossguide_oof.csv", index=False)
pd.DataFrame({"xgb_pred": pred_B}).to_csv("/kaggle/working/xgb_lossguide_pred.csv", index=False)
print("Saved: xgb_depthwise_oof.csv / xgb_depthwise_pred.csv / xgb_lossguide_oof.csv / xgb_lossguide_pred.csv")


Found external: /kaggle/input/bank-marketing-dataset-full/bank-full.csv  shape=(45211, 17)
train/test shapes: (750000, 137) (250000, 136)
[0]	validation_0-auc:0.94836	validation_0-logloss:0.37762
[1000]	validation_0-auc:0.96936	validation_0-logloss:0.14260
[2000]	validation_0-auc:0.97170	validation_0-logloss:0.13708
[3000]	validation_0-auc:0.97274	validation_0-logloss:0.13454
[4000]	validation_0-auc:0.97335	validation_0-logloss:0.13298
[5000]	validation_0-auc:0.97373	validation_0-logloss:0.13196
[6000]	validation_0-auc:0.97399	validation_0-logloss:0.13124
[7000]	validation_0-auc:0.97415	validation_0-logloss:0.13076
[8000]	validation_0-auc:0.97426	validation_0-logloss:0.13043
[9000]	validation_0-auc:0.97433	validation_0-logloss:0.13021
[10000]	validation_0-auc:0.97438	validation_0-logloss:0.13006
[11000]	validation_0-auc:0.97440	validation_0-logloss:0.12998
[12000]	validation_0-auc:0.97440	validation_0-logloss:0.12995
[13000]	validation_0-auc:0.97440	validation_0-logloss:0.12994
[14000]

In [8]:
# === Fixed-weight blender: XGB(depthwise/lossguide) + LGBM ===
import os, numpy as np, pandas as pd

# 1) Read test.csv to get the test ids (the index) and the expected number of rows.
# NOTE: this rebinds `test` to a freshly loaded frame.
PATH = "/kaggle/input/playground-series-s5e8/"
test = pd.read_csv(PATH + "test.csv", index_col="id")

def try_read_pred(paths, prefer_cols=("y","pred","prediction","xgb_pred","lgb_pred")):
    """Return a 1-D prediction array from the first usable CSV in ``paths``.

    Paths are tried in order and missing files are skipped. In an existing file
    the first column name from ``prefer_cols`` that is present is returned; if
    none is present but the file has exactly one column, that column is
    returned; otherwise the next path is tried.

    Parameters
    ----------
    paths : iterable of str
        Candidate CSV paths, highest priority first.
    prefer_cols : tuple of str
        Column names to look for, in priority order.

    Returns
    -------
    numpy.ndarray or None
        The column values, or None when no candidate yields a usable column.
    """
    for candidate in filter(os.path.exists, paths):
        table = pd.read_csv(candidate)
        chosen = next((col for col in prefer_cols if col in table.columns), None)
        if chosen is not None:
            return table[chosen].values
        if table.shape[1] == 1:
            return table.iloc[:, 0].values
    return None

# 2) Candidate files (several spellings are listed so differently named saves are still picked up)
WRK = "/kaggle/working"
pred_lgb = try_read_pred([
    f"{WRK}/lgb_pred.csv", f"{WRK}/LGBM_pred.csv", f"{WRK}/pred_lgb.csv",
    f"{WRK}/A-GBDT_pred.csv", f"{WRK}/B-GOSS_pred.csv", f"{WRK}/lgb_blend_pred.csv",
    # Fallback in case the LightGBM submission was saved under this name (not recommended, kept just in case)
    f"{WRK}/submission_lgb.csv"
])

pred_xgb_dw = try_read_pred([
    f"{WRK}/xgb_depth_pred.csv", f"{WRK}/xgb_dw_pred.csv", f"{WRK}/xgb_depthwise_pred.csv",
    # If depthwise was not saved on its own, the two-model average xgb_pred is deliberately not used (avoids mixing it in by mistake)
])

pred_xgb_lg = try_read_pred([
    f"{WRK}/xgb_loss_pred.csv", f"{WRK}/xgb_lg_pred.csv", f"{WRK}/xgb_lossguide_pred.csv",
])

# 3) Collect the models that were found (missing ones are skipped)
models = []
if pred_xgb_lg is not None: models.append(("xgb_lossguide", pred_xgb_lg))
if pred_xgb_dw is not None: models.append(("xgb_depthwise", pred_xgb_dw))
if pred_lgb    is not None: models.append(("lgbm", pred_lgb))

assert len(models) >= 2, "使える予測が2本以上見つかりません。保存CSVのパス名を確認してください。"

# 4) Fixed weights (re-normalised below over the models that are actually present)
fixed_w = {
    "xgb_lossguide": 0.40,
    "xgb_depthwise": 0.30,
    "lgbm":          0.30,
}
# Without lossguide, switch to depthwise 0.65 / lgbm 0.35
if "xgb_lossguide" not in dict(models) and {"xgb_depthwise","lgbm"}.issubset({k for k,_ in models}):
    fixed_w = {"xgb_depthwise": 0.65, "lgbm": 0.35}

# Pick the weights of the available models and normalise them to sum to 1
w = np.array([fixed_w[mname] for mname,_ in models], dtype=float)
w = w / w.sum()

# 5) Blend and save
# Each prediction vector is added with its weight; it is assumed to be in test.csv
# row order and of the same length as `test` — TODO confirm for files written elsewhere.
pred = np.zeros(len(test), dtype=float)
for (mname, p), wi in zip(models, w):
    pred += wi * p

sub = pd.DataFrame({"id": test.index, "y": pred})
sub.to_csv(f"{WRK}/submission.csv", index=False)
print("Used models & weights:", [(m, float(wi)) for (m,_), wi in zip(models, w)])
print("Saved:", f"{WRK}/submission.csv  shape={sub.shape}")

Used models & weights: [('xgb_lossguide', 0.4), ('xgb_depthwise', 0.3), ('lgbm', 0.3)]
Saved: /kaggle/working/submission.csv  shape=(250000, 2)
