In [2]:
from pathlib import Path
import sys
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import (
    precision_recall_curve,
    average_precision_score,
    confusion_matrix,
)

PROJECT_ROOT = Path("/Users/jy/project_2nd/SKN23-2nd-3Team")
sys.path.insert(0, str(PROJECT_ROOT))

from app.utils.save import save_model_and_artifacts

# Korean font configuration is treated as optional: any failure is tolerated so
# the rest of the notebook keeps running, but the reason is now reported instead
# of being discarded silently.
try:
    from app.utils.plotting import configure_matplotlib_korean
    configure_matplotlib_korean()
except Exception as exc:  # deliberately broad: this step is best-effort
    import warnings

    warnings.warn(
        f"Korean matplotlib font configuration skipped: {exc!r}",
        RuntimeWarning,
    )


# 
# 공통: metrics / percentiles 출력&저장 유틸
# 
def build_ranking_metrics(y_true, y_prob, k_list=(5, 10, 15, 20, 25, 30)):
    """Compute PR-AUC plus precision/recall/lift for the top-k% highest scores.

    Args:
        y_true: Binary ground-truth labels (cast to int and flattened).
        y_prob: Predicted scores for the positive class (cast to float and flattened).
        k_list: Percentages of the population to evaluate, e.g. ``5`` means top 5%.

    Returns:
        A dict with the PR-AUC, three "top 5%" summary entries, and ``"ranking"``:
        one ``{"Top_K", "Precision", "Recall", "Lift"}`` row per entry of ``k_list``.

    The "top 5%" summary entries are always computed for k=5. Previously they were
    read from ``ranking[0]``, which was only correct when ``k_list`` started with 5
    and raised ``IndexError`` for an empty ``k_list``.
    """
    y_true = np.asarray(y_true).astype(int).reshape(-1)
    y_prob = np.asarray(y_prob).astype(float).reshape(-1)

    pr_auc = float(average_precision_score(y_true, y_prob))

    df_rank = pd.DataFrame({"y": y_true, "score": y_prob}).sort_values("score", ascending=False)
    base_rate = float(df_rank["y"].mean())
    total_pos = float(df_rank["y"].sum())
    n_total = len(df_rank)

    def _metrics_at(k):
        # Always keep at least one row so tiny samples do not yield an empty selection.
        n_sel = max(int(np.floor(n_total * k / 100)), 1)
        selected = df_rank.iloc[:n_sel]

        precision_k = float(selected["y"].mean())
        # The epsilon avoids a division by zero when there are no positives at all.
        recall_k = float(selected["y"].sum() / (total_pos + 1e-12))
        lift_k = float(precision_k / base_rate) if base_rate > 0 else 0.0
        return {"Top_K": f"{k}%", "Precision": precision_k, "Recall": recall_k, "Lift": lift_k}

    ranking = [_metrics_at(k) for k in k_list]

    # Reuse the 5% row when it was requested; otherwise compute it on its own.
    top5 = next((row for row in ranking if row["Top_K"] == "5%"), None)
    if top5 is None:
        top5 = _metrics_at(5)

    return {
        "PR-AUC (Average Precision)": pr_auc,
        "상위 5% 정밀도 (Precision)": top5["Precision"],
        "상위 5% 재현율 (Recall)": top5["Recall"],
        "상위 5% 리프트 (Lift)": top5["Lift"],
        "ranking": ranking,
    }


def score_percentiles_payload(model_id: str, split: str, y_prob, pcts=(1, 5, 10, 20, 30, 50)):
    """Build a payload of score cut-offs for the top ``pct`` percent of scores.

    For each ``pct`` the cut-off is the ``1 - pct/100`` quantile of ``y_prob``,
    i.e. the score that the top ``pct`` percent of rows reach or exceed.
    """
    scores = np.asarray(y_prob, dtype=float).reshape(-1)

    rows = []
    for pct in pcts:
        upper_tail_q = 1.0 - pct / 100.0
        rows.append({"pct": int(pct), "score": float(np.quantile(scores, upper_tail_q))})

    return {"model_id": model_id, "split": split, "percentiles": rows}


def _stem_from_model_id(model_id: str) -> str:
    if model_id.startswith("dl__mlp_"):
        return "mlp_" + model_id.split("dl__mlp_", 1)[1]
    if model_id.startswith("ml__"):
        return model_id.split("ml__", 1)[1]
    if model_id.startswith("dl__"):
        return model_id.split("dl__", 1)[1]
    return model_id


def save_and_print_score_percentiles(PROJECT_ROOT: Path, model_id: str, split: str, y_prob):
    """Write the score-percentile payload to ``models/metrics`` and echo it.

    The file is named ``<stem>_score_percentiles.json`` where ``<stem>`` comes
    from ``_stem_from_model_id(model_id)``. The JSON text and the output path are
    printed to stdout.

    Returns:
        ``(payload, out_path)``.
    """
    target_dir = PROJECT_ROOT / "models" / "metrics"
    target_dir.mkdir(parents=True, exist_ok=True)

    payload = score_percentiles_payload(model_id, split, y_prob, pcts=(1, 5, 10, 20, 30, 50))
    file_name = f"{_stem_from_model_id(model_id)}_score_percentiles.json"
    out_path = target_dir / file_name

    # Serialise once and reuse the text for both the file and the console.
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    out_path.write_text(rendered, encoding="utf-8")

    print(rendered)
    print(str(out_path))
    return payload, out_path


def plot_confusion_matrix(y_true, y_pred, title, labels=("비이탈(m1)", "이탈(m2)"), cmap="Blues"):
    """Draw a confusion matrix as an annotated heat map and return the figure.

    Args:
        y_true: Ground-truth class ids (cast to int).
        y_pred: Predicted class ids (cast to int).
        title: Axes title.
        labels: Tick labels; class id ``i`` is shown with ``labels[i]``.
        cmap: Matplotlib colormap name.

    Returns:
        The matplotlib ``Figure``. The caller is responsible for closing it.

    The matrix is always ``len(labels)`` x ``len(labels)``. Previously a split in
    which only one class occurred produced a 1x1 matrix while the ticks stayed
    fixed at ``[0, 1]``, so the plot no longer matched its labels.
    """
    y_true = np.asarray(y_true).astype(int)
    y_pred = np.asarray(y_pred).astype(int)

    # Pin the class ids so the matrix shape never depends on which classes occur.
    class_ids = list(range(len(labels)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    fig, ax = plt.subplots(figsize=(6, 5))
    im = ax.imshow(cm, cmap=cmap, interpolation="nearest", aspect="equal")
    fig.colorbar(im, ax=ax)

    ax.set_title(title)
    ax.set_xlabel("Predicted (예측값)")
    ax.set_ylabel("Actual (실제값)")

    ax.set_xticks(class_ids)
    ax.set_yticks(class_ids)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.0 if cm.size else 0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(
                j, i, f"{cm[i, j]}",
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black",
                fontsize=12,
            )

    # Explicit limits, with the y axis inverted so row 0 is drawn at the top.
    ax.set_xlim(-0.5, cm.shape[1] - 0.5)
    ax.set_ylim(cm.shape[0] - 0.5, -0.5)
    fig.tight_layout()
    return fig


def threshold_topk(y_prob, k_pct: int) -> float:
    """Return the score of the last row inside the top ``k_pct`` percent.

    The cut-off rank is ``floor(n * k_pct / 100)``, but never less than one row.
    """
    scores = np.asarray(y_prob, dtype=float).reshape(-1)
    cutoff_rank = max(int(np.floor(len(scores) * k_pct / 100)), 1)
    # Sorting the negated scores ascending yields the scores in descending order.
    descending = -np.sort(-scores)
    return float(descending[cutoff_rank - 1])


# 
# 1) 데이터 로드 & split 분리 (팀 규칙 준수)
# 
# Load the preprocessed feature table and the label table from data/processed.
DATA_DIR = PROJECT_ROOT / "data" / "processed"
features = pd.read_parquet(DATA_DIR / "features_ml_clean.parquet")
labels = pd.read_parquet(DATA_DIR / "labels.parquet")

# Cast the join key to str on both sides so the merge compares like with like.
features["user_id"] = features["user_id"].astype(str)
labels["user_id"] = labels["user_id"].astype(str)

# Inner join keeps only rows present in both tables; validate="one_to_one" makes
# pandas raise if (user_id, anchor_time) is duplicated on either side.
df = features.merge(
    labels[["user_id", "anchor_time", "label", "split"]],
    on=["user_id", "anchor_time"],
    how="inner",
    validate="one_to_one",
)

# Binary target: 1 when label == "m2", else 0.
# NOTE(review): "m2" appears to be the churn class (see the confusion-matrix tick labels) - confirm.
df["y"] = (df["label"] == "m2").astype(int)

# Every column that is not a key, label, split marker or target is a model feature.
feature_cols = [c for c in df.columns if c not in ["user_id", "anchor_time", "label", "split", "y"]]

# The split assignment comes from the label table's "split" column.
train_df = df[df["split"] == "train"]
val_df = df[df["split"] == "val"]
test_df = df[df["split"] == "test"]

X_train, y_train = train_df[feature_cols], train_df["y"]
X_val, y_val = val_df[feature_cols], val_df["y"]
X_test, y_test = test_df[feature_cols], test_df["y"]


# 
# 2) HGB 하이퍼파라미터 튜닝 (VAL 기준, TEST는 마지막 1회)
# 
def tune_hgb_on_val(X_train, y_train, X_val, y_val, n_trials=25, seed=42):
    """Random-search HistGradientBoostingClassifier settings, scored on validation PR-AUC.

    Each trial samples one configuration from fixed candidate lists, fits on the
    training data and scores average precision on the validation data.

    Returns:
        ``{"pr_auc": best validation score, "params": best parameter dict}``.
        With ``n_trials == 0`` the result is ``{"pr_auc": -1.0, "params": None}``.
    """
    rng = np.random.default_rng(seed)
    y_val_arr = np.asarray(y_val)

    top_score, top_params = -1.0, None

    for _trial in range(n_trials):
        # max_depth is drawn first so the sequence of random draws is unchanged.
        depth_pick = rng.choice([3, 4, 5, 6, 7, None])
        candidate = {
            "learning_rate": float(rng.choice([0.02, 0.03, 0.05, 0.07, 0.1])),
            "max_depth": None if depth_pick is None else int(depth_pick),
            "max_iter": int(rng.choice([200, 300, 500, 800])),
            "min_samples_leaf": int(rng.choice([10, 20, 30, 50, 100])),
            "l2_regularization": float(rng.choice([0.0, 1e-4, 1e-3, 1e-2, 1e-1])),
            "max_bins": int(rng.choice([64, 128, 255])),
            "random_state": 42,
        }

        clf = HistGradientBoostingClassifier(**candidate)
        clf.fit(X_train, y_train)

        positive_scores = clf.predict_proba(X_val)[:, 1]
        trial_score = float(average_precision_score(y_val_arr, positive_scores))

        # Strict ">" keeps the earliest configuration on ties.
        if trial_score > top_score:
            top_score, top_params = trial_score, candidate

    return {"pr_auc": top_score, "params": top_params}


# Run the validation-scored random search once and keep the winning configuration.
tune_result = tune_hgb_on_val(X_train, y_train, X_val, y_val, n_trials=25, seed=42)
best_params = tune_result["params"]
best_val_pr_auc = tune_result["pr_auc"]

# Echo the result so the chosen configuration is visible in the cell output.
print("best_val_pr_auc:", best_val_pr_auc)
print("best_params:", json.dumps(best_params, ensure_ascii=False, indent=2))


# 
# 3) 최종 학습: train+val로 재학습 후 test 1회 평가
# 
# Refit on train + validation with the tuned parameters before touching the test split.
X_tv = pd.concat([X_train, X_val], axis=0)
y_tv = pd.concat([y_train, y_val], axis=0)

hgb = HistGradientBoostingClassifier(**best_params)
hgb.fit(X_tv, y_tv)

# Column 1 of predict_proba is the probability of class 1 (label == "m2").
test_prob = hgb.predict_proba(X_test)[:, 1]
test_true = np.asarray(y_test).astype(int)


# 
# 4) 출력(콘솔): percentiles JSON + 경로 + metrics JSON
# 
# Identifiers recorded in the saved payloads.
MODEL_ID = "ml__hgb"
SPLIT = "test"

# Writes <stem>_score_percentiles.json under models/metrics and prints it.
save_and_print_score_percentiles(PROJECT_ROOT, MODEL_ID, SPLIT, test_prob)

# PR-AUC plus precision / recall / lift at each top-k% cut, printed as JSON.
metrics_payload = build_ranking_metrics(test_true, test_prob, k_list=(5, 10, 15, 20, 25, 30))
print(json.dumps(metrics_payload, ensure_ascii=False, indent=2))


# 
# 5) 그림: PR 커브 + TopK confusion (DL 형식)
# 
pr_auc_val = float(metrics_payload["PR-AUC (Average Precision)"])

# Precision-recall curve on the test split, labelled with the PR-AUC computed above.
precision, recall, _ = precision_recall_curve(test_true, test_prob)
fig_pr, ax_pr = plt.subplots(figsize=(6, 5))
ax_pr.plot(recall, precision, lw=2, label=f"PR-AUC = {pr_auc_val:.5f}")
ax_pr.set_xlabel("Recall")
ax_pr.set_ylabel("Precision")
ax_pr.set_title("Precision-Recall Curve")
ax_pr.legend()
ax_pr.grid(alpha=0.3)
fig_pr.tight_layout()

# One confusion matrix per top-k% cut; every figure is collected by name.
k_list = [5, 10, 15, 30]
figures = {"pr_curve": fig_pr}

for k in k_list:
    # Rows scoring at or above the k% cut-off are predicted positive. Because of
    # ">=", rows tied at the cut-off are all included, so the predicted-positive
    # share can exceed k%.
    thr = threshold_topk(test_prob, k)
    y_pred_k = (np.asarray(test_prob) >= thr).astype(int)
    figures[f"confusion_matrix_top{k}"] = plot_confusion_matrix(
        test_true,
        y_pred_k,
        title=f"Confusion Matrix (Top {k}%, thr={thr:.5f})",
        labels=("비이탈(m1)", "이탈(m2)"),
        cmap="Blues",
    )


# 
# 6) 저장: save_model_and_artifacts (DL과 동일 흐름)
# 
# Hand the model, metrics, raw predictions and figures to the project's save helper.
# NOTE(review): the return value is iterated below as a mapping of artifact name -> value
# (presumably a path) - confirm against app.utils.save.
saved = save_model_and_artifacts(
    model=hgb,
    model_name="hgb",
    model_type="ml",
    model_id=MODEL_ID,
    split=SPLIT,
    metrics=metrics_payload,
    y_true=test_true,
    y_prob=np.asarray(test_prob).astype(float),
    version="tuned_on_val",
    scaler=None,
    figures=figures,
)

print("saved paths:")
for k, v in saved.items():
    print(f"{k}: {v}")

# Close every figure created above so repeated runs do not accumulate open figures.
plt.close(fig_pr)
for k in k_list:
    plt.close(figures[f"confusion_matrix_top{k}"])

best_val_pr_auc: 0.916921204669227
best_params: {
  "learning_rate": 0.05,
  "max_depth": 3,
  "max_iter": 800,
  "min_samples_leaf": 20,
  "l2_regularization": 0.01,
  "max_bins": 64,
  "random_state": 42
}
{
  "model_id": "ml__hgb",
  "split": "test",
  "percentiles": [
    {
      "pct": 1,
      "score": 0.9445879419430825
    },
    {
      "pct": 5,
      "score": 0.9386033626970024
    },
    {
      "pct": 10,
      "score": 0.9316305644647573
    },
    {
      "pct": 20,
      "score": 0.9164893802819916
    },
    {
      "pct": 30,
      "score": 0.8989172283549766
    },
    {
      "pct": 50,
      "score": 0.8581260113204877
    }
  ]
}
/Users/jy/project_2nd/SKN23-2nd-3Team/models/metrics/hgb_score_percentiles.json
{
  "PR-AUC (Average Precision)": 0.9330339316187535,
  "상위 5% 정밀도 (Precision)": 0.9685719897858966,
  "상위 5% 재현율 (Recall)": 0.055842449774637044,
  "상위 5% 리프트 (Lift)": 1.116991590630645,
  "ranking": [
    {
      "Top_K": "5%",
      "Precision": 0.968571989

In [None]:
from pathlib import Path
import sys
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import (
    precision_recall_curve,
    average_precision_score,
    confusion_matrix,
)

PROJECT_ROOT = Path("/Users/jy/project_2nd/SKN23-2nd-3Team")
sys.path.insert(0, str(PROJECT_ROOT))

from app.utils.save import save_model_and_artifacts

# Korean font configuration is treated as optional: any failure is tolerated so
# the rest of the notebook keeps running, but the reason is now reported instead
# of being discarded silently.
try:
    from app.utils.plotting import configure_matplotlib_korean
    configure_matplotlib_korean()
except Exception as exc:  # deliberately broad: this step is best-effort
    import warnings

    warnings.warn(
        f"Korean matplotlib font configuration skipped: {exc!r}",
        RuntimeWarning,
    )



# utils: 저장/출력

def _stem_from_model_id(model_id: str) -> str:
    if model_id.startswith("dl__mlp_"):
        return "mlp_" + model_id.split("dl__mlp_", 1)[1]
    if model_id.startswith("ml__"):
        return model_id.split("ml__", 1)[1]
    if model_id.startswith("dl__"):
        return model_id.split("dl__", 1)[1]
    return model_id

def save_json_to_metrics_dir(stem: str, filename: str, payload: dict):
    """Write ``payload`` as ``<stem>_<filename>.json`` under ``models/metrics``.

    The directory is created if missing. The JSON text and the output path are
    printed to stdout, and the path is returned.
    """
    target_dir = PROJECT_ROOT / "models" / "metrics"
    target_dir.mkdir(parents=True, exist_ok=True)

    out_path = target_dir / f"{stem}_{filename}.json"
    # Serialise once and reuse the text for both the file and the console.
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    out_path.write_text(rendered, encoding="utf-8")

    print(rendered)
    print(str(out_path))
    return out_path

def score_percentiles_payload(model_id: str, split: str, y_prob, pcts=(1, 5, 10, 20, 30, 50)):
    """Build a payload of score cut-offs for the top ``pct`` percent of scores.

    For each ``pct`` the cut-off is the ``1 - pct/100`` quantile of ``y_prob``,
    i.e. the score that the top ``pct`` percent of rows reach or exceed.
    """
    scores = np.asarray(y_prob, dtype=float).reshape(-1)

    rows = []
    for pct in pcts:
        upper_tail_q = 1.0 - pct / 100.0
        rows.append({"pct": int(pct), "score": float(np.quantile(scores, upper_tail_q))})

    return {"model_id": model_id, "split": split, "percentiles": rows}

def build_ranking_metrics(y_true, y_prob, k_list=(5, 10, 15, 20, 25, 30)):
    """Compute PR-AUC plus precision/recall/lift for the top-k% highest scores.

    Args:
        y_true: Binary ground-truth labels (cast to int and flattened).
        y_prob: Predicted scores for the positive class (cast to float and flattened).
        k_list: Percentages of the population to evaluate, e.g. ``5`` means top 5%.

    Returns:
        A dict with the PR-AUC, three "top 5%" summary entries, and ``"ranking"``:
        one ``{"Top_K", "Precision", "Recall", "Lift"}`` row per entry of ``k_list``.

    The "top 5%" summary entries are always computed for k=5. Previously they were
    read from ``ranking[0]``, which was only correct when ``k_list`` started with 5
    and raised ``IndexError`` for an empty ``k_list``.
    """
    y_true = np.asarray(y_true).astype(int).reshape(-1)
    y_prob = np.asarray(y_prob).astype(float).reshape(-1)

    pr_auc = float(average_precision_score(y_true, y_prob))

    df_rank = pd.DataFrame({"y": y_true, "score": y_prob}).sort_values("score", ascending=False)
    base_rate = float(df_rank["y"].mean())
    total_pos = float(df_rank["y"].sum())
    n_total = len(df_rank)

    def _metrics_at(k):
        # Always keep at least one row so tiny samples do not yield an empty selection.
        n_sel = max(int(np.floor(n_total * k / 100)), 1)
        selected = df_rank.iloc[:n_sel]

        precision_k = float(selected["y"].mean())
        # The epsilon avoids a division by zero when there are no positives at all.
        recall_k = float(selected["y"].sum() / (total_pos + 1e-12))
        lift_k = float(precision_k / base_rate) if base_rate > 0 else 0.0
        return {"Top_K": f"{k}%", "Precision": precision_k, "Recall": recall_k, "Lift": lift_k}

    ranking = [_metrics_at(k) for k in k_list]

    # Reuse the 5% row when it was requested; otherwise compute it on its own.
    top5 = next((row for row in ranking if row["Top_K"] == "5%"), None)
    if top5 is None:
        top5 = _metrics_at(5)

    return {
        "PR-AUC (Average Precision)": pr_auc,
        "상위 5% 정밀도 (Precision)": top5["Precision"],
        "상위 5% 재현율 (Recall)": top5["Recall"],
        "상위 5% 리프트 (Lift)": top5["Lift"],
        "ranking": ranking,
    }

def plot_confusion_matrix(y_true, y_pred, title, labels=("비이탈(m1)", "이탈(m2)"), cmap="Blues"):
    """Draw a confusion matrix as an annotated heat map and return the figure.

    Args:
        y_true: Ground-truth class ids (cast to int).
        y_pred: Predicted class ids (cast to int).
        title: Axes title.
        labels: Tick labels; class id ``i`` is shown with ``labels[i]``.
        cmap: Matplotlib colormap name.

    Returns:
        The matplotlib ``Figure``. The caller is responsible for closing it.

    The matrix is always ``len(labels)`` x ``len(labels)``. Previously a split in
    which only one class occurred produced a 1x1 matrix while the ticks stayed
    fixed at ``[0, 1]``, so the plot no longer matched its labels.
    """
    y_true = np.asarray(y_true).astype(int)
    y_pred = np.asarray(y_pred).astype(int)

    # Pin the class ids so the matrix shape never depends on which classes occur.
    class_ids = list(range(len(labels)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    fig, ax = plt.subplots(figsize=(6, 5))
    im = ax.imshow(cm, cmap=cmap, interpolation="nearest", aspect="equal")
    fig.colorbar(im, ax=ax)

    ax.set_title(title)
    ax.set_xlabel("Predicted (예측값)")
    ax.set_ylabel("Actual (실제값)")

    ax.set_xticks(class_ids)
    ax.set_yticks(class_ids)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.0 if cm.size else 0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(
                j, i, f"{cm[i, j]}",
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black",
                fontsize=12,
            )

    # Explicit limits, with the y axis inverted so row 0 is drawn at the top.
    ax.set_xlim(-0.5, cm.shape[1] - 0.5)
    ax.set_ylim(cm.shape[0] - 0.5, -0.5)
    fig.tight_layout()
    return fig

def threshold_topk(y_prob, k_pct: int) -> float:
    """Return the score of the last row inside the top ``k_pct`` percent.

    The cut-off rank is ``floor(n * k_pct / 100)``, but never less than one row.
    """
    scores = np.asarray(y_prob, dtype=float).reshape(-1)
    cutoff_rank = max(int(np.floor(len(scores) * k_pct / 100)), 1)
    # Sorting the negated scores ascending yields the scores in descending order.
    descending = -np.sort(-scores)
    return float(descending[cutoff_rank - 1])

def make_figures(test_true, test_prob, pr_auc_val, k_list=(5, 10, 15, 30)):
    """Create the PR-curve figure and one top-k% confusion-matrix figure per ``k``.

    Returns:
        A dict mapping ``"pr_curve"`` and ``"confusion_matrix_top<k>"`` to figures.
        The caller is responsible for closing them.
    """

    def _pr_curve_figure():
        # Precision-recall curve, labelled with the PR-AUC supplied by the caller.
        precision, recall, _ = precision_recall_curve(test_true, test_prob)
        fig, ax = plt.subplots(figsize=(6, 5))
        ax.plot(recall, precision, lw=2, label=f"PR-AUC = {pr_auc_val:.5f}")
        ax.set_xlabel("Recall")
        ax.set_ylabel("Precision")
        ax.set_title("Precision-Recall Curve")
        ax.legend()
        ax.grid(alpha=0.3)
        fig.tight_layout()
        return fig

    figures = {"pr_curve": _pr_curve_figure()}

    for pct in k_list:
        cutoff = threshold_topk(test_prob, pct)
        # ">=" keeps rows tied at the cut-off score in the predicted-positive set.
        predicted = (np.asarray(test_prob) >= cutoff).astype(int)
        figures[f"confusion_matrix_top{pct}"] = plot_confusion_matrix(
            test_true,
            predicted,
            title=f"Confusion Matrix (Top {pct}%, thr={cutoff:.5f})",
            labels=("비이탈(m1)", "이탈(m2)"),
            cmap="Blues",
        )

    return figures



# data load / split

# Load the preprocessed feature table and the label table from data/processed.
DATA_DIR = PROJECT_ROOT / "data" / "processed"
features = pd.read_parquet(DATA_DIR / "features_ml_clean.parquet")
labels = pd.read_parquet(DATA_DIR / "labels.parquet")

# Cast the join key to str on both sides so the merge compares like with like.
features["user_id"] = features["user_id"].astype(str)
labels["user_id"] = labels["user_id"].astype(str)

# Inner join keeps only rows present in both tables; validate="one_to_one" makes
# pandas raise if (user_id, anchor_time) is duplicated on either side.
df = features.merge(
    labels[["user_id", "anchor_time", "label", "split"]],
    on=["user_id", "anchor_time"],
    how="inner",
    validate="one_to_one",
)
# Binary target: 1 when label == "m2", else 0.
df["y"] = (df["label"] == "m2").astype(int)

# Every column that is not a key, label, split marker or target is a model feature.
feature_cols = [c for c in df.columns if c not in ["user_id", "anchor_time", "label", "split", "y"]]

train_df = df[df["split"] == "train"]
val_df = df[df["split"] == "val"]
test_df = df[df["split"] == "test"]

# Targets are plain NumPy arrays here; the functions below index them with boolean masks.
X_train, y_train = train_df[feature_cols], train_df["y"].to_numpy()
X_val, y_val = val_df[feature_cols], val_df["y"].to_numpy()
X_test, y_test = test_df[feature_cols], test_df["y"].to_numpy()

# Train + validation stacked, used for the final refit of each version.
X_tv = pd.concat([train_df[feature_cols], val_df[feature_cols]], axis=0)
y_tv = pd.concat([train_df["y"], val_df["y"]], axis=0).to_numpy()


def fit_predict(model_params: dict, use_weights: bool, pos_weight: float | None):
    """Fit one HGB model on the module-level train split and score it on validation.

    Args:
        model_params: Keyword arguments for ``HistGradientBoostingClassifier``.
        use_weights: When true, positive rows get ``pos_weight`` and all others 1.0.
        pos_weight: Weight for rows with ``y_train == 1``; must be a number when
            ``use_weights`` is true.

    Returns:
        ``(fitted_model, validation PR-AUC)``.
    """
    model = HistGradientBoostingClassifier(**model_params)

    fit_kwargs = {}
    if use_weights:
        weights = np.ones_like(y_train, dtype=float)
        weights[y_train == 1] = float(pos_weight)
        fit_kwargs["sample_weight"] = weights
    model.fit(X_train, y_train, **fit_kwargs)

    positive_scores = model.predict_proba(X_val)[:, 1]
    return model, float(average_precision_score(y_val, positive_scores))


def tune_version(version_name: str, search_space: dict, n_trials=25, seed=42):
    """Random-search ``search_space`` and keep the trial with the best validation PR-AUC.

    Args:
        version_name: Label used only in the printed summary line.
        search_space: Maps a parameter name to its list of candidate values. The
            special key ``"pos_weight"`` is not passed to the model; it controls
            the sample weighting applied in ``fit_predict``.
        n_trials: Number of random configurations to try.
        seed: Seed for the sampling generator.

    Returns:
        ``{"val_pr_auc", "params", "pos_weight"}`` for the best trial. With no
        trials the result is ``{"val_pr_auc": -1.0, "params": None}``.
    """
    int_keys = {"max_iter", "min_samples_leaf", "max_bins", "max_leaf_nodes", "n_iter_no_change"}
    float_keys = {"learning_rate", "l2_regularization", "validation_fraction", "pos_weight"}

    def _coerce(name, picked):
        # Convert NumPy scalars to builtins so the parameters stay JSON-serialisable.
        if name in int_keys:
            return int(picked)
        if name == "max_depth":
            return None if picked is None else int(picked)
        if name == "early_stopping":
            return bool(picked)
        if name in float_keys:
            return float(picked)
        return picked

    rng = np.random.default_rng(seed)
    best = {"val_pr_auc": -1.0, "params": None}

    for _trial in range(n_trials):
        # One draw per key, in search_space order, so the random sequence is unchanged.
        sampled = {name: _coerce(name, rng.choice(options)) for name, options in search_space.items()}

        pos_weight = sampled.pop("pos_weight", None)
        # NOTE(review): weights at or below 1.0 (e.g. a neg/pos ratio < 1) are ignored,
        # so those trials train unweighted - confirm this is intended.
        use_weights = pos_weight is not None and pos_weight > 1.0

        trial_params = {**sampled, "random_state": 42}
        _, val_pr_auc = fit_predict(trial_params, use_weights=use_weights, pos_weight=pos_weight)

        # Strict ">" keeps the earliest configuration on ties.
        if val_pr_auc > best["val_pr_auc"]:
            best = {"val_pr_auc": val_pr_auc, "params": dict(trial_params), "pos_weight": pos_weight}

    print(f"[{version_name}] best_val_pr_auc:", best["val_pr_auc"])
    return best


def train_eval_save(version_name: str, best: dict, model_id="ml__hgb", split="test"):
    """Refit the best configuration on train+val, evaluate on test and save everything.

    Args:
        version_name: Version label; part of every output file stem.
        best: Result of ``tune_version`` (``"params"``, ``"val_pr_auc"`` and
            optionally ``"pos_weight"``).
        model_id: Model identifier recorded in the payloads.
        split: Split name recorded in the test payloads.

    Returns:
        ``{"version", "test_pr_auc", "saved"}`` where ``saved`` is whatever
        ``save_model_and_artifacts`` returned.

    Figures are closed in a ``finally`` block by iterating the figure dict, so
    they are released even if saving fails, and the cleanup no longer repeats
    the list of k values passed to ``make_figures``.
    """
    pos_weight = best.get("pos_weight", None)
    # NOTE(review): same gate as in tune_version - weights at or below 1.0 are
    # ignored. Keep the two in sync if this rule changes.
    use_weights = pos_weight is not None and pos_weight > 1.0

    model_params = dict(best["params"])
    hgb = HistGradientBoostingClassifier(**model_params)

    y_tv_arr = np.asarray(y_tv).astype(int)
    if use_weights:
        w_tv = np.ones_like(y_tv_arr, dtype=float)
        w_tv[y_tv_arr == 1] = float(pos_weight)
        hgb.fit(X_tv, y_tv_arr, sample_weight=w_tv)
    else:
        hgb.fit(X_tv, y_tv_arr)

    test_prob = hgb.predict_proba(X_test)[:, 1]
    test_true = np.asarray(y_test).astype(int)

    metrics_payload = build_ranking_metrics(test_true, test_prob)
    pr_auc_val = float(metrics_payload["PR-AUC (Average Precision)"])

    # Every JSON file for this version shares the "<model stem>_<version>" prefix.
    stem = f"{_stem_from_model_id(model_id)}_{version_name}"

    sp = score_percentiles_payload(model_id, split, test_prob)
    save_json_to_metrics_dir(stem, "score_percentiles", sp)

    save_json_to_metrics_dir(stem, "metrics", metrics_payload)

    # The tuning record describes the validation search, hence split="val".
    tuning_payload = {
        "model_id": model_id,
        "split": "val",
        "best_val_pr_auc": float(best["val_pr_auc"]),
        "best_params": best["params"],
        "pos_weight": best.get("pos_weight", None),
        "version": version_name,
    }
    save_json_to_metrics_dir(stem, "tuning", tuning_payload)

    figures = make_figures(test_true, test_prob, pr_auc_val, k_list=(5, 10, 15, 30))
    try:
        saved = save_model_and_artifacts(
            model=hgb,
            model_name="hgb",
            model_type="ml",
            model_id=model_id,
            split=split,
            metrics=metrics_payload,
            y_true=test_true,
            y_prob=np.asarray(test_prob).astype(float),
            version=version_name,
            scaler=None,
            figures=figures,
        )
    finally:
        # Close whatever make_figures produced, whether or not saving succeeded.
        for fig in figures.values():
            plt.close(fig)

    print(f"[{version_name}] saved paths:")
    for k, v in saved.items():
        print(f"{k}: {v}")

    return {"version": version_name, "test_pr_auc": pr_auc_val, "saved": saved}



# Version 1: baseline

# Search space for the unweighted baseline: there is no "pos_weight" key, so
# tune_version trains every trial without sample weights.
space_v1 = {
    "learning_rate": [0.03, 0.05, 0.07],
    "max_depth": [3, 4, 5, 6],
    "max_iter": [300, 500],
    "min_samples_leaf": [20, 50, 100],
    "l2_regularization": [0.0, 1e-4, 1e-3, 1e-2],
    "max_bins": [128, 255],
}

# Tune on validation, then refit on train+val and evaluate / save on test.
best_v1 = tune_version("v1_baseline", space_v1, n_trials=25, seed=42)
out_v1 = train_eval_save("v1_baseline", best_v1)



# Version 2: sample_weight(pos_weight)

# Negative-to-positive count ratio in the training split, offered as one candidate
# positive-class weight. The epsilon guards against a split with no positives.
neg = float((y_train == 0).sum())
pos = float((y_train == 1).sum())
ratio = neg / (pos + 1e-12)

# Same grid as v1 plus "pos_weight".
# NOTE(review): tune_version only applies weights when pos_weight > 1.0, so the 1.0
# candidate - and `ratio` whenever positives outnumber negatives - trains unweighted.
space_v2 = {
    "learning_rate": [0.03, 0.05, 0.07],
    "max_depth": [3, 4, 5, 6],
    "max_iter": [300, 500],
    "min_samples_leaf": [20, 50, 100],
    "l2_regularization": [0.0, 1e-4, 1e-3, 1e-2],
    "max_bins": [128, 255],
    "pos_weight": [1.0, 2.0, 5.0, 10.0, ratio],
}

# A different seed from v1, so the sampled configurations differ.
best_v2 = tune_version("v2_weighted", space_v2, n_trials=25, seed=43)
out_v2 = train_eval_save("v2_weighted", best_v2)



# Version 3: weight + max_leaf_nodes + early_stopping

# Wider grid: adds max_leaf_nodes, an unlimited-depth option (max_depth=None) and
# the estimator's built-in early stopping, which is always enabled here.
# NOTE(review): with early_stopping=True the estimator holds out validation_fraction
# of whatever data it is fitted on, including the train+val refit - confirm intended.
space_v3 = {
    "learning_rate": [0.02, 0.03, 0.05, 0.07],
    "max_depth": [None, 3, 4, 5, 6],
    "max_leaf_nodes": [15, 31, 63, 127],
    "max_iter": [500, 800],
    "min_samples_leaf": [20, 50, 100],
    "l2_regularization": [0.0, 1e-4, 1e-3, 1e-2, 1e-1],
    "max_bins": [128, 255],
    "early_stopping": [True],
    "validation_fraction": [0.05, 0.1],
    "n_iter_no_change": [10, 20],
    "pos_weight": [2.0, 5.0, 10.0, ratio],
}

best_v3 = tune_version("v3_leaf_es", space_v3, n_trials=25, seed=44)
out_v3 = train_eval_save("v3_leaf_es", best_v3)

def _get_rank_item(metrics_payload: dict, k_pct: int):
    target = f"{k_pct}%"
    for row in metrics_payload.get("ranking", []):
        if row.get("Top_K") == target:
            return row
    return None

def _selection_score(metrics_payload: dict):
    """Collapse a metrics payload into one weighted score for comparing versions.

    The score is dominated by PR-AUC, emphasises recall at 10% and 30%, and adds
    a small contribution from lift. Missing ranking rows contribute 0.0.
    """

    def _value(row, key):
        # A missing (or empty) ranking row counts as zero.
        return float(row[key]) if row else 0.0

    pr = float(metrics_payload.get("PR-AUC (Average Precision)", 0.0))

    top5 = _get_rank_item(metrics_payload, 5)
    top10 = _get_rank_item(metrics_payload, 10)
    top30 = _get_rank_item(metrics_payload, 30)

    recall5 = _value(top5, "Recall")
    recall10 = _value(top10, "Recall")
    recall30 = _value(top30, "Recall")
    lift5 = _value(top5, "Lift")
    lift10 = _value(top10, "Lift")

    weighted_terms = (
        (0.55, pr),
        (0.20, recall10),
        (0.15, recall30),
        (0.05, recall5),
        (0.03, lift10),
        (0.02, lift5),
    )

    # Accumulate left to right so the float result matches a plain chained sum.
    total = 0.0
    for weight, value in weighted_terms:
        total += weight * value
    return float(total)

def summarize_version(version_obj):
    """Return the version name stored in a ``train_eval_save`` result.

    ``train_eval_save`` writes its metrics to the metrics directory, but its return
    value only carries ``test_pr_auc``. Returning the metrics payload from
    ``train_eval_save`` would be the cleanest option; for now the comparison code
    re-reads the saved metrics file for each version instead, so this helper only
    exposes the version name.
    """
    return version_obj["version"]

# Reload the saved metrics files for comparison (file name rule: hgb_<version>_metrics.json).
METRICS_DIR = PROJECT_ROOT / "models" / "metrics"

def load_metrics_for(version_name: str, model_id="ml__hgb"):
    """Read ``<model stem>_<version_name>_metrics.json`` from ``METRICS_DIR``.

    Returns:
        ``(path, parsed_payload)``.
    """
    model_stem = _stem_from_model_id(model_id)
    metrics_file = METRICS_DIR / f"{model_stem}_{version_name}_metrics.json"
    return metrics_file, json.loads(metrics_file.read_text(encoding="utf-8"))

# Build one comparison row per version from its saved metrics file.
# NOTE(review): these files are written by train_eval_save from TEST predictions, so
# ranking versions on them is model selection on the test split.
candidates = []
for v in ["v1_baseline", "v2_weighted", "v3_leaf_es"]:
    m_path, m_payload = load_metrics_for(v, model_id="ml__hgb")
    score = _selection_score(m_payload)

    # A missing ranking row falls back to {} so the .get() defaults below apply.
    r5 = _get_rank_item(m_payload, 5) or {}
    r10 = _get_rank_item(m_payload, 10) or {}
    r30 = _get_rank_item(m_payload, 30) or {}

    candidates.append({
        "version": v,
        "score_for_selection": score,
        "pr_auc": float(m_payload.get("PR-AUC (Average Precision)", 0.0)),
        "recall@5": float(r5.get("Recall", 0.0)),
        "recall@10": float(r10.get("Recall", 0.0)),
        "recall@30": float(r30.get("Recall", 0.0)),
        "lift@5": float(r5.get("Lift", 0.0)),
        "lift@10": float(r10.get("Lift", 0.0)),
        "metrics_path": str(m_path),
    })

# Sort by selection score, best first.
candidates_sorted = sorted(candidates, key=lambda x: x["score_for_selection"], reverse=True)

# Print the comparison table to the console.
print("\n===== HGB 3버전 비교 =====")
for row in candidates_sorted:
    print(
        f"- {row['version']}: "
        f"score={row['score_for_selection']:.6f} | "
        f"PR-AUC={row['pr_auc']:.6f} | "
        f"R@5={row['recall@5']:.6f} | R@10={row['recall@10']:.6f} | R@30={row['recall@30']:.6f} | "
        f"Lift@5={row['lift@5']:.6f} | Lift@10={row['lift@10']:.6f}\n"
        f"  metrics: {row['metrics_path']}"
    )

# The highest-scoring version wins.
winner = candidates_sorted[0]

# Record the decision together with the weights that produced it.
# NOTE(review): the weights below are a hand-written copy of the constants in
# _selection_score; keep the two in sync if either changes.
final_selection = {
    "model_id": "ml__hgb",
    "selected_version": winner["version"],
    "selection_score": winner["score_for_selection"],
    "criteria": {
        "weights": {
            "PR-AUC": 0.55,
            "Recall@10": 0.20,
            "Recall@30": 0.15,
            "Recall@5": 0.05,
            "Lift@10": 0.03,
            "Lift@5": 0.02,
        },
        "notes": "VAL로 튜닝하고 TEST는 1회만 평가한 결과(metrics.json) 기반으로 최종 선택",
    },
    "winner_metrics_path": winner["metrics_path"],
}

# Persist the selection next to the per-version metrics files, then echo it.
final_path = METRICS_DIR / "hgb_final_selection.json"
final_path.write_text(json.dumps(final_selection, ensure_ascii=False, indent=2), encoding="utf-8")

print("\n===== 최종 선택 =====")
print(json.dumps(final_selection, ensure_ascii=False, indent=2))
print(str(final_path))

[v1_baseline] best_val_pr_auc: 0.9169934267257506
{
  "model_id": "ml__hgb",
  "split": "test",
  "percentiles": [
    {
      "pct": 1,
      "score": 0.9489377643618517
    },
    {
      "pct": 5,
      "score": 0.939537315663729
    },
    {
      "pct": 10,
      "score": 0.9335220927435196
    },
    {
      "pct": 20,
      "score": 0.9177193105638359
    },
    {
      "pct": 30,
      "score": 0.8983512475413901
    },
    {
      "pct": 50,
      "score": 0.8581209789815387
    }
  ]
}
/Users/jy/project_2nd/SKN23-2nd-3Team/models/metrics/hgb_v1_baseline_score_percentiles.json
{
  "PR-AUC (Average Precision)": 0.9332246854198079,
  "상위 5% 정밀도 (Precision)": 0.966214889019839,
  "상위 5% 재현율 (Recall)": 0.05570655251296686,
  "상위 5% 리프트 (Lift)": 1.1142732983800736,
  "ranking": [
    {
      "Top_K": "5%",
      "Precision": 0.966214889019839,
      "Recall": 0.05570655251296686,
      "Lift": 1.1142732983800736
    },
    {
      "Top_K": "10%",
      "Precision": 0.96258469999017