In [8]:
# === Config =========================================================
CSV_PATH = '../data/training/final/travel_insight_pruned.csv'
TARGET   = "IS_FAILED_TRIP"
ID_COLS  = ["TRAVEL_ID"]                           # 학습에서 제외(누수/과적합 위험)
CAT_COLS = ["TRAVEL_SEASON", "activity_type_catboost"]  # CatBoost 범주형

# 클러스터/거리 피처 생성에 사용할 컬럼(네가 보낸 목록)
CLUSTERING_FEATURES = [
    'TRAVEL_LENGTH','activity_payment_sum','visit_move_cnt',
    'TRAVEL_SEASON_encoded','activity_type_1','activity_type_2','activity_type_3',
    'activity_type_4','activity_type_5','activity_type_6','activity_type_7','activity_type_99',
    'activity_history_rows','visit_move_cnt_per_day','activity_payment_sum_per_day',
    'activity_type_1_per_day','activity_type_2_per_day','activity_type_3_per_day',
    'activity_type_4_per_day','activity_type_5_per_day','activity_type_6_per_day',
    'activity_type_7_per_day','activity_type_99_per_day','activity_history_rows_per_day'
]

# CatBoost 기본 파라미터(PR-AUC 최적화용; 필요시 네 튜닝값 넣어도 됨)
CB_PARAMS = dict(
    loss_function="Logloss",
    eval_metric="PRAUC",
    depth=6,
    iterations=3000,
    learning_rate=0.03,
    l2_leaf_reg=6.0,
    min_data_in_leaf=20,
    rsm=0.9,
    border_count=128,
    bootstrap_type="Bayesian",
    bagging_temperature=1.0,
    random_seed=42,
    verbose=False,
    early_stopping_rounds=100,
    # 둘 중 하나만 사용: 아래 한 줄 주석 해제(자동) 또는 class_weights 사용(수동)
    auto_class_weights="Balanced",
    # class_weights=[1.0, 4.0],
)

# === Imports ========================================================
import warnings, numpy as np, pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from catboost import CatBoostClassifier

# UMAP (설치 안 되어 있으면 자동 스킵)
try:
    import umap
    UMAP_AVAILABLE = True
except Exception:
    UMAP_AVAILABLE = False

warnings.filterwarnings("ignore")

# === Load ===========================================================
df = pd.read_csv(CSV_PATH, low_memory=False)
assert TARGET in df.columns, f"Target '{TARGET}' not found."

# 모델 입력 기본셋 구성
X_all = df.drop(columns=[TARGET] + [c for c in ID_COLS if c in df.columns]).copy()
y_all = df[TARGET].astype(int).values

# cat_features index (CatBoost에서 필요)
cat_features = [X_all.columns.get_loc(c) for c in CAT_COLS if c in X_all.columns]

# === Helper: fold 안에서 파생특징 생성(누수 방지) =======================
def fit_transform_cluster_features(X_tr, y_tr, X_va, features,
                                   k_success=3, k_fail=3,
                                   use_lof=False, use_if=True, use_umap=UMAP_AVAILABLE,
                                   random_state=42):
    """
    X_tr, X_va는 DataFrame (원-스케일). features는 수치형 열 리스트.
    반환: (X_tr_aug, X_va_aug) - 원본 + 새 피처 컬럼 결합
    """
    feats = [f for f in features if f in X_tr.columns]
    Xtr_f = X_tr[feats].fillna(0.0).astype(float).copy()
    Xva_f = X_va[feats].fillna(0.0).astype(float).copy()

    # 표준화는 train 기준으로만 fit
    scaler = StandardScaler()
    Xtr_z = scaler.fit_transform(Xtr_f)
    Xva_z = scaler.transform(Xva_f)

    # 성공/실패로 분리
    tr_succ = Xtr_z[y_tr==0]
    tr_fail = Xtr_z[y_tr==1]

    # --- KMeans: 성공/실패 각각 ---
    def km_block(data, k):
        # 표본 수가 k 보다 작으면 k 줄이기
        kk = int(min(max(k,1), max(1, len(data))))
        return KMeans(n_clusters=kk, n_init="auto", random_state=random_state).fit(data)

    km_s = km_block(tr_succ, k_success)
    km_f = km_block(tr_fail,  k_fail)

    # 거리와 argmin
    def km_feats(km, Z):
        # 유클리드 거리
        d = np.linalg.norm(Z[:,None,:] - km.cluster_centers_[None,:,:], axis=2)  # (n, k)
        d_min = d.min(axis=1)
        argmin = d.argmin(axis=1)
        return d, d_min, argmin

    d_s_tr, smin_tr, sidx_tr = km_feats(km_s, Xtr_z)
    d_f_tr, fmin_tr, fidx_tr = km_feats(km_f, Xtr_z)
    d_s_va, smin_va, sidx_va = km_feats(km_s, Xva_z)
    d_f_va, fmin_va, fidx_va = km_feats(km_f, Xva_z)

    # 마진(양수면 성공 쪽에 더 가까움)
    margin_tr = fmin_tr - smin_tr
    margin_va = fmin_va - smin_va

    # one-hot 대신 소수 피처만: 가장 가까운 거리 + argmin(정수), 마진
    X_tr_new = pd.DataFrame({
        "s_min_dist": smin_tr,
        "f_min_dist": fmin_tr,
        "km_margin": margin_tr,
        "s_argmin": sidx_tr.astype(int),
        "f_argmin": fidx_tr.astype(int),
    }, index=X_tr.index)
    X_va_new = pd.DataFrame({
        "s_min_dist": smin_va,
        "f_min_dist": fmin_va,
        "km_margin": margin_va,
        "s_argmin": sidx_va.astype(int),
        "f_argmin": fidx_va.astype(int),
    }, index=X_va.index)

    # --- 이상치 점수(선택) ---
    if use_if:
        # 각 클래스 별로 학습 → 전체에 적용(점수 부호 맞춤)
        IF_s = IsolationForest(n_estimators=200, contamination="auto", random_state=random_state)
        IF_f = IsolationForest(n_estimators=200, contamination="auto", random_state=random_state)
        IF_s.fit(tr_succ)
        IF_f.fit(tr_fail)
        X_tr_new["if_success"] = IF_s.score_samples(Xtr_z)
        X_tr_new["if_fail"]    = IF_f.score_samples(Xtr_z)
        X_va_new["if_success"] = IF_s.score_samples(Xva_z)
        X_va_new["if_fail"]    = IF_f.score_samples(Xva_z)

    if use_lof:
        # LOF은 fit_predict 방식이라 transform이 번거롭고 속도 느릴 수 있음(옵션)
        lof = LocalOutlierFactor(n_neighbors=25, novelty=True)
        lof.fit(Xtr_z)
        X_tr_new["lof_score"] = lof.score_samples(Xtr_z)
        X_va_new["lof_score"] = lof.score_samples(Xva_z)

    # --- UMAP 좌표(선택) ---
    if use_umap:
        try:
            um = umap.UMAP(n_neighbors=30, min_dist=0.2, n_components=2, random_state=random_state)
            um.fit(Xtr_z)
            um_tr = um.transform(Xtr_z)
            um_va = um.transform(Xva_z)
            X_tr_new["umap_x"] = um_tr[:,0]; X_tr_new["umap_y"] = um_tr[:,1]
            X_va_new["umap_x"] = um_va[:,0]; X_va_new["umap_y"] = um_va[:,1]
        except Exception as e:
            print(f"[UMAP skipped] {e}")

    # 원본 X와 수치형 새 피처 결합 (범주형 cat_features는 원본 컬럼 그대로 사용)
    X_tr_aug = pd.concat([X_tr.reset_index(drop=True), X_tr_new.reset_index(drop=True)], axis=1)
    X_va_aug = pd.concat([X_va.reset_index(drop=True), X_va_new.reset_index(drop=True)], axis=1)
    return X_tr_aug, X_va_aug

# === 5-fold OOF 평가: 기준선 vs 파생특징 추가 ===========================
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

def oof_prauc(X, y, add_cluster_feats=False):
    oof = np.zeros(len(X))
    for fold, (tr, va) in enumerate(skf.split(X, y), 1):
        X_tr, X_va = X.iloc[tr].copy(), X.iloc[va].copy()
        y_tr, y_va = y[tr], y[va]

        # 클러스터/거리/이상치/UMAP 파생특징(누수방지: fold-train에만 fit)
        if add_cluster_feats:
            X_tr, X_va = fit_transform_cluster_features(
                X_tr, y_tr, X_va, CLUSTERING_FEATURES,
                k_success=3, k_fail=3, use_lof=False, use_if=True, use_umap=UMAP_AVAILABLE
            )

        # CatBoost 학습
        model = CatBoostClassifier(**CB_PARAMS)
        model.fit(
            X_tr, y_tr,
            eval_set=(X_va, y_va),
            cat_features=cat_features,  # 원본 cat_features 인덱스: X_tr/X_va 컬럼 순서가 동일해야 함
            use_best_model=True,
        )
        oof[va] = model.predict_proba(X_va)[:,1]

        ap = average_precision_score(y_va, oof[va])
        print(f"[Fold {fold}] PR-AUC: {ap:.4f} | add_feats={add_cluster_feats}")

    ap_all = average_precision_score(y, oof)
    return ap_all, oof

# 컬럼 정렬 일관성 보장(파생 추가 전 기준)
X_base = X_all.copy()
# cat_features 인덱스는 X_base 기준으로 계산됨
cat_features = [X_base.columns.get_loc(c) for c in CAT_COLS if c in X_base.columns]

print(">>> Baseline (no extra features)")
ap_base, oof_base = oof_prauc(X_base, y_all, add_cluster_feats=False)
print(f"OOF PR-AUC (Baseline): {ap_base:.4f}\n")

print(">>> With cluster/distance/anomaly/umap features")
ap_aug,  oof_aug  = oof_prauc(X_base, y_all, add_cluster_feats=True)
print(f"OOF PR-AUC (Augmented): {ap_aug:.4f}\n")

impr = ap_aug - ap_base
print(f"Δ PR-AUC (Augmented - Baseline): {impr:.4f}")

# 결과 저장(원하면 추후 분석/임곗값 튜닝에 활용)
pd.DataFrame({"oof_base": oof_base, "oof_aug": oof_aug, TARGET: y_all}).to_csv(
    "outputs/00_EDA/oof_probabilities_compare.csv", index=False
)
print("Saved: oof_probabilities_compare.csv")


>>> Baseline (no extra features)
[Fold 1] PR-AUC: 0.3497 | add_feats=False
[Fold 2] PR-AUC: 0.3042 | add_feats=False
[Fold 3] PR-AUC: 0.3231 | add_feats=False
[Fold 4] PR-AUC: 0.3497 | add_feats=False
[Fold 5] PR-AUC: 0.3565 | add_feats=False
OOF PR-AUC (Baseline): 0.3267

>>> With cluster/distance/anomaly/umap features
[Fold 1] PR-AUC: 0.3460 | add_feats=True
[Fold 2] PR-AUC: 0.3198 | add_feats=True
[Fold 3] PR-AUC: 0.3064 | add_feats=True
[Fold 4] PR-AUC: 0.3545 | add_feats=True
[Fold 5] PR-AUC: 0.3557 | add_feats=True
OOF PR-AUC (Augmented): 0.3279

Δ PR-AUC (Augmented - Baseline): 0.0013
Saved: oof_probabilities_compare.csv


In [9]:
# === Sweep: cluster/IF/UMAP 조합 + CatBoost 소탐색 (FIXED) =============
import numpy as np, pandas as pd, warnings
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from catboost import CatBoostClassifier

try:
    import umap
    UMAP_AVAILABLE = True
except Exception:
    UMAP_AVAILABLE = False

warnings.filterwarnings("ignore")

CSV_PATH = "../data/training/final/travel_insight_pruned.csv"
TARGET   = "IS_FAILED_TRIP"
ID_COLS  = ["TRAVEL_ID"]
CAT_COLS = ["TRAVEL_SEASON", "activity_type_catboost"]

CLUSTERING_FEATURES = [
    'TRAVEL_LENGTH','activity_payment_sum','visit_move_cnt',
    'TRAVEL_SEASON_encoded','activity_type_1','activity_type_2','activity_type_3',
    'activity_type_4','activity_type_5','activity_type_6','activity_type_7','activity_type_99',
    'activity_history_rows','visit_move_cnt_per_day','activity_payment_sum_per_day',
    'activity_type_1_per_day','activity_type_2_per_day','activity_type_3_per_day',
    'activity_type_4_per_day','activity_type_5_per_day','activity_type_6_per_day',
    'activity_type_7_per_day','activity_type_99_per_day','activity_history_rows_per_day'
]

df = pd.read_csv(CSV_PATH, low_memory=False)
X = df.drop(columns=[TARGET] + [c for c in ID_COLS if c in df.columns]).copy()
y = df[TARGET].astype(int).values
base_cat_idx = [X.columns.get_loc(c) for c in CAT_COLS if c in X.columns]

def build_cluster_feats(X_tr, y_tr, X_va, features,
                        k_success=3, k_fail=3,
                        use_if=True, use_umap=True, random_state=42):
    """누수 방지를 위해 fold-train에서만 fit하고 val에 transform."""
    feats = [f for f in features if f in X_tr.columns]
    XtrF = X_tr[feats].fillna(0.0).astype(float).copy()
    XvaF = X_va[feats].fillna(0.0).astype(float).copy()

    # (1) 표준화: train fit -> train/val transform
    scaler = StandardScaler().fit(XtrF)
    Ztr = scaler.transform(XtrF)
    Zva = scaler.transform(XvaF)

    # (2) 성공/실패 분리
    tr_succ = Ztr[y_tr==0]
    tr_fail = Ztr[y_tr==1]

    # (3) KMeans: 성공/실패 각각
    def fit_km(Z, k):
        k = int(min(max(k,1), max(1, len(Z))))
        return KMeans(n_clusters=k, n_init="auto", random_state=random_state).fit(Z)

    km_s = fit_km(tr_succ, k_success)
    km_f = fit_km(tr_fail,  k_fail)

    def dmin_argmin(km, Z):
        # 유클리드 거리 to centroids
        d = np.linalg.norm(Z[:, None, :] - km.cluster_centers_[None, :, :], axis=2)
        return d.min(axis=1), d.argmin(axis=1)

    smin_tr, sidx_tr = dmin_argmin(km_s, Ztr); fmin_tr, fidx_tr = dmin_argmin(km_f, Ztr)
    smin_va, sidx_va = dmin_argmin(km_s, Zva); fmin_va, fidx_va = dmin_argmin(km_f, Zva)

    df_tr = pd.DataFrame({
        "s_min_dist": smin_tr,
        "f_min_dist": fmin_tr,
        "km_margin": fmin_tr - smin_tr,   # +면 성공쪽 가까움
        "s_argmin": sidx_tr.astype(int),
        "f_argmin": fidx_tr.astype(int),
    }, index=X_tr.index)

    df_va = pd.DataFrame({
        "s_min_dist": smin_va,
        "f_min_dist": fmin_va,
        "km_margin": fmin_va - smin_va,
        "s_argmin": sidx_va.astype(int),
        "f_argmin": fidx_va.astype(int),
    }, index=X_va.index)

    # (4) 이상치 점수(선택)
    if use_if:
        IF_s = IsolationForest(n_estimators=200, random_state=random_state).fit(tr_succ)
        IF_f = IsolationForest(n_estimators=200, random_state=random_state).fit(tr_fail)
        df_tr["if_success"] = IF_s.score_samples(Ztr); df_tr["if_fail"] = IF_f.score_samples(Ztr)
        df_va["if_success"] = IF_s.score_samples(Zva); df_va["if_fail"] = IF_f.score_samples(Zva)

    # (5) UMAP 좌표(선택)
    if use_umap and UMAP_AVAILABLE:
        try:
            um = umap.UMAP(n_neighbors=30, min_dist=0.2, n_components=2, random_state=random_state).fit(Ztr)
            uv_tr = um.transform(Ztr); uv_va = um.transform(Zva)
            df_tr["umap_x"], df_tr["umap_y"] = uv_tr[:,0], uv_tr[:,1]
            df_va["umap_x"], df_va["umap_y"] = uv_va[:,0], uv_va[:,1]
        except Exception as e:
            print(f"[UMAP skipped] {e}")

    # (6) 원본과 수치 피처를 가로 결합 — axis=1 명시 (FIX)
    X_tr_aug = pd.concat([X_tr.reset_index(drop=True), df_tr.reset_index(drop=True)], axis=1)
    X_va_aug = pd.concat([X_va.reset_index(drop=True), df_va.reset_index(drop=True)], axis=1)
    return X_tr_aug, X_va_aug

def oof_ap(X, y, add_feats, km_s, km_f, use_if, use_umap, cb_params):
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    oof = np.zeros(len(X))
    for tr, va in skf.split(X, y):
        X_tr, X_va, y_tr, y_va = X.iloc[tr].copy(), X.iloc[va].copy(), y[tr], y[va]
        if add_feats:
            X_tr, X_va = build_cluster_feats(X_tr, y_tr, X_va, CLUSTERING_FEATURES,
                                             k_success=km_s, k_fail=km_f,
                                             use_if=use_if, use_umap=use_umap)
        # cat_features 인덱스는 현재 DataFrame 기준으로 다시 계산
        cat_idx = [X_tr.columns.get_loc(c) for c in CAT_COLS if c in X_tr.columns]

        model = CatBoostClassifier(**cb_params)
        model.fit(X_tr, y_tr, eval_set=(X_va, y_va),
                  cat_features=cat_idx, use_best_model=True)
        oof[va] = model.predict_proba(X_va)[:,1]
    return average_precision_score(y, oof)

# 가벼운 검색 공간
km_s_list = [2,3,4]
km_f_list = [2,3,4]
use_if_list = [True, False]
use_umap_list = [False, True]  # UMAP 미설치면 내부에서 무시

cb_space = [
    dict(depth=6, iterations=3000, learning_rate=0.03, l2_leaf_reg=6.0, min_data_in_leaf=20,
         rsm=0.9, border_count=128, bootstrap_type="Bayesian", bagging_temperature=1.0,
         loss_function="Logloss", eval_metric="PRAUC", auto_class_weights="Balanced",
         random_seed=42, verbose=False, early_stopping_rounds=100),
    dict(depth=7, iterations=3000, learning_rate=0.03, l2_leaf_reg=4.0, min_data_in_leaf=20,
         rsm=0.9, border_count=128, bootstrap_type="Bayesian", bagging_temperature=2.0,
         loss_function="Logloss", eval_metric="PRAUC", auto_class_weights="Balanced",
         random_seed=42, verbose=False, early_stopping_rounds=100),
    dict(depth=6, iterations=3000, learning_rate=0.03, l2_leaf_reg=6.0, min_data_in_leaf=20,
         rsm=0.9, border_count=128, bootstrap_type="Bernoulli", subsample=0.8,
         loss_function="Logloss", eval_metric="PRAUC", class_weights=[1.0,4.0],
         random_seed=42, verbose=False, early_stopping_rounds=100),
]

# Baseline
ap_base = oof_ap(X, y, add_feats=False, km_s=3, km_f=3, use_if=True, use_umap=False, cb_params=cb_space[0])
print(f"Baseline OOF PR-AUC: {ap_base:.4f}")

# Sweep
results = []
for cb_idx, cbp in enumerate(cb_space):
    for km_s in km_s_list:
        for km_f in km_f_list:
            for use_if in use_if_list:
                for use_umap in use_umap_list:
                    ap = oof_ap(X, y, add_feats=True, km_s=km_s, km_f=km_f,
                                use_if=use_if, use_umap=use_umap, cb_params=cbp)
                    results.append(dict(cb_idx=cb_idx, km_s=km_s, km_f=km_f,
                                        use_if=use_if, use_umap=use_umap, ap=ap))
                    print(f"[cb{cb_idx}] kS={km_s} kF={km_f} IF={use_if} UMAP={use_umap} -> OOF PR-AUC {ap:.4f}")

res_df = pd.DataFrame(results).sort_values("ap", ascending=False)
res_df.to_csv("outputs/00_EDA/cluster_sweep_results.csv", index=False)
print("\nTop-10 configs:")
display(res_df.head(10))
print("Saved: cluster_sweep_results.csv")


Baseline OOF PR-AUC: 0.3267
[cb0] kS=2 kF=2 IF=True UMAP=False -> OOF PR-AUC 0.3291
[cb0] kS=2 kF=2 IF=True UMAP=True -> OOF PR-AUC 0.3291
[cb0] kS=2 kF=2 IF=False UMAP=False -> OOF PR-AUC 0.3284
[cb0] kS=2 kF=2 IF=False UMAP=True -> OOF PR-AUC 0.3284
[cb0] kS=2 kF=3 IF=True UMAP=False -> OOF PR-AUC 0.3185
[cb0] kS=2 kF=3 IF=True UMAP=True -> OOF PR-AUC 0.3185
[cb0] kS=2 kF=3 IF=False UMAP=False -> OOF PR-AUC 0.3268
[cb0] kS=2 kF=3 IF=False UMAP=True -> OOF PR-AUC 0.3268
[cb0] kS=2 kF=4 IF=True UMAP=False -> OOF PR-AUC 0.3240
[cb0] kS=2 kF=4 IF=True UMAP=True -> OOF PR-AUC 0.3240
[cb0] kS=2 kF=4 IF=False UMAP=False -> OOF PR-AUC 0.3304
[cb0] kS=2 kF=4 IF=False UMAP=True -> OOF PR-AUC 0.3304
[cb0] kS=3 kF=2 IF=True UMAP=False -> OOF PR-AUC 0.3272
[cb0] kS=3 kF=2 IF=True UMAP=True -> OOF PR-AUC 0.3272
[cb0] kS=3 kF=2 IF=False UMAP=False -> OOF PR-AUC 0.3298
[cb0] kS=3 kF=2 IF=False UMAP=True -> OOF PR-AUC 0.3298
[cb0] kS=3 kF=3 IF=True UMAP=False -> OOF PR-AUC 0.3279
[cb0] kS=3 kF=3 IF=T

Unnamed: 0,cb_idx,km_s,km_f,use_if,use_umap,ap
55,1,3,3,False,True,0.335666
54,1,3,3,False,False,0.335666
19,0,3,3,False,True,0.334688
18,0,3,3,False,False,0.334688
38,1,2,2,False,False,0.334173
39,1,2,2,False,True,0.334173
23,0,3,4,False,True,0.33366
22,0,3,4,False,False,0.33366
48,1,3,2,True,False,0.331707
49,1,3,2,True,True,0.331707


Saved: cluster_sweep_results.csv


In [10]:
# === Threshold tuning: F1 최대 & 목표 리콜 달성 임곗값 ===================
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_curve, f1_score

# A 단계에서 저장된 oof 파일 사용(없으면 모델 확률 벡터를 바로 넣어도 됨)
df_oof = pd.read_csv("outputs/00_EDA/oof_probabilities_compare.csv")
y = df_oof["IS_FAILED_TRIP"].values
p = df_oof["oof_aug"].values  # 선택: oof_base / oof_aug 등

prec, rec, thr = precision_recall_curve(y, p)
f1 = 2*prec*rec/(prec+rec+1e-9)
best_idx = np.nanargmax(f1[:-1]); best_thr = float(thr[best_idx])
y_hat = (p >= best_thr).astype(int)

print(f"Best F1 threshold: {best_thr:.4f} | F1={f1[best_idx]:.4f}")

# 목표 리콜(예: 0.5, 0.6)을 달성하는 최소 임곗값
for target_recall in [0.4, 0.5, 0.6]:
    idx = np.where(rec[:-1] >= target_recall)[0]
    if len(idx):
        t = float(thr[idx[-1]])  # 가장 높은 임곗값 중 리콜 충족
        y_t = (p >= t).astype(int)
        print(f"Recall≥{target_recall:.1f} -> thr={t:.4f}, "
              f"Precision={ (y[y_t==1].mean() if (y_t==1).any() else 0):.4f}, "
              f"F1={f1_score(y, y_t):.4f}")
    else:
        print(f"Recall≥{target_recall:.1f} 달성 불가")


Best F1 threshold: 0.4183 | F1=0.4491
Recall≥0.4 -> thr=0.5080, Precision=0.3244, F1=0.3583
Recall≥0.5 -> thr=0.5021, Precision=0.3217, F1=0.3922
Recall≥0.6 -> thr=0.5000, Precision=0.3160, F1=0.4166
