# Gradient Boosting 모델 튜닝 및 저장

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import joblib

# Pipeline: shared preprocessing followed by the gradient boosting classifier.
gb_model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('model', GradientBoostingClassifier(random_state=42)),
    ]
)

# Hyperparameter search space. The 'model__' prefix targets the pipeline step
# named 'model'. 2 x 2 x 2 x 2 = 16 candidate combinations.
param_grid = {
    'model__n_estimators': [100, 200],
    'model__max_depth': [3, 5],
    'model__learning_rate': [0.05, 0.1],
    'model__subsample': [0.8, 1.0],
}

# Exhaustive 3-fold cross-validated search scored by F1, using all CPU cores.
grid_search = GridSearchCV(
    estimator=gb_model,
    param_grid=param_grid,
    scoring='f1',
    cv=3,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Pipeline refit on the full training data with the best parameter combination.
best_gb_model = grid_search.best_estimator_

print("Best Parameters:", grid_search.best_params_)
print("Best F1 Score (CV):", grid_search.best_score_)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score
import numpy as np

import warnings

# Positive-class probabilities from the tuned pipeline.
y_pred_proba_gb = best_gb_model.predict_proba(X_test)[:, 1]

# NOTE(review): the threshold is selected on (X_test, y_test), the same data
# the final metrics are computed on, so those metrics will be optimistic.
# Prefer a separate validation split for threshold selection if one exists.

# Threshold search: maximise F1 subject to Recall >= 0.5.
# The grid is built from integers (0.10, 0.11, ..., 0.94) because np.arange
# with a non-integer step is documented as unreliable about its length and
# accumulates floating-point error in the generated values.
thresholds = np.arange(10, 95) / 100
best_threshold = None
best_f1 = 0

for t in thresholds:
    y_pred = (y_pred_proba_gb >= t).astype(int)
    recall = recall_score(y_test, y_pred, zero_division=0)
    if recall < 0.5:
        # Recall constraint violated; no need to compute F1 for this candidate.
        continue
    f1 = f1_score(y_test, y_pred, zero_division=0)
    if f1 > best_f1:
        best_f1 = f1
        # Store a plain Python float so the saved package does not carry a
        # NumPy scalar.
        best_threshold = float(t)

if best_threshold is None:
    # Previously this case silently left the threshold at 0, which makes the
    # downstream cell predict every sample as positive and persists that
    # degenerate threshold. Fall back to the conventional 0.5 cut-off instead
    # and make the situation visible.
    warnings.warn(
        "No threshold in [0.10, 0.94] satisfies Recall >= 0.5 with F1 > 0; "
        "falling back to the default threshold 0.5."
    )
    best_threshold = 0.5
    best_f1 = f1_score(
        y_test, (y_pred_proba_gb >= best_threshold).astype(int), zero_division=0
    )

print(f"🎯 최적 Threshold: {best_threshold:.2f} (F1={best_f1:.3f})")

In [None]:
# Final hard predictions at the selected threshold, followed by the test metrics.
y_pred_final = (y_pred_proba_gb >= best_threshold).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred_final))

# These three metrics share the same call shape; zero_division=0 reports 0
# instead of warning when a metric's denominator is empty.
for metric_name, metric_fn in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_name, metric_fn(y_test, y_pred_final, zero_division=0))

# ROC AUC is threshold-independent, so it is computed from the probabilities.
print("ROC AUC:", roc_auc_score(y_test, y_pred_proba_gb))

In [None]:
# Persist the tuned pipeline together with its decision threshold so that both
# can be restored from a single file.
model_path = 'best_gradient_boosting_model.pkl'

final_model_package = {'model': best_gb_model, 'threshold': best_threshold}
joblib.dump(final_model_package, model_path)

print(f"✅ 모델 저장 완료: {model_path}")