In [5]:
# ==========================================
# AdaBoost 모델 학습 후 Validation 평가
# ==========================================

import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

# -------------------------------
# 1️⃣ 데이터 로드
# -------------------------------
tree_data = pd.read_csv('../data/netflix_customer_churn_tree_preprocessed.csv')

X = tree_data.drop(columns=['churned'])
y = tree_data['churned']

# -------------------------------
# 2️⃣ Train/Test 분할 (8:2)
# -------------------------------
X_train, X_test_val, y_train, y_test_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# -------------------------------
# 3️⃣ Train 내부에서 Validation (5:5)
# -------------------------------
X_val_sub, X_test, y_val_sub, y_test = train_test_split(
    X_test_val, y_test_val, test_size=0.5, stratify=y_test_val, random_state=42
)

# -------------------------------
# 4️⃣ 모델 생성 및 학습
# -------------------------------
adaboost_model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(
        max_depth=4,
        class_weight='balanced',
        criterion='gini',
        random_state=42
    ),
    n_estimators=250,
    learning_rate=0.05,
    random_state=42
)

adaboost_model.fit(X_train_sub, y_train_sub)

# -------------------------------
# 5️⃣ 성능 평가
# -------------------------------
# (0) Train 평가
y_train_pred = adaboost_model.predict(X_train)
train_acc = accuracy_score(y_train, y_train_pred)
# (1) Validation 평가
y_val_pred = adaboost_model.predict(X_val_sub)
val_acc = accuracy_score(y_val_sub, y_val_pred)

# (2) Test 평가
y_test_pred = adaboost_model.predict(X_test)
test_acc = accuracy_score(y_test, y_test_pred)

print("=== 성능 평가 결과 ===")
print(f'Train Accuracy : {train_acc:.4f}')
print(f"Validation Accuracy: {val_acc:.4f}")
print(f"Test Accuracy:       {test_acc:.4f}")
print(f"Overfitting Index:   {(val_acc - train_acc):.4f}")
print("\n[Validation Classification Report]")
print(classification_report(y_val_sub, y_val_pred, target_names=['Not Churned', 'Churned']))
print("\n[Test Classification Report]")
print(classification_report(y_test, y_test_pred, target_names=['Not Churned', 'Churned']))

# # -------------------------------
# # 6️⃣ 모델 저장
# # -------------------------------
# joblib.dump(adaboost_model, './models/adaboost_model_with_val.pth')
# print("\n✅ 모델이 저장되었습니다: ./models/adaboost_model_with_val.pth")


=== 성능 평가 결과 ===
Train Accuracy : 0.9457
Validation Accuracy: 0.9360
Test Accuracy:       0.9240
Overfitting Index:   -0.0097

[Validation Classification Report]
              precision    recall  f1-score   support

 Not Churned       0.92      0.96      0.94       248
     Churned       0.95      0.92      0.94       252

    accuracy                           0.94       500
   macro avg       0.94      0.94      0.94       500
weighted avg       0.94      0.94      0.94       500


[Test Classification Report]
              precision    recall  f1-score   support

 Not Churned       0.91      0.94      0.93       249
     Churned       0.94      0.90      0.92       251

    accuracy                           0.92       500
   macro avg       0.92      0.92      0.92       500
weighted avg       0.92      0.92      0.92       500

