In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings('ignore')

# 📂 1. 데이터 불러오기
df = pd.read_csv('../data/WA_Fn-UseC_-HR-Employee-Attrition.csv')

# 🎯 2. 타겟 인코딩
df['Attrition'] = df['Attrition'].map({'Yes': 1, 'No': 0})

# 🔄 3. 범주형 → 원핫 인코딩
df = pd.get_dummies(df, drop_first=True)

# ✅ 4. 주요 13개 변수 선택
selected_columns = [
    'Age', 'EnvironmentSatisfaction', 'JobInvolvement', 'JobLevel',
    'JobSatisfaction', 'MonthlyIncome', 'OverTime_Yes', 'StockOptionLevel',
    'TotalWorkingYears', 'YearsAtCompany', 'YearsInCurrentRole',
    'YearsWithCurrManager', 'DistanceFromHome'
]
X = df[selected_columns]
y = df['Attrition']

# ⚖️ 5. 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ⚖️ 6. SMOTE 적용
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# 🤖 7. 개별 모델 정의
log_clf = LogisticRegression(max_iter=1000, random_state=42)
tree_clf = DecisionTreeClassifier(max_depth=5, random_state=42)
svc_clf = SVC(probability=True, kernel='rbf', random_state=42)

# 🤝 8. VotingClassifier 정의 (soft voting)
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('dt', tree_clf),
        ('svc', svc_clf)
    ],
    voting='soft'
)

# 🔁 9. Stratified K-Fold 교차검증
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 📊 10. 여러 평가지표로 성능 측정
scoring = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
cv_results = cross_validate(voting_clf, X_resampled, y_resampled, cv=cv, scoring=scoring)

# 📈 11. 결과 요약 출력
print("✅ VotingClassifier - 5-Fold 교차검증 결과")
for metric in scoring:
    print(f"{metric.upper():<10}: {np.mean(cv_results['test_' + metric]):.4f}")


✅ VotingClassifier - 5-Fold 교차검증 결과
ACCURACY  : 0.8301
PRECISION : 0.8351
RECALL    : 0.8232
F1        : 0.8290
ROC_AUC   : 0.9085
