In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    f1_score, accuracy_score, precision_score, recall_score, roc_auc_score,
    classification_report
)
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings('ignore')

# 📂 1. 데이터 불러오기
df = pd.read_csv('../data/WA_Fn-UseC_-HR-Employee-Attrition.csv')

# 🎯 2. 타겟 인코딩
df['Attrition'] = df['Attrition'].map({'Yes': 1, 'No': 0})

# 🔄 3. 범주형 → 원핫 인코딩
df = pd.get_dummies(df, drop_first=True)

# ✅ 4. 주요 13개 변수 선택
selected_columns = [
    'Age', 'EnvironmentSatisfaction', 'JobInvolvement', 'JobLevel',
    'JobSatisfaction', 'MonthlyIncome', 'OverTime_Yes', 'StockOptionLevel',
    'TotalWorkingYears', 'YearsAtCompany', 'YearsInCurrentRole',
    'YearsWithCurrManager', 'DistanceFromHome'
]
X = df[selected_columns]
y = df['Attrition']

# ⚖️ 5. 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ⚖️ 6. SMOTE 적용
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# 🤖 7. MLP 모델 정의
model = MLPClassifier(
    hidden_layer_sizes=(100,),  # 은닉층 1개, 노드 100개
    activation='relu',          # 활성화 함수
    solver='adam',              # 옵티마이저
    max_iter=300,               # 최대 반복 수
    random_state=42
)

# 🔁 8. Stratified K-Fold 교차검증
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 📊 9. 여러 평가지표로 성능 측정
scoring = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
cv_results = cross_validate(model, X_resampled, y_resampled, cv=cv, scoring=scoring)

# 📈 10. 결과 요약 출력
print("✅ 5-Fold 교차검증 결과")
for metric in scoring:
    print(f"{metric.upper():<10}: {np.mean(cv_results['test_' + metric]):.4f}")


✅ 5-Fold 교차검증 결과
ACCURACY  : 0.8820
PRECISION : 0.8598
RECALL    : 0.9132
F1        : 0.8856
ROC_AUC   : 0.9385
