## 환경 설정 및 데이터 로드

In [None]:
# ==========================================================
# 02_Model_Training.ipynb - 코드 셀 1
# ==========================================================

# ==========================================================
# 1. 필수 라이브러리 및 모듈 경로 설정
# ==========================================================
import pandas as pd
import numpy as np
import os
import sys

# Make the scripts folder importable so the model_trainer module can be used.
# The membership check keeps sys.path free of duplicate entries when this
# notebook cell is executed more than once in the same kernel session.
SCRIPTS_DIR = '../scripts'
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)

print("--- 모델 학습 환경 설정 완료 ---")

# ==========================================================
# 2. Load the preprocessed dataset
# ==========================================================
# Relative path: resolved against the working directory of the running kernel.
DATA_PATH = '../data/processed/df_final_ml.csv'

df_ml = pd.read_csv(DATA_PATH)

print("\n--- 최종 ML 데이터셋 로드 완료 ---")
print(f"데이터셋 Shape: {df_ml.shape}")
print("주요 컬럼 확인:")

# Preview the log-transformed RFM features together with the target variable.
# A KeyError here means the CSV does not contain one of the expected columns.
print(df_ml[['R_Recency_log', 'F_Frequency_log', 'M_Monetary_log', 'Target_Churn']].head())

## 모듈 호출 및 모델 학습/평가

In [None]:
# ==========================================================
# 02_Model_Training.ipynb - 코드 셀 2
# ==========================================================

# ----------------------------------------------------------
# Step 1: bring in the project's model_trainer module
# (must be importable, e.g. via the scripts folder on sys.path)
# ----------------------------------------------------------
import model_trainer as mt
print("--- model_trainer 모듈 임포트 완료 ---")

# ----------------------------------------------------------
# Step 2: split the dataset into train / test partitions
# ----------------------------------------------------------
target_col = 'Target_Churn'
(X_train, X_test, y_train, y_test) = mt.split_data(
    df_ml,
    target_col,
)

# ----------------------------------------------------------
# Step 3: fit the model and evaluate it on the held-out split
# The helper hands back the fitted model, its predictions and a
# value bound to roc_auc (presumably the ROC-AUC score; confirm in
# model_trainer).
# ----------------------------------------------------------
(logistic_model, y_pred, roc_auc) = mt.train_and_evaluate_model(
    X_train,
    X_test,
    y_train,
    y_test,
)

print("\n--- 모델 학습 및 평가 완료 ---")

## 특성 중요도(계수) 분석 재실행

In [None]:
# ==========================================================
# 02_Model_Training.ipynb - 코드 셀 3
# ==========================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

print("--- XGBoost 특성 중요도 분석 시작 ---")

# ==========================================================
# 1. Extract per-feature importance scores from the trained model
# ==========================================================
# Tree ensembles (e.g. XGBoost) expose `feature_importances_`, whereas linear
# models (e.g. logistic regression) expose `coef_`. The variable is named
# `logistic_model` but the type actually returned by model_trainer is not
# visible here, so both cases are handled instead of assuming one.
if hasattr(logistic_model, 'feature_importances_'):
    importance_scores = logistic_model.feature_importances_
    # NOTE(review): the "(Gain)" label assumes the model's importance type is
    # gain; confirm against the model configuration in model_trainer.
    importance_label = 'Importance Score (Gain)'
    plot_title = 'XGBoost Feature Importance (Top 10)'
elif hasattr(logistic_model, 'coef_'):
    # coef_ can be 1-D or (n_classes, n_features); reduce it to one
    # non-negative magnitude per feature by averaging absolute values.
    importance_scores = np.abs(np.atleast_2d(logistic_model.coef_)).mean(axis=0)
    importance_label = 'Mean |Coefficient|'
    plot_title = 'Feature Importance by |Coefficient| (Top 10)'
else:
    raise AttributeError(
        f"{type(logistic_model).__name__} exposes neither "
        "'feature_importances_' nor 'coef_'; cannot rank features."
    )

# Label each score with its feature name and rank from most to least important.
feature_importances = pd.Series(
    importance_scores,
    index=X_train.columns
).sort_values(ascending=False)

# ==========================================================
# 2. Visualize the top 10 features
# ==========================================================
top_features = feature_importances.head(10)  # computed once, reused for both axes

plt.figure(figsize=(10, 6))
sns.barplot(x=top_features.values, y=top_features.index)
plt.title(plot_title)
plt.xlabel(importance_label)
plt.ylabel('Feature')
plt.show()

# ==========================================================
# 3. Print the 5 highest-ranked features
# ==========================================================
print("\n상위 5개 특성:")
print(feature_importances.head(5))