In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from category_encoders import TargetEncoder

# Build a small synthetic dataset. The global seed is set first and the three
# columns are drawn in a fixed order, so the generated values are reproducible.
np.random.seed(42)
profession_values = np.random.choice(['Engineer', 'Doctor', 'Teacher', 'Artist'], size=100)
degree_values = np.random.choice(['B.Sc', 'B.Tech', 'M.Sc', 'MBA'], size=100)
depression_values = np.random.choice([0, 1], size=100, p=[0.7, 0.3])

data = dict(
    Profession=profession_values,
    Degree=degree_values,
    Depression=depression_values,
)
df = pd.DataFrame(data)

# Train/test split for practice only: both frames come from the same data,
# with the test set being a 20% sample of the rows that also sit in train.
test = df.sample(frac=0.2, random_state=42)
train = df.copy()

In [None]:
# Stratified K-Fold setup
n_splits = 5
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Columns to target-encode and the label they are encoded against
encode_cols = ['Profession', 'Degree']
target_col = 'Depression'

# TargetEncoder setup
target_encoder = TargetEncoder(cols=encode_cols)

# Snapshot the raw categories and collect the encodings in a separate frame.
# Writing encoded values back into `train` inside the loop would make later
# folds fit on rows that already hold numbers instead of categories; by the
# last fold no raw category would be left, and an encoder fitted that way maps
# every validation/test row to the same fallback value.
# NOTE: this cell expects `train` and `test` to still hold the raw string
# categories, so re-run the data cell before re-running this one.
train_raw = train[encode_cols].copy()
train_target = train[target_col]
oof_encoded = pd.DataFrame(np.nan, index=train.index, columns=encode_cols)

# Out-of-fold target encoding
for fold, (train_index, val_index) in enumerate(kf.split(train_raw, train_target)):
    # Learn the encoding from this fold's training rows only; calling fit()
    # again replaces whatever the previous fold learned.
    target_encoder.fit(train_raw.iloc[train_index], train_target.iloc[train_index])

    # Validation rows are only transformed, so their own labels are not used
    val_encoded = target_encoder.transform(train_raw.iloc[val_index])

    # kf.split yields positional indices, so store the result by position
    oof_encoded.iloc[val_index] = val_encoded[encode_cols].to_numpy()

    print(f"Fold {fold + 1} completed.")

# Every row belongs to exactly one validation fold, so nothing may stay unfilled
assert oof_encoded.notna().all().all(), "some training rows were not encoded"

# Replace the raw categories with their out-of-fold encodings
train[encode_cols] = oof_encoded

# Encode the test data with an encoder fitted on the full raw training set,
# not with whichever encoder happened to be left over from the last fold.
target_encoder.fit(train_raw, train_target)
test_encoded = target_encoder.transform(test[encode_cols])
test[encode_cols] = test_encoded

In [None]:
# Inspect the results: the first rows of each encoded frame under its heading.
print("\nEncoded Train Sample:", train.head(), sep="\n")
print("\nEncoded Test Sample:", test.head(), sep="\n")