In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
# 데이터 로드
df = pd.read_csv('employee.csv')

# 타겟과 피처 분리
target_col = 'Resigned'
X = df.drop(columns=[target_col])
y = df[target_col]

# 범주형 데이터 라벨 인코딩
for col in X.select_dtypes(include='object').columns:
    X[col] = LabelEncoder().fit_transform(X[col].astype(str))

# SMOTE 오버샘플링

# NearMiss 언더샘플링
#nearmiss = NearMiss()
#X_balanced, y_balanced = nearmiss.fit_resample(X_resampled, y_resampled)

# 학습/테스트 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. 랜덤 오버샘플링 적용(소수 클래스 늘리기)
ros = RandomOverSampler(random_state=42)
X_train_res, y_train_res = ros.fit_resample(X_train, y_train)

print('Train 분포 후:', Counter(y_train_res))

# 파이프라인 구성 및 학습
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', MLPRegressor(hidden_layer_sizes=(128, 64), max_iter=500, random_state=42))
])
pipeline.fit(X_train, y_train)

# 평가
y_pred = pipeline.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"R² score: {r2:.4f}")


Train 분포 후: Counter({True: 71961, False: 71961})
R² score: -0.2027


In [4]:
import os
os.environ["MKL_THREADING_LAYER"] = "GNU"


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import f1_score, mean_squared_error
from imblearn.over_sampling import SMOTE
import torch
import torch.nn as nn
import torch.optim as optim

# 1. 데이터 로드
df = pd.read_csv('employee.csv')

# 2. 전처리
df = df.dropna()  # 결측값 제거

# Label Encoding for categorical columns
for col in df.select_dtypes(include='object').columns:
    df[col] = LabelEncoder().fit_transform(df[col])

# Target 분리
X = df.drop('Resigned', axis=1)
y = df['Resigned'].astype(int)

# 3. SMOTE 오버샘플링
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# 4. 데이터 스케일링 및 분할
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_resampled)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

# 5. Torch Tensor 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)



# 6. 딥러닝 모델 정의
class ResignPredictorV2(nn.Module):
    def __init__(self, input_dim):
        super(ResignPredictorV2, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.act1 = nn.LeakyReLU()

        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.act2 = nn.LeakyReLU()

        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.act3 = nn.LeakyReLU()

        self.fc_out = nn.Linear(32, 1)

    def forward(self, x):
        x = self.act1(self.bn1(self.fc1(x)))
        x = self.act2(self.bn2(self.fc2(x)))
        x = self.act3(self.bn3(self.fc3(x)))
        return self.fc_out(x)  # No sigmoid!


model = ResignPredictorV2(X_train.shape[1])
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


# 6. EarlyStopping 구현
patience = 10  # 검증 손실 개선 없을 때 기다리는 최대 epoch 수
best_val_loss = float('inf')
epochs_no_improve = 0
num_epochs = 2000  # 최대 에폭 수

# 7. 학습
epochs = 3000
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 150 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    logits = model(X_test_tensor)
    probs = torch.sigmoid(logits).numpy()
    preds = (probs > 0.5).astype(int)

f1 = f1_score(y_test, preds)


print(f"\n📊 F1 Score: {f1:.4f}")


Epoch 150/3000, Loss: 0.4303
Epoch 300/3000, Loss: 0.3576
Epoch 450/3000, Loss: 0.3259
Epoch 600/3000, Loss: 0.3050
Epoch 750/3000, Loss: 0.2902
Epoch 900/3000, Loss: 0.2796
Epoch 1050/3000, Loss: 0.2719
Epoch 1200/3000, Loss: 0.2644
Epoch 1350/3000, Loss: 0.2587
Epoch 1500/3000, Loss: 0.2531
Epoch 1650/3000, Loss: 0.2498
Epoch 1800/3000, Loss: 0.2474
Epoch 1950/3000, Loss: 0.2432
Epoch 2100/3000, Loss: 0.2400
Epoch 2250/3000, Loss: 0.2375
Epoch 2400/3000, Loss: 0.2359
Epoch 2550/3000, Loss: 0.2344
Epoch 2700/3000, Loss: 0.2322
Epoch 2850/3000, Loss: 0.2302
Epoch 3000/3000, Loss: 0.2296

📊 F1 Score: 0.8179


In [5]:
from sklearn.metrics import classification_report

report = classification_report(y_test, preds, target_names=["재직자(0)", "퇴사자(1)"])
print("\n📋 Classification Report:\n", report)



📋 Classification Report:
               precision    recall  f1-score   support

      재직자(0)       0.83      0.79      0.81     17998
      퇴사자(1)       0.80      0.84      0.82     17998

    accuracy                           0.81     35996
   macro avg       0.81      0.81      0.81     35996
weighted avg       0.81      0.81      0.81     35996

