In [12]:
import pandas as pd
import matplotlib.pyplot as plt

# 데이터 로드
data = pd.read_csv('./dataset/convert.csv')

In [14]:
data = data.drop(columns=['ServiceArea'])

# 컬럼 삭제 확인
print(data.head())

   Churn  MonthlyRevenue  MonthlyMinutes  TotalRecurringCharge  \
0      1           24.00           219.0                  22.0   
1      1           16.99            10.0                  17.0   
2      0           38.00             8.0                  38.0   
3      0           82.28          1312.0                  75.0   
4      1           17.14             0.0                  17.0   

   DirectorAssistedCalls  OverageMinutes  RoamingCalls  PercChangeMinutes  \
0                   0.25             0.0           0.0             -157.0   
1                   0.00             0.0           0.0               -4.0   
2                   0.00             0.0           0.0               -2.0   
3                   1.24             0.0           0.0              157.0   
4                   0.00             0.0           0.0                0.0   

   PercChangeRevenues  DroppedCalls  ...  PrizmCode_Suburban  PrizmCode_Town  \
0               -19.0           0.7  ...                 1.0

In [21]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader 
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 특징과 레이블 분리
X = data.drop(columns=['Churn']).values
y = data['Churn'].values

# 학습/검증 데이터 분할
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# 데이터 정규화
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# PyTorch 텐서로 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

print(X_train_tensor.shape, y_train_tensor.shape)  # 텐서 크기 확인


torch.Size([40524, 52]) torch.Size([40524, 1])


In [22]:
import torch.nn as nn

# 모델 정의
class BinaryClassificationModel(nn.Module):
    def __init__(self, input_dim):
        super(BinaryClassificationModel, self).__init__()
        # 1. 은닉층 정의
        self.layer1 = nn.Linear(input_dim, 64)  # 입력 차원 -> 64차원 은닉층
        self.layer2 = nn.Linear(64, 32)         # 64차원 -> 32차원 은닉층
        
        # 2. 출력층 정의 (이진 분류이므로 출력 차원은 1)
        self.output = nn.Linear(32, 1)
        
        # 활성화 함수 정의
        self.relu = nn.ReLU()     # 은닉층에 사용할 ReLU 활성화 함수
        self.sigmoid = nn.Sigmoid()  # 이진 분류 출력을 위한 Sigmoid 함수

    def forward(self, x):
        # 입력 x를 각 계층을 통과시키며 순전파 수행
        x = self.relu(self.layer1(x))   # 첫 번째 은닉층을 통과하고 ReLU 적용
        x = self.relu(self.layer2(x))   # 두 번째 은닉층을 통과하고 ReLU 적용
        x = self.sigmoid(self.output(x))  # 출력층 통과 후 Sigmoid로 확률로 변환
        return x
# 모델 초기화
input_dim = X_train_tensor.shape[1]  # 입력 차원 설정 (특징 개수)
model = BinaryClassificationModel(input_dim)

In [23]:
import torch.optim as optim

# 손실 함수와 최적화기 정의
criterion = nn.BCELoss()  # 이진 교차 엔트로피 손실 함수
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
# 학습 설정
num_epochs = 1000
batch_size = 32

# 학습 루프
for epoch in range(num_epochs):
    model.train()  # 모델을 학습 모드로 설정
    epoch_loss = 0.0

    # 미니배치 학습
    for i in range(0, X_train_tensor.size(0), batch_size):
        batch_X = X_train_tensor[i:i+batch_size]
        batch_y = y_train_tensor[i:i+batch_size]

        # 순전파
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # 역전파 및 최적화
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # 에포크별 손실 출력
    if epoch % 10 == 9:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(X_train_tensor):.4f}")


Epoch [10/1000], Loss: 0.0130
Epoch [20/1000], Loss: 0.0130
Epoch [30/1000], Loss: 0.0130
Epoch [40/1000], Loss: 0.0130
Epoch [50/1000], Loss: 0.0129
Epoch [60/1000], Loss: 0.0129
Epoch [70/1000], Loss: 0.0130
Epoch [80/1000], Loss: 0.0129
Epoch [90/1000], Loss: 0.0129
Epoch [100/1000], Loss: 0.0129
Epoch [110/1000], Loss: 0.0129
Epoch [120/1000], Loss: 0.0130
Epoch [130/1000], Loss: 0.0130
Epoch [140/1000], Loss: 0.0129
Epoch [150/1000], Loss: 0.0129
Epoch [160/1000], Loss: 0.0129
Epoch [170/1000], Loss: 0.0129
Epoch [180/1000], Loss: 0.0129
Epoch [190/1000], Loss: 0.0129
Epoch [200/1000], Loss: 0.0129
Epoch [210/1000], Loss: 0.0128
Epoch [220/1000], Loss: 0.0128
Epoch [230/1000], Loss: 0.0128
Epoch [240/1000], Loss: 0.0129
Epoch [250/1000], Loss: 0.0128
Epoch [260/1000], Loss: 0.0129
Epoch [270/1000], Loss: 0.0129
Epoch [280/1000], Loss: 0.0128
Epoch [290/1000], Loss: 0.0130
Epoch [300/1000], Loss: 0.0128
Epoch [310/1000], Loss: 0.0129
Epoch [320/1000], Loss: 0.0129
Epoch [330/1000],