In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim


In [2]:
# 데이터 로드
data = pd.read_csv('C:\\AI\\diabetes.csv')

In [3]:
# 범주형 인코딩
# data['Sex'] = LabelEncoder().fit_transform(data['Sex']) 문자일 때만 사용

In [4]:
# Outcome (라벨) 제거 ( 새로운 라벨은 BMI )
data.drop(columns=['Outcome'], inplace=True)

# 입력과 타겟 분리
X = data.drop('BMI', axis=1).values
y = data['BMI'].values.astype(np.float32)

In [5]:
# 정규화
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# TensorDataset으로 래핑
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [7]:
# 회귀 모델 정의 ( 초기화 함수 따로, 활성화 함수 따로 )
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.fc1 = nn.Linear(7, 64)  # feature 개수 일치하는지 확인
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1) # 출력층 개수 확인. 회귀면 1, 이진분류여도 1

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [8]:
# 학습 루프
model.train()
for epoch in range(150):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 1072.8564
Epoch 2, Loss: 1039.6273
Epoch 3, Loss: 955.9292
Epoch 4, Loss: 827.3420
Epoch 5, Loss: 664.3208
Epoch 6, Loss: 437.2649
Epoch 7, Loss: 251.1629
Epoch 8, Loss: 142.7884
Epoch 9, Loss: 103.1214
Epoch 10, Loss: 85.7699
Epoch 11, Loss: 80.8368
Epoch 12, Loss: 76.3044
Epoch 13, Loss: 81.5447
Epoch 14, Loss: 70.1560
Epoch 15, Loss: 67.5790
Epoch 16, Loss: 66.9220
Epoch 17, Loss: 64.9077
Epoch 18, Loss: 64.0950
Epoch 19, Loss: 63.5556
Epoch 20, Loss: 59.9275
Epoch 21, Loss: 59.0125
Epoch 22, Loss: 58.6481
Epoch 23, Loss: 57.9405
Epoch 24, Loss: 56.6631
Epoch 25, Loss: 54.2768
Epoch 26, Loss: 53.7504
Epoch 27, Loss: 52.7637
Epoch 28, Loss: 52.9567
Epoch 29, Loss: 53.1084
Epoch 30, Loss: 55.7417
Epoch 31, Loss: 51.4663
Epoch 32, Loss: 51.9814
Epoch 33, Loss: 49.0067
Epoch 34, Loss: 47.1808
Epoch 35, Loss: 47.7064
Epoch 36, Loss: 54.8491
Epoch 37, Loss: 45.5649
Epoch 38, Loss: 46.9707
Epoch 39, Loss: 48.4790
Epoch 40, Loss: 44.2971
Epoch 41, Loss: 43.3591
Epoch 42, Loss

In [9]:
# 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")

Test MSE: 61.7531
