In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

In [25]:
# 1. 데이터 불러오기 (예: diabetes.csv 파일)
data = pd.read_csv("D:/diabetes.csv")
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [26]:
# 2. 입력값(X), 타겟값(y) 분리 (예: BMI를 회귀 목표로 가정)
X = data.drop(columns='BMI')  # 임의로 하나 제거 (실제 문제에 따라 다름)
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,0.627,50,1
1,1,85,66,29,0,0.351,31,0
2,8,183,64,0,0,0.672,32,1
3,1,89,66,23,94,0.167,21,0
4,0,137,40,35,168,2.288,33,1


In [27]:
y = data['BMI']  # 우리가 예측하고 싶은 연속 값
y.head()

0    33.6
1    26.6
2    23.3
3    28.1
4    43.1
Name: BMI, dtype: float64

In [28]:
# 3. 정규화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [29]:
# 4. train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
X_train.shape,X_test.shape, y_train.shape, y_test.shape

((614, 8), (154, 8), (614,), (154,))

In [30]:
# 5. Tensor 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [31]:
# 6. Dataset, DataLoader 생성
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [32]:
# 7. 회귀 모델 정의
class RegressionModel(nn.Module):
    def __init__(self, input_dim):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

input_dim = X_train.shape[1]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel(input_dim).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [33]:
# 8. 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Loss: 1083.6181
Epoch 2, Loss: 1017.4736
Epoch 3, Loss: 962.0261
Epoch 4, Loss: 825.3861
Epoch 5, Loss: 646.1489
Epoch 6, Loss: 433.5093
Epoch 7, Loss: 239.3596
Epoch 8, Loss: 127.8147
Epoch 9, Loss: 90.6962
Epoch 10, Loss: 77.3297
Epoch 11, Loss: 70.5436
Epoch 12, Loss: 66.4925
Epoch 13, Loss: 62.9820
Epoch 14, Loss: 59.6668
Epoch 15, Loss: 59.0113
Epoch 16, Loss: 55.6997
Epoch 17, Loss: 60.3500
Epoch 18, Loss: 54.7828
Epoch 19, Loss: 52.0978
Epoch 20, Loss: 50.6687
Epoch 21, Loss: 50.7596
Epoch 22, Loss: 59.3633
Epoch 23, Loss: 49.8816
Epoch 24, Loss: 48.7492
Epoch 25, Loss: 47.9403
Epoch 26, Loss: 45.8091
Epoch 27, Loss: 44.8987
Epoch 28, Loss: 46.8223
Epoch 29, Loss: 52.3885
Epoch 30, Loss: 44.3103
Epoch 31, Loss: 47.5296
Epoch 32, Loss: 44.3352
Epoch 33, Loss: 42.8173
Epoch 34, Loss: 42.0852
Epoch 35, Loss: 41.4194
Epoch 36, Loss: 41.7172
Epoch 37, Loss: 40.7170
Epoch 38, Loss: 41.5922
Epoch 39, Loss: 40.1931
Epoch 40, Loss: 43.5088
Epoch 41, Loss: 40.7968
Epoch 42, Loss:

In [34]:
# 9. 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print("\n[평가 결과]")
print(f"MSE  : {mse:.4f}")


[평가 결과]
MSE  : 62.1846
