## **DL_Pythorch_diabetes_Regression**

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

In [2]:
import pandas as pd
import os

# 데이터 불러오기
file_name = 'diabetes.csv'
full_file_path = os.path.join(os.getcwd() + "\\" + file_name)
df = pd.read_csv(full_file_path)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# 결측치 확인
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [4]:
# 기존 레이블인 Outcome 제거
df = df.drop('Outcome', axis = 1)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [None]:
# 전체 상관계수
correlations = df.corr()  
print(correlations['BMI'].sort_values(ascending = False))

BMI                         1.000000
SkinThickness               0.392573
BloodPressure               0.281805
Glucose                     0.221071
Insulin                     0.197859
DiabetesPedigreeFunction    0.140647
Age                         0.036242
Pregnancies                 0.017683
Name: BMI, dtype: float64


In [5]:
from sklearn.preprocessing import StandardScaler

# 피처, 레이블 데이터
X = df.drop('BMI', axis = 1).values
y = df['BMI'].values

# 데이터 스케일링
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0) 
X_train.shape, X_test.shape, y_train.shape, y_test.shape 

((614, 7), (154, 7), (614,), (154,))

In [7]:
# TensorDataset으로 래핑
X_train_tensor = torch.tensor(X_train, dtype = torch.float32)
y_train_tensor = torch.tensor(y_train, dtype = torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype = torch.float32)
y_test_tensor = torch.tensor(y_test, dtype = torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = 32)

In [None]:
# 회귀 모델 정의
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(7, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)


In [9]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Loss: 1086.4501
Epoch 2, Loss: 1050.4532
Epoch 3, Loss: 936.2800
Epoch 4, Loss: 767.2215
Epoch 5, Loss: 569.8611
Epoch 6, Loss: 350.4359
Epoch 7, Loss: 183.5774
Epoch 8, Loss: 105.8494
Epoch 9, Loss: 86.2805
Epoch 10, Loss: 80.1958
Epoch 11, Loss: 77.1250
Epoch 12, Loss: 71.7146
Epoch 13, Loss: 72.6140
Epoch 14, Loss: 67.5246
Epoch 15, Loss: 65.7638
Epoch 16, Loss: 64.8507
Epoch 17, Loss: 63.5943
Epoch 18, Loss: 62.0654
Epoch 19, Loss: 61.8182
Epoch 20, Loss: 60.1776
Epoch 21, Loss: 57.5720
Epoch 22, Loss: 56.6236
Epoch 23, Loss: 55.8528
Epoch 24, Loss: 55.8495
Epoch 25, Loss: 54.1082
Epoch 26, Loss: 52.5750
Epoch 27, Loss: 52.2308
Epoch 28, Loss: 57.9142
Epoch 29, Loss: 50.2964
Epoch 30, Loss: 49.6171
Epoch 31, Loss: 49.4410
Epoch 32, Loss: 47.7048
Epoch 33, Loss: 48.0444
Epoch 34, Loss: 47.3471
Epoch 35, Loss: 46.7545
Epoch 36, Loss: 47.0590
Epoch 37, Loss: 51.7505
Epoch 38, Loss: 45.4966
Epoch 39, Loss: 49.6624
Epoch 40, Loss: 51.3147
Epoch 41, Loss: 44.6556
Epoch 42, Loss:

In [11]:
# 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")


Test MSE: 52.1146


In [16]:
from sklearn.metrics import mean_absolute_error, r2_score

mae = mean_absolute_error(actuals, preds)
rmse = np.sqrt(mean_squared_error(actuals, preds))
r2 = r2_score(actuals, preds)

print(f"MAE : {mae:.4f}")    # 평균 절대 오차
print(f"RMSE: {rmse:.4f}")   # 평균 제곱근 오차 (MSE의 루트)
print(f"R²  : {r2:.4f}")     # 설명력 (1에 가까울수록 좋음)


MAE : 5.1315
RMSE: 7.2190
R²  : 0.1220
