In [17]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay

import matplotlib.pyplot as plt
from torchsummary import summary
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [18]:
data = pd.read_csv('./diabetes.csv') 

In [19]:
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [20]:
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [21]:
# 데이터와 타겟 분리
X = data.drop(['BMI','Outcome'], axis=1).values
y = data['BMI'].values

In [22]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [23]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape, 

((614, 7), (154, 7), (614,), (154,))

In [24]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [25]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)


In [26]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((614, 7), (154, 7), (614,), (154,))

In [27]:
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(7, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [28]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 1076.7052
Epoch 2, Loss: 1026.3632
Epoch 3, Loss: 966.8694
Epoch 4, Loss: 832.4641
Epoch 5, Loss: 663.0128
Epoch 6, Loss: 448.8494
Epoch 7, Loss: 263.4546
Epoch 8, Loss: 143.8760
Epoch 9, Loss: 98.7065
Epoch 10, Loss: 90.0690
Epoch 11, Loss: 84.3899
Epoch 12, Loss: 81.4453
Epoch 13, Loss: 77.9140
Epoch 14, Loss: 75.9893
Epoch 15, Loss: 77.5850
Epoch 16, Loss: 70.1904
Epoch 17, Loss: 67.8411
Epoch 18, Loss: 66.2904
Epoch 19, Loss: 64.2373
Epoch 20, Loss: 64.4892
Epoch 21, Loss: 62.5568
Epoch 22, Loss: 63.7333
Epoch 23, Loss: 60.2048
Epoch 24, Loss: 59.5990
Epoch 25, Loss: 64.6823
Epoch 26, Loss: 57.1958
Epoch 27, Loss: 55.9474
Epoch 28, Loss: 54.0152
Epoch 29, Loss: 54.0581
Epoch 30, Loss: 52.0233
Epoch 31, Loss: 51.9259
Epoch 32, Loss: 51.7149
Epoch 33, Loss: 55.8939
Epoch 34, Loss: 50.5430
Epoch 35, Loss: 54.6279
Epoch 36, Loss: 48.0824
Epoch 37, Loss: 47.6983
Epoch 38, Loss: 48.4217
Epoch 39, Loss: 46.2377
Epoch 40, Loss: 46.1076
Epoch 41, Loss: 54.1444
Epoch 42, Loss:

In [29]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")

Test MSE: 53.8768
