In [47]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay

import matplotlib.pyplot as plt
from torchsummary import summary
import numpy as np

In [48]:
data = pd.read_csv('./diabetes.csv') 

In [49]:
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [50]:
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [51]:
X = data.drop(['BMI', 'Outcome'], axis=1).values  
y = data['BMI'].values  

In [52]:
# 정규화
scaler = StandardScaler()
X = scaler.fit_transform(X)

y = scaler.fit_transform(y.reshape(-1, 1))

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [53]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [54]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [55]:
X_train_tensor.shape, y_train_tensor.shape, X_test_tensor.shape, y_test_tensor.shape

(torch.Size([614, 7]),
 torch.Size([614, 1]),
 torch.Size([154, 7]),
 torch.Size([154, 1]))

In [56]:
# 회귀 모델 정의
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(7, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [57]:
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 0.9493
Epoch 2, Loss: 0.9146
Epoch 3, Loss: 0.7686
Epoch 4, Loss: 0.7266
Epoch 5, Loss: 0.7172
Epoch 6, Loss: 0.7072
Epoch 7, Loss: 0.6672
Epoch 8, Loss: 0.6362
Epoch 9, Loss: 0.6187
Epoch 10, Loss: 0.5993
Epoch 11, Loss: 0.5888
Epoch 12, Loss: 0.5798
Epoch 13, Loss: 0.5824
Epoch 14, Loss: 0.5821
Epoch 15, Loss: 0.5602
Epoch 16, Loss: 0.5465
Epoch 17, Loss: 0.5350
Epoch 18, Loss: 0.5529
Epoch 19, Loss: 0.5279
Epoch 20, Loss: 0.5386
Epoch 21, Loss: 0.5149
Epoch 22, Loss: 0.5289
Epoch 23, Loss: 0.5150
Epoch 24, Loss: 0.5036
Epoch 25, Loss: 0.5219
Epoch 26, Loss: 0.5382
Epoch 27, Loss: 0.4942
Epoch 28, Loss: 0.4868
Epoch 29, Loss: 0.5082
Epoch 30, Loss: 0.4905
Epoch 31, Loss: 0.4989
Epoch 32, Loss: 0.4766
Epoch 33, Loss: 0.5402
Epoch 34, Loss: 0.5586
Epoch 35, Loss: 0.4833
Epoch 36, Loss: 0.4927
Epoch 37, Loss: 0.4690
Epoch 38, Loss: 0.4749
Epoch 39, Loss: 0.4509
Epoch 40, Loss: 0.4633
Epoch 41, Loss: 0.4590
Epoch 42, Loss: 0.4555
Epoch 43, Loss: 0.5004
Epoch 44, Loss: 0.49

In [None]:
from sklearn.metrics import mean_squared_error

model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")


NameError: name 'mean_squared_error' is not defined