In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

In [34]:
data = pd.read_csv("C:/Users/admin/Desktop/Homework/AI/AI_Class/Data/diabetes.csv")

data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [36]:
df = data.drop("Outcome", axis=1)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [40]:
df.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
dtype: int64

In [42]:
df["BMI"].value_counts()

BMI
32.0    13
31.6    12
31.2    12
0.0     11
32.4    10
        ..
36.7     1
41.8     1
42.6     1
42.8     1
46.3     1
Name: count, Length: 248, dtype: int64

In [50]:
# 입력과 타겟 분리
X = data.drop('BMI', axis=1).values
y = data['BMI'].values

X, y

(array([[  6.   , 148.   ,  72.   , ...,   0.627,  50.   ,   1.   ],
        [  1.   ,  85.   ,  66.   , ...,   0.351,  31.   ,   0.   ],
        [  8.   , 183.   ,  64.   , ...,   0.672,  32.   ,   1.   ],
        ...,
        [  5.   , 121.   ,  72.   , ...,   0.245,  30.   ,   0.   ],
        [  1.   , 126.   ,  60.   , ...,   0.349,  47.   ,   1.   ],
        [  1.   ,  93.   ,  70.   , ...,   0.315,  23.   ,   0.   ]]),
 array([33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31. , 35.3, 30.5,  0. , 37.6,
        38. , 27.1, 30.1, 25.8, 30. , 45.8, 29.6, 43.3, 34.6, 39.3, 35.4,
        39.8, 29. , 36.6, 31.1, 39.4, 23.2, 22.2, 34.1, 36. , 31.6, 24.8,
        19.9, 27.6, 24. , 33.2, 32.9, 38.2, 37.1, 34. , 40.2, 22.7, 45.4,
        27.4, 42. , 29.7, 28. , 39.1,  0. , 19.4, 24.2, 24.4, 33.7, 34.7,
        23. , 37.7, 46.8, 40.5, 41.5,  0. , 32.9, 25. , 25.4, 32.8, 29. ,
        32.5, 42.7, 19.6, 28.9, 32.9, 28.6, 43.4, 35.1, 32. , 24.7, 32.6,
        37.7, 43.2, 25. , 22.4,  0. , 29.3, 24.6, 48.

In [53]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [57]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((614, 8), (154, 8), (614,), (154,))

In [105]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [133]:
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(8, 82),
            nn.ReLU(),
            nn.Linear(82, 42),
            nn.ReLU(),
            nn.Linear(42, 22),
            nn.ReLU(),
            nn.Linear(22, 1)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [135]:
model.train()
for epoch in range(150):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch.unsqueeze(1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

print("Training complete.")

Epoch 1, Loss: 1079.2922
Epoch 2, Loss: 1019.2215
Epoch 3, Loss: 852.6365
Epoch 4, Loss: 493.0766
Epoch 5, Loss: 146.9082
Epoch 6, Loss: 87.9128
Epoch 7, Loss: 72.7620
Epoch 8, Loss: 69.4069
Epoch 9, Loss: 64.2343
Epoch 10, Loss: 61.4394
Epoch 11, Loss: 59.3139
Epoch 12, Loss: 58.2184
Epoch 13, Loss: 57.1738
Epoch 14, Loss: 54.8034
Epoch 15, Loss: 60.6897
Epoch 16, Loss: 51.6309
Epoch 17, Loss: 50.6143
Epoch 18, Loss: 48.9389
Epoch 19, Loss: 47.6847
Epoch 20, Loss: 46.3085
Epoch 21, Loss: 48.4884
Epoch 22, Loss: 45.0990
Epoch 23, Loss: 46.0530
Epoch 24, Loss: 44.3170
Epoch 25, Loss: 43.8127
Epoch 26, Loss: 42.7197
Epoch 27, Loss: 41.4271
Epoch 28, Loss: 45.9073
Epoch 29, Loss: 40.2411
Epoch 30, Loss: 40.5726
Epoch 31, Loss: 39.3509
Epoch 32, Loss: 41.6422
Epoch 33, Loss: 38.9265
Epoch 34, Loss: 38.9867
Epoch 35, Loss: 38.0321
Epoch 36, Loss: 38.1050
Epoch 37, Loss: 37.3555
Epoch 38, Loss: 37.6633
Epoch 39, Loss: 38.7222
Epoch 40, Loss: 37.5447
Epoch 41, Loss: 35.8074
Epoch 42, Loss: 35

In [136]:
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")


Test MSE: 63.7950
