In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchsummary import summary

# Goal: drop Outcome and predict BMI as a regression problem
# Load the dataset
DATA_URL = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/diabetes.csv"
df = pd.read_csv(DATA_URL)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [None]:
# Summarize the frame: column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [None]:
# Show the column labels of the loaded frame
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [None]:
# Column names of the dataset, in the order they appear in the frame
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

In [None]:
from sklearn.preprocessing import LabelEncoder

# LabelEncoder is meant for categorical labels. Applying it to a numeric column
# replaces every value with the index of that value among the sorted unique
# values, which throws away the real magnitudes. For the continuous BMI target
# that turns the task into "predict the rank code" instead of "predict BMI".
# Only non-numeric columns are therefore encoded; numeric columns are left
# untouched. `label_encoders` maps each encoded column to its fitted encoder
# (it stays empty when every column is already numeric).
label_encoders = {}
for column in columns:
    if pd.api.types.is_numeric_dtype(df[column]):
        continue
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

In [None]:
# Split into features (X) and target (Y)
# errors='ignore' keeps this cell re-runnable: on a second run 'Outcome' has
# already been removed from df and a plain drop would raise KeyError.
df = df.drop(columns='Outcome', errors='ignore')
# Features are every remaining column except the BMI target
X = df.drop(columns='BMI')
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,DiabetesPedigreeFunction,Age
0,6,86,22,28,0,350,29
1,1,23,19,22,0,196,10
2,8,121,17,0,0,368,11
3,1,27,19,16,62,53,0
4,0,75,4,28,102,514,12


In [None]:
# Regression target: the BMI column of df
y = df['BMI']
y.head()

Unnamed: 0,BMI
0,123
1,62
2,30
3,77
4,209


In [None]:
# Convert features and target to numpy arrays. np.asarray is a no-op on an
# array, so this cell can be re-run safely. The target is reshaped to a single
# column because StandardScaler expects 2-D input.
X = np.asarray(X)
Y = np.asarray(y).reshape(-1, 1)

# Split BEFORE scaling so that test-set statistics never influence the scalers
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize: fit on the training split only, then apply the same transform
# to the test split. Features and target get separate scalers so that neither
# set of fitted statistics is overwritten by the other.
x_scaler = StandardScaler()
X_train = x_scaler.fit_transform(X_train)
X_test = x_scaler.transform(X_test)

y_scaler = StandardScaler()
y_train = y_scaler.fit_transform(y_train)
y_test = y_scaler.transform(y_test)

# Backward-compatible alias: `scaler` previously ended up fitted on the target,
# so it keeps pointing at the target scaler (usable for inverse_transform on
# predictions).
scaler = y_scaler

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((614, 7), (154, 7), (614, 1), (154, 1))

In [None]:
# Convert every split to a float32 PyTorch tensor in one pass.
# The right-hand tuple is built from the current arrays before any name is rebound.
X_train, y_train, X_test, y_test = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_test, y_test)
)

In [None]:
# Create datasets and DataLoaders
BATCH_SIZE = 32

# Training data is reshuffled every epoch
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# `train_dataloader` used to be a second, identical loader over the same
# dataset; it is kept as an alias so any code using that name still works.
train_dataloader = train_loader

# Test data is iterated in its stored order (no shuffling)
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [None]:
# Regression model definition
class RegressionModel(nn.Module):
    """Fully connected regression network: Linear/ReLU hidden layers and a linear output.

    Args:
        in_features: Number of input features. Defaults to 7.
        hidden_sizes: Width of each hidden layer, in order; every hidden layer
            is followed by a ReLU. Defaults to (64, 32).
        out_features: Number of regression outputs. Defaults to 1.

    With the default arguments the architecture is
    Linear(7, 64) -> ReLU -> Linear(64, 32) -> ReLU -> Linear(32, 1).
    """

    def __init__(self, in_features=7, hidden_sizes=(64, 32), out_features=1):
        super().__init__()
        layers = []
        previous_width = in_features
        for width in hidden_sizes:
            layers.append(nn.Linear(previous_width, width))
            layers.append(nn.ReLU())
            previous_width = width
        layers.append(nn.Linear(previous_width, out_features))
        # Stored under the attribute name `model` so state_dict keys
        # (model.0.weight, model.2.weight, ...) stay the same as before.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run the network on `x` (last dimension = in_features) and return the predictions."""
        return self.model(x)

# Seed PyTorch's global RNG so weight initialisation (and any later sampling
# that draws from the default generator, such as shuffled DataLoaders) is
# reproducible from a fresh kernel.
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
# Mean squared error loss, optimised with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training loop
def _run_batch(features, targets):
    """Perform one optimisation step on a mini-batch and return its loss as a float."""
    features = features.to(device)
    # Targets are cast to float and shaped as a column to match the model output
    targets = targets.to(device).float().view(-1, 1)

    optimizer.zero_grad()
    batch_loss = criterion(model(features), targets)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item()


model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        total_loss += _run_batch(X_batch, y_batch)

    # Report the mean of the per-batch losses for this epoch
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 0.9913
Epoch 2, Loss: 0.8365
Epoch 3, Loss: 0.7754
Epoch 4, Loss: 0.7248
Epoch 5, Loss: 0.6774
Epoch 6, Loss: 0.6445
Epoch 7, Loss: 0.6435
Epoch 8, Loss: 0.6033
Epoch 9, Loss: 0.5921
Epoch 10, Loss: 0.6332
Epoch 11, Loss: 0.5975
Epoch 12, Loss: 0.6088
Epoch 13, Loss: 0.5687
Epoch 14, Loss: 0.5437
Epoch 15, Loss: 0.5292
Epoch 16, Loss: 0.5313
Epoch 17, Loss: 0.5413
Epoch 18, Loss: 0.5382
Epoch 19, Loss: 0.5277
Epoch 20, Loss: 0.5167
Epoch 21, Loss: 0.5028
Epoch 22, Loss: 0.5161
Epoch 23, Loss: 0.4896
Epoch 24, Loss: 0.5178
Epoch 25, Loss: 0.4911
Epoch 26, Loss: 0.4979
Epoch 27, Loss: 0.5176
Epoch 28, Loss: 0.4804
Epoch 29, Loss: 0.4927
Epoch 30, Loss: 0.4866
Epoch 31, Loss: 0.4984
Epoch 32, Loss: 0.4684
Epoch 33, Loss: 0.4734
Epoch 34, Loss: 0.4645
Epoch 35, Loss: 0.4584
Epoch 36, Loss: 0.4614
Epoch 37, Loss: 0.4688
Epoch 38, Loss: 0.4550
Epoch 39, Loss: 0.4738
Epoch 40, Loss: 0.4663
Epoch 41, Loss: 0.4706
Epoch 42, Loss: 0.4463
Epoch 43, Loss: 0.4467
Epoch 44, Loss: 0.46

In [None]:
from sklearn.metrics import mean_squared_error
import math

# Evaluation: collect model predictions and ground-truth targets over the test set
model.eval()
preds, actuals = [], []
with torch.no_grad():  # gradients are not needed for inference
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        # Bring predictions back to the CPU as numpy before collecting them
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

# MSE on the test set (in the units of the scaled target the model was trained on)
mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")

# RMSE is derived from the MSE computed above rather than from a pasted
# constant, so it stays correct whenever the model or data change.
rmse = math.sqrt(mse)
print(f"RMSE: {rmse:.4f}")

Test MSE: 0.7402
RMSE: 0.8603
