In [58]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim


In [59]:
# 데이터 로드
# data = pd.read_csv('C:\\AI\\diabetes.csv')

In [60]:
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"



# 데이터 읽기

data = pd.read_csv(url)

In [61]:
data

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [62]:
# 범주형 인코딩
data['sex'] = LabelEncoder().fit_transform(data['sex']) # 문자일 때만 사용
data['smoker'] = LabelEncoder().fit_transform(data['smoker'])
data['region'] = LabelEncoder().fit_transform(data['region'])

In [63]:
# Outcome (라벨) 제거 ( 새로운 라벨은 BMI )
# data.drop(columns=['Outcome'], inplace=True)

# 입력과 타겟 분리
X = data.drop('charges', axis=1).values
y = data['charges'].values.astype(np.float32)

In [64]:
# 정규화
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [65]:

# TensorDataset으로 래핑
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)


In [66]:

# 회귀 모델 정의
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(6, 64),  # feature 개수 일치하는지 확인
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1) # 출력층 개수 확인. 회귀면 1, 이진분류여도 1
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [67]:

# 학습 루프
model.train()
for epoch in range(150):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Loss: 322617399.0588
Epoch 2, Loss: 324912239.2941
Epoch 3, Loss: 321879846.5882
Epoch 4, Loss: 321672504.0000
Epoch 5, Loss: 321577080.9412
Epoch 6, Loss: 323333636.2353
Epoch 7, Loss: 325634293.6471
Epoch 8, Loss: 315743273.4118
Epoch 9, Loss: 318134726.8235
Epoch 10, Loss: 309521865.8824
Epoch 11, Loss: 305027711.5294
Epoch 12, Loss: 302570604.2353
Epoch 13, Loss: 296436033.4118
Epoch 14, Loss: 286231592.9412
Epoch 15, Loss: 276753394.8235
Epoch 16, Loss: 272037130.8235
Epoch 17, Loss: 259888063.5294
Epoch 18, Loss: 248543254.5882
Epoch 19, Loss: 234538391.2941
Epoch 20, Loss: 222364322.8235
Epoch 21, Loss: 204853320.7059
Epoch 22, Loss: 191815887.7647
Epoch 23, Loss: 183206266.8235
Epoch 24, Loss: 165020304.0000
Epoch 25, Loss: 151842301.6471
Epoch 26, Loss: 137233570.6471
Epoch 27, Loss: 124021328.1176
Epoch 28, Loss: 112681581.8824
Epoch 29, Loss: 102059529.0588
Epoch 30, Loss: 95233122.4412
Epoch 31, Loss: 83824956.0000
Epoch 32, Loss: 76212514.7647
Epoch 33, Loss: 6907

In [68]:

# 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")


Test MSE: 30012529.2261
