In [591]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [592]:
# GPU 사용
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [593]:
X, y = datasets.fetch_openml('boston', return_X_y=True)
print(X)
print(y)

        CRIM    ZN  INDUS CHAS    NOX     RM   AGE     DIS RAD    TAX  \
0    0.00632  18.0   2.31    0  0.538  6.575  65.2  4.0900   1  296.0   
1    0.02731   0.0   7.07    0  0.469  6.421  78.9  4.9671   2  242.0   
2    0.02729   0.0   7.07    0  0.469  7.185  61.1  4.9671   2  242.0   
3    0.03237   0.0   2.18    0  0.458  6.998  45.8  6.0622   3  222.0   
4    0.06905   0.0   2.18    0  0.458  7.147  54.2  6.0622   3  222.0   
..       ...   ...    ...  ...    ...    ...   ...     ...  ..    ...   
501  0.06263   0.0  11.93    0  0.573  6.593  69.1  2.4786   1  273.0   
502  0.04527   0.0  11.93    0  0.573  6.120  76.7  2.2875   1  273.0   
503  0.06076   0.0  11.93    0  0.573  6.976  91.0  2.1675   1  273.0   
504  0.10959   0.0  11.93    0  0.573  6.794  89.3  2.3889   1  273.0   
505  0.04741   0.0  11.93    0  0.573  6.030  80.8  2.5050   1  273.0   

     PTRATIO       B  LSTAT  
0       15.3  396.90   4.98  
1       17.8  396.90   9.14  
2       17.8  392.83   4.03  
3  

- version 1, status: active
  url: https://www.openml.org/search?type=data&id=531
- version 2, status: active
  url: https://www.openml.org/search?type=data&id=853



In [594]:
# 피처 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [595]:
# 훈련, 검증 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=1/10, random_state=0)

# 텐서로 변환
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train.to_numpy(), dtype=torch.float32).to(device)
y_test = torch.tensor(y_test.to_numpy(), dtype=torch.float32).to(device)

# 데이터셋, 미니배치 분할
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [596]:
# 모델 정의 13 - 32 - 32 - 1, 회귀문제
# 피처 [CRIM, ZN, INDUS, CHAS, NOX, RM, AGE, DIS, RAD, TAX, PTRATIO, B, LSTAT] 13개
class Boston_Net(nn.Module):
    def __init__(self) -> None:
        super(Boston_Net, self).__init__()
        self.fc1 = nn.Linear(13, 32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(32, 32)
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out
    
model = Boston_Net().to(device)

In [597]:
# 회귀문제라 평균제곱오차
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [598]:
# 모델 훈련
def train(epoch):
    model.train()

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)         # output shape [32, 1]
        outputs = outputs.squeeze()     # labels shape [32]
        loss = loss_fn(outputs, labels)
        

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1)%10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

In [599]:
# 모델 검증
def test():
    model.eval()
    
    with torch.no_grad():
        total_loss = 0

        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)     # output shape [32, 1]
            outputs = outputs.squeeze() # labels shape [32]
            
            loss = loss_fn(outputs, targets)
            total_loss += loss.item()
            for idx in range(len(outputs)):
                print(f"output: {float(outputs[idx]):.1f}, label: {float(targets[idx]):.1f}")

        avg_loss = total_loss / len(test_loader)
        print(f'Average Loss: {avg_loss:.4f}')

In [600]:
test()

output: -0.2, label: 22.6
output: -0.1, label: 50.0
output: -0.1, label: 23.0
output: 0.0, label: 8.3
output: -0.1, label: 21.2
output: -0.1, label: 19.9
output: -0.1, label: 20.6
output: -0.1, label: 18.7
output: -0.1, label: 16.1
output: -0.1, label: 18.6
output: 0.2, label: 8.8
output: 0.1, label: 17.2
output: 0.0, label: 14.9
output: 0.1, label: 10.5
output: -0.2, label: 50.0
output: -0.2, label: 29.0
output: -0.1, label: 23.0
output: -0.1, label: 33.3
output: -0.2, label: 29.4
output: -0.2, label: 21.0
output: -0.1, label: 23.8
output: -0.1, label: 19.1
output: -0.0, label: 20.4
output: -0.2, label: 29.1
output: -0.2, label: 19.3
output: 0.1, label: 23.1
output: -0.1, label: 19.6
output: -0.0, label: 19.4
output: -0.1, label: 38.7
output: -0.0, label: 18.7
output: -0.0, label: 14.6
output: -0.0, label: 20.0
output: -0.1, label: 20.5
output: -0.1, label: 20.1
output: -0.1, label: 23.6
output: -0.0, label: 16.8
output: 0.0, label: 5.6
output: -0.0, label: 50.0
output: 0.1, label: 14

In [601]:
num_epochs = 100
for epoch in range(num_epochs):
    train(epoch)

Epoch 10, Loss: 12.5611
Epoch 20, Loss: 6.4867
Epoch 30, Loss: 5.3603
Epoch 40, Loss: 8.0867
Epoch 50, Loss: 11.5164
Epoch 60, Loss: 1.1000
Epoch 70, Loss: 5.3854
Epoch 80, Loss: 2.7327
Epoch 90, Loss: 11.1107
Epoch 100, Loss: 3.4977


In [602]:
test()

output: 24.7, label: 22.6
output: 29.1, label: 50.0
output: 25.6, label: 23.0
output: 10.4, label: 8.3
output: 19.4, label: 21.2
output: 19.8, label: 19.9
output: 23.6, label: 20.6
output: 22.3, label: 18.7
output: 20.0, label: 16.1
output: 20.3, label: 18.6
output: 6.4, label: 8.8
output: 14.1, label: 17.2
output: 16.1, label: 14.9
output: 8.5, label: 10.5
output: 45.4, label: 50.0
output: 36.4, label: 29.0
output: 24.8, label: 23.0
output: 40.3, label: 33.3
output: 33.7, label: 29.4
output: 23.4, label: 21.0
output: 24.6, label: 23.8
output: 21.0, label: 19.1
output: 21.2, label: 20.4
output: 28.7, label: 29.1
output: 22.4, label: 19.3
output: 24.1, label: 23.1
output: 16.8, label: 19.6
output: 16.7, label: 19.4
output: 44.4, label: 38.7
output: 18.2, label: 18.7
output: 16.9, label: 14.6
output: 17.3, label: 20.0
output: 21.9, label: 20.5
output: 20.3, label: 20.1
output: 28.4, label: 23.6
output: 20.5, label: 16.8
output: 8.7, label: 5.6
output: 41.7, label: 50.0
output: 15.0, labe