### [ Pytorch 기반 회귀 모델 구현 ]
- Layer => Full-Connected Layer, Linear
-  손실함수 => MSELoss, MAELoss ...

[1] 데이터 준비<hr>

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchmetrics import R2Score

In [8]:
bostonDF = pd.read_csv('../../data/BostonHousing.csv')
bostonDF.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [9]:
train = bostonDF.sample(frac=0.8, random_state=10)
val = train.sample(frac=0.1, random_state=10)
test = bostonDF.drop(train.index)

In [24]:
ytrain =train['medv']
xtrain= train.drop(['medv','chas','rad','zn'], axis=1)
ytest = test['medv']
xtest = test.drop(['medv','chas','rad','zn'], axis=1)
yval = val['medv']
xval = val.drop(['medv','chas','rad','zn'], axis=1)

In [25]:
class dataset(torch.utils.data.Dataset):

    # 데이터 전처리 => 정규화, 텐서화
    def __init__(self, feature, target):
        super().__init__()
        feature = feature.values if isinstance(feature, pd.DataFrame) else feature
        target = target.values if isinstance(target, pd.Series) else target
        self.feature = torch.FloatTensor(feature)
        self.target = torch.FloatTensor(target)
        self.length = len(self.feature)
        self.norm_feature = self.normalization()

    def __len__(self):
        return len(self.feature)
    
    def __getitem__(self, idx):
        return self.feature[idx], self.target[idx]
    
    # 데이터 정규화 기능 함수
    def normalization(self):
        return self.feature.max(dim=1)

In [26]:
trainDS = dataset(xtrain, ytrain)
valDS = dataset(xval, yval)
testDS = dataset(xtest, ytest)

In [27]:
trainDL = DataLoader(trainDS, batch_size=32, shuffle=True)
valDL = DataLoader(valDS, batch_size=32, shuffle=True)

In [28]:
bostonDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   crim     506 non-null    float64
 1   zn       506 non-null    float64
 2   indus    506 non-null    float64
 3   chas     506 non-null    int64  
 4   nox      506 non-null    float64
 5   rm       506 non-null    float64
 6   age      506 non-null    float64
 7   dis      506 non-null    float64
 8   rad      506 non-null    int64  
 9   tax      506 non-null    int64  
 10  ptratio  506 non-null    float64
 11  b        506 non-null    float64
 12  lstat    506 non-null    float64
 13  medv     506 non-null    float64
dtypes: float64(11), int64(3)
memory usage: 55.5 KB


In [29]:
class MODEL(nn.Module):
    def __init__(self,IN,OUT):
        super().__init__()
        self.fc1 = nn.Linear(IN, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, OUT)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [30]:
IN = xtrain.shape[1]
OUT = 1
model = MODEL(IN,OUT)
OPTIMIZER = optim.Adam(model.parameters(), lr=0.001)
EPOCHS = 300
SCHEDULER = ReduceLROnPlateau(OPTIMIZER, mode = 'min', patience = 3)
r2score = R2Score()

In [31]:
def training(dl):
    loss_list = []
    r2_list = []
    model.train()
    for x, y in dl:
        y_pred = model(x)
        loss =  F.l1_loss(y_pred, y.unsqueeze(1))
        r2 = r2score(y_pred,y.unsqueeze(1))
        OPTIMIZER.zero_grad()
        loss.backward()
        OPTIMIZER.step()
        loss_list.append(loss.item())
        r2_list.append(r2.item())
    train_loss = np.mean(loss_list)
    train_r2 = np.mean(r2_list)
    return train_loss, train_r2

In [32]:
def testing(dl):
    loss_list = []
    r2_list = []
    model.eval()
    with torch.no_grad():
        for x, y in dl:
            y_pred = model(x)
            loss =  F.l1_loss(y_pred, y.unsqueeze(1))
            r2 = r2score(y_pred,y.unsqueeze(1))
            loss_list.append(loss.item())
            r2_list.append(r2.item())
            
    val_loss = np.mean(loss_list)
    val_r2 = np.mean(r2_list)
    return val_loss, val_r2


In [33]:
for epoch in range(EPOCHS):
    train_loss, train_r2 = training(trainDL)
    val_loss, val_r2 = testing(valDL)
    print(f'[Epoch : {epoch+1} / {EPOCHS}]')
    print(f'Train Loss : {train_loss:.4f} Train R2 : {train_r2:.4f}')
    print(f'Test Loss : {val_loss:.4f} Test R2 : {val_r2:.4f}')
    SCHEDULER.step(val_loss)
    if SCHEDULER.num_bad_epochs >= SCHEDULER.patience:
        print(f'조기 종료 at epoch {epoch}')
        break

[Epoch : 1 / 300]
Train Loss : 9.4144 Train R2 : -0.9453
Test Loss : 7.4516 Test R2 : -0.3362
[Epoch : 2 / 300]
Train Loss : 5.8642 Train R2 : 0.0643
Test Loss : 6.1667 Test R2 : -0.1817
[Epoch : 3 / 300]
Train Loss : 5.6762 Train R2 : 0.1468
Test Loss : 5.3714 Test R2 : -0.0054
[Epoch : 4 / 300]
Train Loss : 5.4857 Train R2 : 0.2308
Test Loss : 5.8920 Test R2 : -0.1766
[Epoch : 5 / 300]
Train Loss : 5.4857 Train R2 : 0.1963
Test Loss : 6.0117 Test R2 : -0.7343
[Epoch : 6 / 300]
Train Loss : 5.4262 Train R2 : 0.1796
Test Loss : 6.2367 Test R2 : -0.3239
조기 종료 at epoch 5


In [None]:
def prediction(ds):
    model.eval()
    with torch.no_grad():
        pre = model(ds.feature)
        loss =  F.l1_loss(pre, ds.target.unsqueeze(1))
        print(f'Test Loss : {loss.item():.4f}')

prediction(testDS)

Test Loss : 5.3923
