In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import torch.utils.data as Data
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#### [Pytorch 기반 회귀 모델 구현]
- Layer => Full Connected Layer
- Loss Function => MSELoss, MAELoss
- Optimizer => Adam
- Model => Linear Regression
- Dataset => BostonHousing
- DataLoader => DataLoader
- Data => TensorDataset
- Model => Linear Regression
- Loss Function => MSE
- Optimizer => Adam
- Train => 1000
- Test => 100
- Epoch => 100
- Batch Size => 10
- 결과 => 0.11

In [16]:
dfboston = pd.read_csv('./data/BostonHousing.csv')
dfboston.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   crim     506 non-null    float64
 1   zn       506 non-null    float64
 2   indus    506 non-null    float64
 3   chas     506 non-null    int64  
 4   nox      506 non-null    float64
 5   rm       506 non-null    float64
 6   age      506 non-null    float64
 7   dis      506 non-null    float64
 8   rad      506 non-null    int64  
 9   tax      506 non-null    int64  
 10  ptratio  506 non-null    float64
 11  b        506 non-null    float64
 12  lstat    506 non-null    float64
 13  medv     506 non-null    float64
dtypes: float64(11), int64(3)
memory usage: 55.5 KB


In [17]:
class CustomData(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.x = torch.tensor(self.data.iloc[:, :-1].values, dtype=torch.float32, device=DEVICE)
        self.y = torch.tensor(self.data.iloc[:, -1].values, dtype=torch.float32, device=DEVICE)
        
    def __len__(self):
        return self.data.shape[0]
    
    def __getitem__(self, idx):
            
        x = self.x[idx]
        y = self.y[idx]
            
        return x, y
    
    def normalization(self):
        result = self.x.max(dim=1)
        return result
        
        
    
dataset = CustomData('./data/BostonHousing.csv')
# dataset.normalization()

In [18]:

train, test = random_split(dataset, [0.8, 0.2])

train.indices.__len__(), test.indices.__len__()

(405, 101)

In [19]:
trainloader = DataLoader(train, batch_size=50, shuffle=True, drop_last=True)
testloader = DataLoader(test, batch_size=50, shuffle=False, drop_last=True)


In [23]:

class LinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(13, 131)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(131, 1)
    
    def forward(self, x):
        x = self.linear(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x
    

model = LinearRegression().to(DEVICE)
criterion = nn.MSELoss().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=10)



In [24]:
from torchmetrics.regression import MeanSquaredError, MeanAbsoluteError, R2Score

def train(model, trainloader, criterion, optimizer):
    model.train()
    lossResult = []
    r2 = R2Score().to(DEVICE)
    mae = MeanAbsoluteError().to(DEVICE)
    mse = MeanSquaredError().to(DEVICE)
    for inputs, labels in trainloader:
        
        outputs = model(inputs)
        labels = labels.reshape(-1, 1)
        loss = criterion(outputs, labels)
        r2(outputs, labels)
        mae(outputs, labels)
        mse(outputs, labels)
        
        lossResult.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    lossavg = np.mean(lossResult)
    r2avg = r2.compute().cpu().item()
    print(f'Train Loss : {lossavg:.4f}', end='  ')
    print(f'Train R2 : {r2avg:.4f}')
    
    return lossavg, r2avg

def test(model, testloader, loss):
    model.eval()
    lossResult = []
    r2 = R2Score().to(DEVICE)
    mae = MeanAbsoluteError().to(DEVICE)
    mse = MeanSquaredError().to(DEVICE)
    
    with torch.no_grad():
        for inputs, labels in testloader:
            outputs = model(inputs)
            labels = labels.reshape(-1, 1)
            r2(outputs, labels)
            mae(outputs, labels)
            mse(outputs, labels)
            
            lossresult = loss(outputs, labels)
            lossResult.append(lossresult.item())
            
    lossavg = np.mean(lossResult)
    r2avg = r2.compute().cpu().item()
    maeavg = mae.compute().cpu().item()
    mseavg = mse.compute().cpu().item()

    print(f'Test Loss : {lossavg:.4f}', end='  ' )
    print(f"Test R2 : {r2avg:.4f}")
    # print(f"Test MAE : {maeavg:.4f}")
    # print(f"Test MSE : {mseavg:.4f}")
    
    
    return lossavg, r2avg, maeavg, mseavg




In [25]:

from tqdm import tqdm
EPOCHES = 10000

for epoch in range(EPOCHES):
    print(f'Epoch : {epoch+1}')
    lossavg, r2avg = train(model, trainloader, criterion, optimizer)
    test(model, testloader, criterion)
    # stop using patience criteria
    
    # scheduler.step(lossavg)
    # if scheduler.num_bad_epochs > scheduler.patience:
    #     print(f'Early stopping at epoch {epoch}...')
    #     break

    

Epoch : 1
Train Loss : 610.2045  Train R2 : -6.1022
Test Loss : 641.8383  Test R2 : -6.9414
Epoch : 2
Train Loss : 525.2482  Train R2 : -5.2745
Test Loss : 561.4122  Test R2 : -5.9463
Epoch : 3
Train Loss : 457.7741  Train R2 : -4.3894
Test Loss : 488.7327  Test R2 : -5.0470
Epoch : 4
Train Loss : 399.6127  Train R2 : -3.6718
Test Loss : 425.4091  Test R2 : -4.2635
Epoch : 5
Train Loss : 347.5386  Train R2 : -3.1667
Test Loss : 370.1681  Test R2 : -3.5800
Epoch : 6
Train Loss : 310.0963  Train R2 : -2.6254
Test Loss : 321.7778  Test R2 : -2.9813
Epoch : 7
Train Loss : 267.7544  Train R2 : -2.1458
Test Loss : 277.8856  Test R2 : -2.4382
Epoch : 8
Train Loss : 235.8546  Train R2 : -1.7452
Test Loss : 241.5800  Test R2 : -1.9890
Epoch : 9
Train Loss : 207.5810  Train R2 : -1.4309
Test Loss : 210.8917  Test R2 : -1.6093
Epoch : 10
Train Loss : 181.0621  Train R2 : -1.1558
Test Loss : 184.9581  Test R2 : -1.2885
Epoch : 11
Train Loss : 164.4144  Train R2 : -0.9120
Test Loss : 162.8884  Test

Train Loss : 94.3635  Train R2 : -0.0987
Test Loss : 86.6996  Test R2 : -0.0727
Epoch : 19
Train Loss : 90.5550  Train R2 : -0.0567
Test Loss : 82.4621  Test R2 : -0.0203
Epoch : 20
Train Loss : 86.9302  Train R2 : -0.0246
Test Loss : 78.7363  Test R2 : 0.0258
Epoch : 21
Train Loss : 84.6915  Train R2 : 0.0122
Test Loss : 75.9661  Test R2 : 0.0601
Epoch : 22
Train Loss : 80.9133  Train R2 : 0.0390
Test Loss : 73.6184  Test R2 : 0.0891
Epoch : 23
Train Loss : 81.6388  Train R2 : 0.0485
Test Loss : 71.7839  Test R2 : 0.1118
Epoch : 24
Train Loss : 78.9192  Train R2 : 0.0590
Test Loss : 70.1207  Test R2 : 0.1324
Epoch : 25
Train Loss : 77.7883  Train R2 : 0.0845
Test Loss : 69.1938  Test R2 : 0.1439
Epoch : 26
Train Loss : 75.9231  Train R2 : 0.0969
Test Loss : 67.9905  Test R2 : 0.1588
Epoch : 27
Train Loss : 76.8922  Train R2 : 0.1007
Test Loss : 66.6080  Test R2 : 0.1759
Epoch : 28
Train Loss : 76.1438  Train R2 : 0.1124
Test Loss : 65.7033  Test R2 : 0.1871
Epoch : 29
Train Loss : 74.