### 선형 회귀 모델의 학습에서 다양한 옵티마이저를 적용해보기

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from adamp import AdamP
from adamp import SGDP

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

#### Boston data load

In [2]:
boston = load_boston()
X = boston.data
y = boston.target

# 데이터 스케일링
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
print(len(X_train), len(X_test), len(y_train), len(y_test))

455 51 455 51


#### 모델 생성 및 하이퍼파라미터 설정

In [3]:
# 하이퍼파라미터 설정
input_dim = X.shape[1]
output_dim = 1
lr = 0.00000000005
epochs = 1000

# 모델 생성
model = nn.Linear(input_dim, output_dim)

#### 다양한 옵티마이저 설정

In [4]:
optimizers = {'SGD' : optim.SGD(model.parameters(), lr=lr),
              'Momentum' : optim.SGD(model.parameters(), lr=lr, momentum=0.9),
              'Adagrad' : optim.Adagrad(model.parameters(), lr=lr),
              'RMSprop' : optim.RMSprop(model.parameters(), lr=lr),
              'Adam' : optim.Adam(model.parameters(), lr=lr),
              'AdamP' : AdamP(model.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=1e-2),
              'SGDP' : SGDP(model.parameters(), lr=lr, weight_decay=1e-5, momentum=0.9, nesterov=True)}

In [5]:
for optimizer_name, optimizer in optimizers.items():
    # print(optimizer_name, optimizer)
    criterion = nn.MSELoss()
    optimizer.zero_grad()

    for epoch in range(epochs):
        inputs = torch.tensor(X_train, dtype=torch.float32)
        labels = torch.tensor(y_train, dtype=torch.float32)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()

        # print progress
        if (epoch + 1) % 100 == 0 :
            print(f"{optimizer_name} - Epoch [{epoch+1} / {epochs}, Loss : {loss.item():.4f}]")



SGD - Epoch [100 / 1000, Loss : 594.3737]
SGD - Epoch [200 / 1000, Loss : 594.3721]
SGD - Epoch [300 / 1000, Loss : 594.3696]
SGD - Epoch [400 / 1000, Loss : 594.3660]
SGD - Epoch [500 / 1000, Loss : 594.3615]
SGD - Epoch [600 / 1000, Loss : 594.3559]
SGD - Epoch [700 / 1000, Loss : 594.3492]
SGD - Epoch [800 / 1000, Loss : 594.3417]
SGD - Epoch [900 / 1000, Loss : 594.3329]
SGD - Epoch [1000 / 1000, Loss : 594.3234]
Momentum - Epoch [100 / 1000, Loss : 594.3190]
Momentum - Epoch [200 / 1000, Loss : 594.3047]
Momentum - Epoch [300 / 1000, Loss : 594.2803]
Momentum - Epoch [400 / 1000, Loss : 594.2456]
Momentum - Epoch [500 / 1000, Loss : 594.2008]
Momentum - Epoch [600 / 1000, Loss : 594.1458]
Momentum - Epoch [700 / 1000, Loss : 594.0806]
Momentum - Epoch [800 / 1000, Loss : 594.0053]
Momentum - Epoch [900 / 1000, Loss : 593.9199]
Momentum - Epoch [1000 / 1000, Loss : 593.8242]
Adagrad - Epoch [100 / 1000, Loss : 593.8232]
Adagrad - Epoch [200 / 1000, Loss : 593.8232]
Adagrad - Epoch 