# Target
Сравнение стандартных оптимизаторов (доступных в PyTorch) при обучении однослойного перцептрона с новой функцией потерь, использующей присоединенную матрицу

## Libraries

In [1]:
import numpy as np
import torch
from torch import optim
from torch import nn
from torch.utils.data import DataLoader
from torchmetrics import R2Score

from data_help_functions import make_regression_data, RegressionDataset
from train_test_help_functions import training_model

# Seed PyTorch's global RNG once so that weight initialisation and DataLoader
# shuffling are repeatable when the notebook is executed top to bottom.
torch.random.manual_seed(42)

# Name of the preferred compute device, kept as a plain string.
# NOTE(review): none of the cells shown here pass `device` anywhere -- confirm
# whether the training helper is expected to pick it up.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

## Data

In [2]:
# Build a synthetic regression problem with the project helper and unpack the
# four pieces it returns: train/test features and train/test targets.
X_train, X_test, y_train, y_test = make_regression_data(
    number_samples=100,
    number_features=10,
    noise_value=2.5,
)
# Show the (features, targets) shape pair of each split as a sanity check.
print(f'shape of train: {X_train.shape, y_train.shape}\nshape of test: {X_test.shape, y_test.shape}')

shape of train: (torch.Size([80, 10]), torch.Size([80, 1]))
shape of test: (torch.Size([20, 10]), torch.Size([20, 1]))


In [3]:
# Wrap each split in the project's RegressionDataset so it can be indexed
# sample by sample and handed to a DataLoader.
train_dataset = RegressionDataset(features=X_train, labels=y_train)
test_dataset = RegressionDataset(features=X_test, labels=y_test)

# Peek at one arbitrary training sample to see what indexing returns.
print(f'example of train sample:\n {train_dataset[19]}')

example of train sample:
 (tensor([-0.5173, -0.5229,  1.5796,  2.2989,  1.4534, -0.3628, -0.2818,  1.4093,
        -0.4455, -0.4202]), tensor([186.3322]))


In [4]:
BATCH_SIZE = 10

# Only the training loader shuffles; the test loader keeps dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Draw a single training batch and report its shapes as a sanity check.
batch_example_features, batch_example_labels = next(iter(train_dataloader))
print('shape of batch: features - {} and labels - {}'.format(batch_example_features.shape, batch_example_labels.shape))

shape of batch: features - torch.Size([10, 10]) and labels - torch.Size([10, 1])


## Optimizers & loss function & metric

In [5]:
# Objective handed to every training run below: plain mean squared error.
loss_fn = nn.MSELoss()

# Quality metric reported by the runs (coefficient of determination).
# NOTE(review): a single metric object is shared by every run -- confirm that
# the training helper resets it between runs if it keeps internal state.
metric_fn = R2Score()

# Optimizer classes under comparison. `learning_rates` is a parallel list:
# the i-th rate is the one used with the i-th optimizer (they are zipped
# together by the training loops).
optimizers = [
    optim.Adam,
    optim.SGD,
    optim.RMSprop,
    optim.AdamW,
    optim.Adamax,
]
learning_rates = [1, 1e-5, 0.1, 10, 1]

## Train a simple perceptron with the classic MSE loss function

In [6]:
# Train a fresh bias-free single-layer perceptron with every optimizer and
# record, per optimizer class name, the (epochs, metric) pair that the
# training helper returns.
# NOTE(review): the RNG is seeded only once at the top of the notebook, so
# each optimizer starts from a different random initialisation -- confirm
# this is acceptable for the comparison.
optimizers_comparison = {}
for optimizer_cls, step_size in zip(optimizers, learning_rates):
    perceptron = nn.Linear(10, 1, bias=False)
    epochs, metric = training_model(
        model=perceptron,
        optimizer_fn=optimizer_cls,
        loss_fn=loss_fn,
        metric_fn=metric_fn,
        data_train=train_dataloader,
        data_test=test_dataloader,
        learning_rate=step_size,
    )
    optimizers_comparison[optimizer_cls.__name__] = (epochs, metric)

In [7]:
# One summary line per optimizer: epochs used and the score reached.
for optim_name, (epochs, metric) in optimizers_comparison.items():
    print(f'{optim_name} need {epochs} epochs for {metric:.3f} score')

Adam need 16 epochs for 0.981 score
SGD need 10000 epochs for 0.943 score
RMSprop need 96 epochs for 0.951 score
AdamW need 16 epochs for 0.956 score
Adamax need 36 epochs for 0.953 score


## Train a simple perceptron with the non-classic MSE loss function
![new loss](../new_loss.png)

In [8]:
# Same sweep over optimizers as for the classic loss, but with
# `with_addition=True` passed to the training helper (presumably this enables
# the new loss term -- verify against training_model).
# The results dictionary is rebuilt from scratch, replacing the previous one.
optimizers_comparison = {}
for optimizer_cls, step_size in zip(optimizers, learning_rates):
    perceptron = nn.Linear(10, 1, bias=False)
    epochs, metric = training_model(
        model=perceptron,
        optimizer_fn=optimizer_cls,
        loss_fn=loss_fn,
        metric_fn=metric_fn,
        data_train=train_dataloader,
        data_test=test_dataloader,
        learning_rate=step_size,
        with_addition=True,
    )
    optimizers_comparison[optimizer_cls.__name__] = (epochs, metric)

In [9]:
# One summary line per optimizer: epochs used and the score reached.
for optim_name, (epochs, metric) in optimizers_comparison.items():
    print(f'{optim_name} need {epochs} epochs for {metric:.3f} score')

Adam need 86 epochs for 0.953 score
SGD need 61 epochs for 0.999 score
RMSprop need 316 epochs for 0.952 score
AdamW need 66 epochs for 0.963 score
Adamax need 721 epochs for 0.951 score


## Train a more complex model with Adam and the new loss

In [10]:
# Two-layer regressor: 10 inputs -> 35 hidden units with ELU -> 1 output.
simple_model = nn.Sequential(
    nn.Linear(10, 35),
    nn.ELU(),
    nn.Linear(35, 1),
)

# Train with Adam and `with_addition=True`, printing progress along the way.
# The call is deliberately left as the last expression of the cell so the
# notebook displays the returned (epochs, score) pair.
training_model(
    model=simple_model,
    optimizer_fn=optim.Adam,
    loss_fn=loss_fn,
    metric_fn=metric_fn,
    data_train=train_dataloader,
    data_test=test_dataloader,
    learning_rate=0.1,
    printed=True,
    valid_period=5,
    with_addition=True,
)

epoch 1: loss = 56815.758 and score = 0.051
epoch 6: loss = 8752.783 and score = 0.868
epoch 11: loss = 11989.217 and score = 0.797
epoch 16: loss = 10615.666 and score = 0.829
epoch 21: loss = 8917.953 and score = 0.865
epoch 26: loss = 14021.646 and score = 0.781
epoch 31: loss = 15407.912 and score = 0.761
epoch 36: loss = 16055.662 and score = 0.748
epoch 41: loss = 17260.533 and score = 0.725
epoch 46: loss = 9363.602 and score = 0.852
epoch 51: loss = 8644.997 and score = 0.860
epoch 56: loss = 7208.608 and score = 0.885
epoch 61: loss = 6653.069 and score = 0.894
epoch 66: loss = 6675.034 and score = 0.895
epoch 71: loss = 4564.716 and score = 0.927
epoch 76: loss = 3512.664 and score = 0.945
epoch 81: loss = 3461.405 and score = 0.944
epoch 86: loss = 2310.698 and score = 0.963


(86, tensor(0.9633))

## Import new optimizer

In [6]:
# Project-local custom optimizer, imported in its own cell rather than with
# the imports at the top of the notebook.
from custom_optim_example import DREMOptimizer
# Bare expression: the notebook echoes the class name, which shows that the
# import succeeded.
DREMOptimizer.__name__

'DREMOptimizer'

In [7]:
# Train a fresh bias-free single-layer perceptron with the custom optimizer.
# `new_optim=True` is passed in addition to `with_addition=True`; presumably
# it tells the helper to drive the optimizer through its custom interface --
# verify against training_model.
# Kept as the last expression so the notebook displays the returned pair.
training_model(
    model=nn.Linear(10, 1, bias=False),
    optimizer_fn=DREMOptimizer,
    loss_fn=loss_fn,
    metric_fn=metric_fn,
    data_train=train_dataloader,
    data_test=test_dataloader,
    learning_rate=0.01,
    printed=True,
    valid_period=5,
    with_addition=True,
    new_optim=True,
)

epoch 1: loss = 1947.710 and score = 0.969


(1, tensor(0.9688))

* функции активации
* графики сходимости

In [21]:
# Bias-free two-layer regressor: 10 inputs -> 15 hidden units with ReLU -> 1
# output. Rebinding `simple_model` replaces the earlier ELU network.
simple_model = nn.Sequential(
    nn.Linear(10, 15, bias=False),
    nn.ReLU(),
    nn.Linear(15, 1, bias=False),
)

# Train it with the custom optimizer at a very small learning rate.
# Kept as the last expression so the notebook displays the returned pair.
training_model(
    model=simple_model,
    optimizer_fn=DREMOptimizer,
    loss_fn=loss_fn,
    metric_fn=metric_fn,
    data_train=train_dataloader,
    data_test=test_dataloader,
    learning_rate=1e-10,
    printed=True,
    valid_period=5,
    with_addition=True,
    new_optim=True,
)

epoch 1: loss = 60025.680 and score = -0.005
epoch 6: loss = 59977.504 and score = -0.005
epoch 11: loss = 59969.402 and score = -0.005
epoch 16: loss = 59955.352 and score = -0.004
epoch 21: loss = 59932.320 and score = -0.004
epoch 26: loss = 59915.000 and score = -0.004
epoch 31: loss = 59903.051 and score = -0.004
epoch 36: loss = 59886.547 and score = -0.004
epoch 41: loss = 59852.574 and score = -0.003
epoch 46: loss = 59836.777 and score = -0.003
epoch 51: loss = 59643.062 and score = 0.000
epoch 56: loss = 59526.016 and score = 0.002
epoch 61: loss = 59497.781 and score = 0.003
epoch 66: loss = 59453.477 and score = 0.003
epoch 71: loss = 59335.809 and score = 0.005
epoch 76: loss = 59238.570 and score = 0.007
epoch 81: loss = 59097.297 and score = 0.009
epoch 86: loss = 58999.344 and score = 0.010
epoch 91: loss = 57127.273 and score = 0.038
epoch 96: loss = 55889.414 and score = 0.060
epoch 101: loss = 54958.523 and score = 0.075
epoch 106: loss = 54128.922 and score = 0.088


(246, tensor(0.9500))