# Target
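Сравнение стандартных оптимизаторов (доступных в PyTorch) при обучении однослойного перцептрона с новой функцией потерь, использующей присоединенную матрицу

(English: a comparison of the standard optimizers available in PyTorch when training a single-layer perceptron with a new loss function that uses the adjugate matrix.)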

## libraries

In [1]:
import numpy as np
import torch
from torch import optim
from torch import nn
from torch.utils.data import DataLoader
from torchmetrics import R2Score

from custom_data import make_regression_data, RegressionDataset

# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

## Helper function for training & evaluation

In [19]:
def training_simple_perceptron(optimizer_fn, loss_fn, metric_fn,
                               data_train, data_test, input_features=10,
                               learning_rate=1e-3, current_device=device,
                               max_epochs=1000, metric_constraint=0.95,
                               with_addition=False):
    """Train a bias-free 1-layer perceptron until the test metric is high enough.

    The model is ``nn.Linear(input_features, 1, bias=False)``. Every 5th epoch
    it is scored on ``data_test``; training stops as soon as that score is no
    longer below ``metric_constraint`` or the epoch limit is hit. A NaN score
    also stops training, because ``metric < metric_constraint`` is then false.

    Parameters:
        optimizer_fn (function): optimizer factory, called as
            ``optimizer_fn(params=..., lr=...)``
        loss_fn (function): loss function to measure model error
        metric_fn (function): function to evaluate model accuracy, called as
            ``metric_fn(predicted, target)`` once per test batch
        data_train (torch.utils.data.DataLoader): train dataloader
        data_test (torch.utils.data.DataLoader): test dataloader
        input_features (int): how many features our data has
        learning_rate (float): step size passed to the optimizer
        current_device (str): device for the model and the batches (cuda or cpu)
        max_epochs (int): upper limit for the epoch counter; at most
            ``max_epochs - 1`` epochs are actually trained
        metric_constraint (float): the lower constraint for score function
        with_addition (bool): if True, both predictions and targets of a train
            batch are multiplied by ``det(X) * inv(X)`` (the adjugate of the
            batch matrix when it is invertible) before the loss is computed.
            ``torch.det`` and ``torch.linalg.inv`` need square matrices, so
            every train batch must then hold exactly ``input_features``
            samples. The test metric is always computed on raw predictions.

    Returns:
        tuple: ``(epoch, metric)`` where ``epoch`` is the value of the epoch
        counter when the loop stopped (one more than the number of epochs that
        were trained) and ``metric`` is the mean of the per-batch
        ``metric_fn`` values from the most recent evaluation (``0.0`` if no
        evaluation ran).
    """
    # Put the model on the device the batches are moved to below, i.e. the
    # `current_device` argument rather than the module-level default.
    simple_perceptron = nn.Linear(in_features=input_features,
                                  out_features=1,
                                  bias=False).to(current_device)
    optimizer = optimizer_fn(params=simple_perceptron.parameters(),
                             lr=learning_rate)
    metric = 0.0
    epoch = 1
    while (metric < metric_constraint) and (epoch < max_epochs):
        simple_perceptron.train()

        for X_batch, y_batch in data_train:
            X_batch, y_batch = X_batch.to(current_device), y_batch.to(current_device)
            # forward pass
            predicted = simple_perceptron(X_batch)

            # loss computation
            if with_addition:
                # adj(X) = det(X) * inv(X) for an invertible square batch;
                # the same transform is applied to predictions and targets.
                determinant = torch.det(X_batch)
                inverse_batch = torch.linalg.inv(X_batch)
                adjoint = determinant * inverse_batch
                loss = loss_fn(adjoint @ predicted, adjoint @ y_batch)
            else:
                loss = loss_fn(predicted, y_batch)

            # zero gradient
            optimizer.zero_grad()

            # backpropagation (compute gradient)
            loss.backward()

            # update model parameters
            optimizer.step()

        # measure model quality on the test set every 5th epoch
        if epoch % 5 == 0:
            simple_perceptron.eval()
            with torch.inference_mode():
                metric = 0.0
                for X_batch, y_batch in data_test:
                    X_batch, y_batch = X_batch.to(current_device), y_batch.to(current_device)
                    predicted = simple_perceptron(X_batch)
                    metric += metric_fn(predicted, y_batch)
                # average of the per-batch scores
                metric /= len(data_test)
        epoch += 1
    return epoch, metric

## Data

In [8]:
# Generate a regression problem with 100 samples and 10 features, already
# split into train and test parts, and show the resulting shapes.
X_train, X_test, y_train, y_test = make_regression_data(
    number_samples=100,
    number_features=10,
)
print(f'shape of train: {X_train.shape, y_train.shape}\nshape of test: {X_test.shape, y_test.shape}')

shape of train: (torch.Size([80, 10]), torch.Size([80, 1]))
shape of test: (torch.Size([20, 10]), torch.Size([20, 1]))


In [9]:
# Wrap the train and test splits into dataset objects, then print one train
# sample (a features/label pair) as a sanity check.
train_dataset = RegressionDataset(features=X_train, labels=y_train)
test_dataset = RegressionDataset(features=X_test, labels=y_test)
print(f'example of train sample:\n {train_dataset[19]}')

example of train sample:
 (tensor([-1.2478, -0.4400, -0.2526, -3.2413,  0.1307, -0.0595, -1.0244, -0.9269,
         1.6324, -1.4301]), tensor([-501.1621]))


In [10]:
# Ten samples per batch: the same as the number of features, so each full
# train batch is a square matrix.
BATCH_SIZE = 10

# Only the train loader is shuffled; the test loader keeps dataset order.
train_dataloader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
)

# Pull a single train batch to check the shapes the loader yields.
batch_example_features, batch_example_labels = next(iter(train_dataloader))
print('shape of batch: features - {} and labels - {}'.format(batch_example_features.shape, batch_example_labels.shape))

shape of batch: features - torch.Size([10, 10]) and labels - torch.Size([10, 1])


## Optimizers & loss function & metric

In [12]:
# Mean squared error is the base criterion passed to every training run.
loss_fn = nn.MSELoss()

# A torchmetrics metric is a module with its own state tensors; keep it on the
# same device as the model outputs so scoring also works when running on CUDA.
metric_fn = R2Score().to(device)

# Optimizers under comparison; learning_rates[i] is the step size used with
# optimizers[i] (the two lists are consumed pairwise).
optimizers = [optim.Adam, optim.SGD, optim.RMSprop, optim.AdamW, optim.Adamax]
learning_rates = [1, 1e-3, 0.1, 1, 1]

## Training with classic MSE loss function

In [15]:
# Train one fresh perceptron per (optimizer, learning rate) pair with the plain
# MSE loss and remember what each run returned, keyed by optimizer class name.
optimizers_comparison = {}
for optimizer, lr in zip(optimizers, learning_rates):
    epochs, metric = training_simple_perceptron(
        optimizer_fn=optimizer,
        loss_fn=loss_fn,
        metric_fn=metric_fn,
        data_train=train_dataloader,
        data_test=test_dataloader,
        learning_rate=lr,
    )
    optimizers_comparison[optimizer.__name__] = (epochs, metric)

In [16]:
# Print, per optimizer, the epoch counter and score its training run ended with.
for optim_name, (epochs, metric) in optimizers_comparison.items():
    print(f'{optim_name} need {epochs} epochs for {metric:.3f} score')

Adam need 16 epochs for 0.975 score
SGD need 91 epochs for 0.950 score
RMSprop need 101 epochs for 0.953 score
AdamW need 191 epochs for 0.950 score
Adamax need 46 epochs for 0.967 score


## Training with non-classic MSE loss function
![new loss](../new_loss.png)

In [17]:
# Same sweep over (optimizer, learning rate) pairs, but with with_addition=True
# so the training loss is computed on adjugate-transformed predictions/targets.
optimizers_comparison = {}
for optimizer, lr in zip(optimizers, learning_rates):
    epochs, metric = training_simple_perceptron(
        optimizer_fn=optimizer,
        loss_fn=loss_fn,
        metric_fn=metric_fn,
        data_train=train_dataloader,
        data_test=test_dataloader,
        learning_rate=lr,
        with_addition=True,
    )
    optimizers_comparison[optimizer.__name__] = (epochs, metric)

In [18]:
# Print, per optimizer, the epoch counter and score its training run ended with.
for optim_name, (epochs, metric) in optimizers_comparison.items():
    print(f'{optim_name} need {epochs} epochs for {metric:.3f} score')

Adam need 26 epochs for 0.950 score
SGD need 6 epochs for nan score
RMSprop need 306 epochs for 0.952 score
AdamW need 1000 epochs for 0.281 score
Adamax need 381 epochs for 0.954 score
