In [42]:
import os
# change the current working directory
os.chdir('..')

# main
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchmetrics import R2Score
import numpy as np

# implementation
from tools import make_regression_data, RegressionDataset, finite_time_opt_training, drem_opt_training,\
                                        standard_training, plot_results, validation_epoch
from optimizers import FiniteTimeOptimizer, DREMOptimizer
from torch.optim import Adam, SGD

# graphics
import matplotlib.pyplot as plt


torch.random.manual_seed(19)
device = "cuda" if torch.cuda.is_available() else "cpu"

## Data
We will work with simple regression data with high noise level

In [2]:
NUMBER_OF_FEATURES = 10
X_train, X_test, y_train, y_test = make_regression_data(number_samples=1000,
                                                        number_features=NUMBER_OF_FEATURES,
                                                        noise_value=0.5)
print(f'shape of train: {X_train.shape, y_train.shape}\nshape of test: {X_test.shape, y_test.shape}')

shape of train: (torch.Size([800, 10]), torch.Size([800, 1]))
shape of test: (torch.Size([200, 10]), torch.Size([200, 1]))


In [3]:
print(f'shape of train: {X_train.shape, y_train.shape}\nshape of test: {X_test.shape, y_test.shape}')
train_dataset = RegressionDataset(features=X_train,
                                  labels=y_train)
test_dataset = RegressionDataset(features=X_test,
                                 labels=y_test)
print(f'example of train sample:\n {train_dataset[21]}')

shape of train: (torch.Size([800, 10]), torch.Size([800, 1]))
shape of test: (torch.Size([200, 10]), torch.Size([200, 1]))
example of train sample:
 (tensor([ 0.5686,  0.6552,  1.6264,  0.3829, -1.3236,  0.2570,  0.2360, -0.4359,
         0.3517,  0.8775]), tensor([176.7505]))


In [4]:
BATCH_SIZE = 10

train_dataloader = DataLoader(dataset=train_dataset,
                              shuffle=True,
                              batch_size=BATCH_SIZE)
test_dataloader = DataLoader(dataset=test_dataset,
                             batch_size=BATCH_SIZE)
batch_example_features, batch_example_labels  = next(iter(train_dataloader))
print('shape of batch: features - {} and labels - {}'.format(batch_example_features.shape, batch_example_labels.shape))

shape of batch: features - torch.Size([10, 10]) and labels - torch.Size([10, 1])


# Loss and score function

In [5]:
loss_fn = nn.MSELoss()

metric_fn = R2Score()

# Model

In [35]:
class ComplexNet(nn.Module):
    def __init__(self):
        super(ComplexNet, self).__init__()
        self.fc_first = nn.Linear(10, 65)
        self.fc_main = nn.Sequential(nn.ReLU(),
                                     nn.Linear(65, 10),
                                     nn.ReLU(),
                                     nn.Linear(10, 1))
        # self.fc_last = nn.Linear(10, 1)

    def forward(self, x):
        x = self.fc_first(x)
        x = self.fc_main(x)
        # x = self.fc_last(x)
        return x

# Model
model = ComplexNet().to(device)

# Optimizers

In [36]:
optimizer_adam = Adam(params=model.fc_main.parameters(), lr=0.01)

optimizer_drem = DREMOptimizer(params=model.fc_first.parameters(),
                               lr=1e-20)


# Train: DREM and Adam

In [37]:
num_epochs = 350
new_loss = True
score_val, loss_val = [], []
for epoch in range(1, num_epochs+1):
    # train
    loss_epoch = 0.0
    model.train()
    for X_batch, y_batch in train_dataloader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # forward pass
        predicted = model(X_batch)
        if new_loss:
            determinant = torch.det(X_batch)
            inverse_batch = torch.linalg.inv(X_batch)
            adjoin = determinant * inverse_batch
            loss = loss_fn(adjoin @ predicted, adjoin @ y_batch)
        else:
            loss = loss_fn(predicted, y_batch)

        loss_epoch += loss.detach()

        # zero gradient
        optimizer_drem.zero_grad()
        optimizer_adam.zero_grad()
        # optimizer3.zero_grad()

        # backpropagation (compute gradient)
        loss.backward()

        # update model parameters
        # optimizer3.step()
        optimizer_drem.step(det_batch=determinant)
        optimizer_adam.step()

    # evaluate
    mean_loss_test, mean_metric_test = validation_epoch(model, loss_fn, metric_fn, test_dataloader, "cpu")
    loss_val.append(mean_loss_test)
    score_val.append(mean_metric_test)
    print(f'on the {epoch}th epoch: loss = {(loss_epoch) / (len(train_dataloader)):.3f} & score = {mean_metric_test}')

on the 1th epoch: loss = 15132677120.000 & score = -0.0988222062587738
on the 2th epoch: loss = 4652124672.000 & score = -0.09212847799062729
on the 3th epoch: loss = 8443752448.000 & score = -0.06572342664003372
on the 4th epoch: loss = 14265401344.000 & score = -0.03978119418025017
on the 5th epoch: loss = 7221962240.000 & score = -0.001651135040447116
on the 6th epoch: loss = 4490747904.000 & score = 0.02217349037528038
on the 7th epoch: loss = 8875200512.000 & score = 0.04214435815811157
on the 8th epoch: loss = 10656182272.000 & score = 0.04947804659605026
on the 9th epoch: loss = 3448438784.000 & score = 0.0646325945854187
on the 10th epoch: loss = 7600753664.000 & score = 0.08721859753131866
on the 11th epoch: loss = 5163182592.000 & score = 0.10807178169488907
on the 12th epoch: loss = 4632870912.000 & score = 0.1255250722169876
on the 13th epoch: loss = 6246724608.000 & score = 0.1600666642189026
on the 14th epoch: loss = 9062263808.000 & score = 0.21324582397937775
on the 15t

# FInite-Time optimizer

In [118]:
class ComplexNet(nn.Module):
    def __init__(self):
        super(ComplexNet, self).__init__()
        self.fc_first = nn.Linear(10, 45)
        self.fc_main = nn.Sequential(nn.ReLU(),
                                     nn.Linear(45, 25),
                                     nn.ReLU(),
                                     nn.Linear(25, 1),
                                     nn.ReLU()
                                     )
        # self.fc_last = nn.Linear(10, 1)

    def forward(self, x):
        x = self.fc_first(x)
        x = self.fc_main(x)
        # x = self.fc_last(x)
        return x

# Model
model = ComplexNet().to(device)

In [119]:
N_OF_BATCHES = 25
optimizer_ft = FiniteTimeOptimizer(params=model.fc_first.parameters(),
                                    lr=1e-15,
                                    n_of_batches=N_OF_BATCHES)

# optimizer2 = SGD(params=model.fc_last.parameters(),
#                  lr=0.01)

optimizer_adam = Adam(params=model.fc_main.parameters(),
                     lr=0.01)

In [120]:
num_epochs = 350
new_loss = True
score_val, loss_val = [], []
for epoch in range(1, num_epochs+1):
    # train
    loss_epoch = 0.0
    model.train()
    for batch_num, (X_batch, y_batch) in enumerate(train_dataloader):
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # forward pass
        predicted = model(X_batch)
        if new_loss:
            determinant = torch.det(X_batch)
            inverse_batch = torch.linalg.inv(X_batch)
            adjoin = determinant * inverse_batch
            loss = loss_fn(adjoin @ predicted, adjoin @ y_batch)
        else:
            loss = loss_fn(predicted, y_batch)

        loss_epoch += loss.detach()

        # zero gradient
        optimizer_ft.zero_grad()
        optimizer_adam.zero_grad()
        # optimizer3.zero_grad()

        # backpropagation (compute gradient)
        loss.backward()

        # update model parameters
        # optimizer3.step()
        optimizer_ft.step(det_batch=determinant, t=batch_num+1)
        optimizer_adam.step()

    # evaluate
    mean_loss_test, mean_metric_test = validation_epoch(model, loss_fn, metric_fn, test_dataloader, "cpu")
    loss_val.append(mean_loss_test)
    score_val.append(mean_metric_test)

    if torch.isnan(mean_metric_test):
        print('can not apply')
        break

    print(f'on the {epoch}th epoch: loss = {(loss_epoch) / (len(train_dataloader)):.3f} & score = {mean_metric_test}')

on the 1th epoch: loss = 1721425641015869440.000 & score = -0.11156518757343292
on the 2th epoch: loss = 7638746624.000 & score = -0.11156518757343292
can not apply


In [121]:
#### gjxtve kb,thvfkmyst dpukzs

In [131]:
class ComplexNet(nn.Module):
    def __init__(self):
        super(ComplexNet, self).__init__()
        # self.fc_first = nn.Linear(10, 45)
        self.fc_main = nn.Sequential(nn.Linear(10, 45),
                                     nn.ReLU(),
                                     nn.Linear(45, 25),
                                     nn.ReLU(),
                                     nn.Linear(25, 10),
                                     nn.ReLU())
        self.fc_last = nn.Linear(10, 1)

    def forward(self, x):
        # x = self.fc_first(x)
        x = self.fc_main(x)
        x = self.fc_last(x)
        return x

# Model
model = ComplexNet().to(device)

In [132]:
N_OF_BATCHES = 35
optimizer_ft = FiniteTimeOptimizer(params=model.fc_last.parameters(),
                                    lr=1e-11,
                                    n_of_batches=N_OF_BATCHES)

# optimizer2 = SGD(params=model.fc_last.parameters(),
#                  lr=0.01)

optimizer_adam = Adam(params=model.fc_main.parameters(),
                     lr=0.1)

In [133]:
num_epochs = 350
new_loss = True
score_val, loss_val = [], []
for epoch in range(1, num_epochs+1):
    # train
    loss_epoch = 0.0
    model.train()
    for batch_num, (X_batch, y_batch) in enumerate(train_dataloader):
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # forward pass
        predicted = model(X_batch)
        if new_loss:
            determinant = torch.det(X_batch)
            inverse_batch = torch.linalg.inv(X_batch)
            adjoin = determinant * inverse_batch
            loss = loss_fn(adjoin @ predicted, adjoin @ y_batch)
        else:
            loss = loss_fn(predicted, y_batch)

        loss_epoch += loss.detach()

        # zero gradient
        optimizer_ft.zero_grad()
        optimizer_adam.zero_grad()
        # optimizer3.zero_grad()

        # backpropagation (compute gradient)
        loss.backward()

        # update model parameters
        # optimizer3.step()
        optimizer_ft.step(det_batch=determinant, t=batch_num+1)
        optimizer_adam.step()

    # evaluate
    mean_loss_test, mean_metric_test = validation_epoch(model, loss_fn, metric_fn, test_dataloader, "cpu")
    loss_val.append(mean_loss_test)
    score_val.append(mean_metric_test)

    if torch.isnan(mean_metric_test):
        print('can not apply')
        break

    print(f'on the {epoch}th epoch: loss = {(loss_epoch) / (len(train_dataloader)):.3f} & score = {mean_metric_test}')

on the 1th epoch: loss = 7577798144.000 & score = -0.09028781950473785
on the 2th epoch: loss = 268553567600640000.000 & score = -18275090.0
on the 3th epoch: loss = 48960093741842879649677312.000 & score = -1.192974947975168e+16
on the 4th epoch: loss = 4863629348834735949756015893807104.000 & score = -1.269344671063551e+24
on the 5th epoch: loss = inf & score = -3.1029039620237233e+31
on the 6th epoch: loss = inf & score = -inf
on the 7th epoch: loss = inf & score = -inf
on the 8th epoch: loss = inf & score = -inf
on the 9th epoch: loss = inf & score = -inf
can not apply
