In [1]:
import os
# change the current working directory
os.chdir('..')

# main
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchmetrics import R2Score

# implementation
from tools import make_regression_data, RegressionDataset, finite_time_opt_training, drem_opt_training,\
                                        standard_training, plot_results, validation_epoch
from optimizers import FiniteTimeOptimizer, DREMOptimizer
from torch.optim import Adam, SGD

# graphics
import matplotlib.pyplot as plt


# Seed PyTorch's global random number generator so repeated runs start from the same state.
torch.manual_seed(19)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

## Data
We will work with simple synthetic regression data that has a high noise level.

In [2]:
# Width of the feature vector requested from the synthetic data generator.
NUMBER_OF_FEATURES = 10

# Generate the regression data set already split into train / test parts.
X_train, X_test, y_train, y_test = make_regression_data(
    number_samples=1000,
    number_features=NUMBER_OF_FEATURES,
    noise_value=0.5,
)

# Report the tensor shapes of both splits.
print(f'shape of train: {X_train.shape, y_train.shape}\nshape of test: {X_test.shape, y_test.shape}')

shape of train: (torch.Size([800, 10]), torch.Size([800, 1]))
shape of test: (torch.Size([200, 10]), torch.Size([200, 1]))


In [3]:
# Report the tensor shapes of both splits before wrapping them in datasets.
print(f'shape of train: {X_train.shape, y_train.shape}\nshape of test: {X_test.shape, y_test.shape}')

# Wrap the raw tensors in dataset objects that can be fed to a DataLoader.
train_dataset = RegressionDataset(features=X_train, labels=y_train)
test_dataset = RegressionDataset(features=X_test, labels=y_test)

# Show one (features, label) pair as a sanity check.
print(f'example of train sample:\n {train_dataset[21]}')

shape of train: (torch.Size([800, 10]), torch.Size([800, 1]))
shape of test: (torch.Size([200, 10]), torch.Size([200, 1]))
example of train sample:
 (tensor([ 0.5686,  0.6552,  1.6264,  0.3829, -1.3236,  0.2570,  0.2360, -0.4359,
         0.3517,  0.8775]), tensor([176.7505]))


In [5]:
# Number of samples per mini-batch for both loaders.
BATCH_SIZE = 10

# Training batches are reshuffled on every pass; test batches keep their order.
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

# Pull a single training batch to confirm the shapes the model will receive.
batch_example_features, batch_example_labels = next(iter(train_dataloader))
print('shape of batch: features - {} and labels - {}'.format(batch_example_features.shape,
                                                             batch_example_labels.shape))

shape of batch: features - torch.Size([10, 10]) and labels - torch.Size([10, 1])


# Loss and score functions

In [9]:
# Quality metric: coefficient of determination (R^2) from torchmetrics.
metric_fn = R2Score()

# Training objective: mean squared error between predictions and targets.
loss_fn = nn.MSELoss()

# Model

In [30]:
class ComplexNet(nn.Module):
    """Small fully connected regression network.

    The network is split into two parts so that different optimizers can be
    attached to each of them:

    * ``fc_main`` - three hidden layers (35 -> 15 -> 10 units) with
      ELU / ReLU / ReLU activations;
    * ``fc_last`` - the final linear layer that maps the 10 hidden units
      to the output.

    Args:
        in_features: size of each input sample (defaults to 10, the value the
            network was originally hard-coded for).
        out_features: size of each output sample (defaults to 1).
    """

    def __init__(self, in_features=10, out_features=1):
        super().__init__()
        # Layers are created in the same order as before so that a fixed
        # random seed yields the same initial weights.
        self.fc_main = nn.Sequential(
            nn.Linear(in_features, 35),
            nn.ELU(),
            nn.Linear(35, 15),
            nn.ReLU(),
            nn.Linear(15, 10),
            nn.ReLU(),
        )
        self.fc_last = nn.Linear(10, out_features)

    def forward(self, x):
        """Run ``x`` through the hidden stack and then the output layer."""
        hidden = self.fc_main(x)
        return self.fc_last(hidden)

# Build the network with its default layer sizes, then place it on the selected device.
model = ComplexNet()
model = model.to(device)

# Optimizers

In [31]:
# Value forwarded to FiniteTimeOptimizer as `n_of_batches`.
N_OF_BATCHES = 5

# Custom finite-time optimizer attached to the last layer only.
optimizer = FiniteTimeOptimizer(
    params=model.fc_last.parameters(),
    lr=0.001,
    n_of_batches=N_OF_BATCHES,
)

# Plain SGD over the last layer only.
optimizer2 = SGD(model.fc_last.parameters(), lr=0.01)

# Adam over the hidden stack only.
optimizer3 = Adam(model.fc_main.parameters(), lr=0.0001)

# Plain SGD over every parameter of the model.
optimizer4 = SGD(model.parameters(), lr=0.01)

# Custom DREM optimizer over every parameter of the model.
optimizer_drem = DREMOptimizer(
    params=model.parameters(),
    lr=1e-7,
)

# Train

In [32]:
# Train the whole model with the DREM optimizer and report, per epoch, the mean
# training loss and the training R^2 score.
num_epochs = 100
# When True, both predictions and targets are pre-multiplied by the adjugate of
# the batch matrix (det(X) * inv(X)) before the loss is computed.
new_loss = False
for epoch in range(1, num_epochs+1):
    loss_epoch = 0.0
    model.train()
    # Start the R^2 accumulation from scratch for every epoch.
    metric_fn.reset()

    for X_batch, y_batch in train_dataloader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # forward pass
        predicted = model(X_batch)

        # The optimizer step below always receives the batch determinant, so it
        # has to be computed regardless of which loss is used (previously it was
        # only defined when `new_loss` was True, which raised NameError otherwise).
        # torch.det needs a square matrix, i.e. batch size == number of features.
        determinant = torch.det(X_batch)
        if new_loss:
            inverse_batch = torch.linalg.inv(X_batch)
            adjoin = determinant * inverse_batch
            loss = loss_fn(adjoin @ predicted, adjoin @ y_batch)
        else:
            loss = loss_fn(predicted, y_batch)

        loss_epoch += loss.detach()
        # Accumulate R^2 statistics on flattened CPU copies.
        # NOTE(review): assumes `metric_fn` still lives on the CPU, as created above.
        metric_fn.update(predicted.detach().flatten().cpu(), y_batch.flatten().cpu())

        # zero gradient
        optimizer_drem.zero_grad()

        # backpropagation (compute gradient)
        loss.backward()

        # update model parameters
        optimizer_drem.step(det_batch=determinant)

    score_epoch = float(metric_fn.compute())
    print(f'on the {epoch}th epoch: loss = {(loss_epoch) / (len(train_dataloader)):.3f} & score = {score_epoch:.3f}')
    print(' ')

on the 1th epoch: loss = 17577.656
on the 2th epoch: loss = 17577.613
on the 3th epoch: loss = 17577.566
on the 4th epoch: loss = 17577.520
on the 5th epoch: loss = 17577.469
on the 6th epoch: loss = 17577.418
on the 7th epoch: loss = 17577.373
on the 8th epoch: loss = 17577.324
on the 9th epoch: loss = 17577.275
on the 10th epoch: loss = 17577.227
on the 11th epoch: loss = 17577.176
on the 12th epoch: loss = 17577.137
on the 13th epoch: loss = 17577.086
on the 14th epoch: loss = 17577.035
on the 15th epoch: loss = 17576.988
on the 16th epoch: loss = 17576.936
on the 17th epoch: loss = 17576.891
on the 18th epoch: loss = 17576.842
on the 19th epoch: loss = 17576.793
on the 20th epoch: loss = 17576.740
on the 21th epoch: loss = 17576.691
on the 22th epoch: loss = 17576.639
on the 23th epoch: loss = 17576.598
on the 24th epoch: loss = 17576.545
on the 25th epoch: loss = 17576.502
on the 26th epoch: loss = 17576.453
on the 27th epoch: loss = 17576.402
on the 28th epoch: loss = 17576.348
o

In [15]:
# Debug check: display the raw model output for whatever `X_batch` is currently
# bound in the kernel (the training loop above rebinds it on every iteration).
# NOTE(review): the recorded output is all NaN and this cell's execution count is
# out of sequence with the training cell - re-run on a fresh kernel to confirm.
model(X_batch)

tensor([[nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan]], grad_fn=<AddmmBackward0>)

In [None]:
def train_one_epoch(model, data_train, loss_fn, optimizer, current_device,
                    new_loss=False, scheduler=None):
    """Run one training pass over ``data_train`` and return the mean batch loss.

    Args:
        model: the network to train; it is switched to training mode.
        data_train: iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``.
        current_device: device the batches are moved to before the forward pass.
        new_loss: when True, predictions and targets are pre-multiplied by the
            adjugate of the batch matrix (``det(X) * inv(X)``) before the loss is
            computed; this requires every batch to be a square, invertible matrix.
        scheduler: optional learning-rate scheduler, stepped once per epoch.

    Returns:
        The mean of the per-batch losses as a float (0.0 for an empty iterable).
    """
    loss_epoch = 0.0
    n_batches = 0
    model.train()
    for X_batch, y_batch in data_train:
        X_batch, y_batch = X_batch.to(current_device), y_batch.to(current_device)

        # forward pass
        predicted = model(X_batch)
        if new_loss:
            determinant = torch.det(X_batch)
            inverse_batch = torch.linalg.inv(X_batch)
            adjoin = determinant * inverse_batch
            loss = loss_fn(adjoin @ predicted, adjoin @ y_batch)
        else:
            loss = loss_fn(predicted, y_batch)

        loss_epoch += loss.detach()
        n_batches += 1

        # zero gradient
        optimizer.zero_grad()

        # backpropagation (compute gradient)
        loss.backward()

        # update model parameters
        optimizer.step()

    # update learning rate (once per epoch, after all optimizer steps)
    if scheduler is not None:
        scheduler.step()

    return float(loss_epoch) / max(n_batches, 1)