In [728]:
import numpy as np
import torch.optim
from scipy.optimize import minimize, least_squares, LinearConstraint, NonlinearConstraint
import torch.nn as nn
from torch.autograd import Variable
from enum import Enum
from torch import float64

In [729]:
from linreg import Methods, LearningRateScheduling


def optimizer_handler(method, params, lr, beta_1=0.9, beta_2=0.999, factor=10):
    match method:
        case Methods.Classic:
            return torch.optim.SGD(params, lr)
        case Methods.Momentum:
            return torch.optim.SGD(params, lr, beta_1)
        case Methods.AdaGrad:
            return torch.optim.Adagrad(params, lr * factor)
        case Methods.RMSprop:
            return torch.optim.RMSprop(params, lr, alpha=beta_2)
        case Methods.Adam:
            return torch.optim.Adam(params, lr, betas=(beta_1, beta_2))
        case Methods.Nesterov:
            return torch.optim.SGD(params, lr, nesterov=True, momentum=beta_1)


class TorchLinearRegression:
    """Linear regression in a basis of feature functions, fitted with torch optimizers.

    The design matrix ``self.T`` has one row per sample of ``X`` and one column per
    callable in ``T_funcs``; the model prediction is ``self.T @ self.W``.
    ``self.W_points`` stores a NumPy snapshot of the weights for the initial point
    and after every optimizer step.
    """

    def __init__(self, T, X, Y, W=None):
        """Build the design matrix and initialise the weights.

        Args:
            T: indexable sequence of callables; ``T[j](X[i])`` is entry (i, j) of the design matrix.
            X: samples; ``len(X)`` gives the number of rows.
            Y: regression targets.
            W: optional initial weights, forwarded to ``refresh``.
        """
        self.T_funcs = T
        n_rows, n_funcs = len(X), len(T)
        # Row-major enumeration: every basis function is applied to every sample.
        self.T = torch.tensor(
            [T[col](X[row]) for row in range(n_rows) for col in range(n_funcs)],
            dtype=float64,
        ).reshape(n_rows, n_funcs)
        self.X = torch.tensor(X, dtype=float64)
        self.Y = torch.tensor(Y, dtype=float64)
        self.loss_f = nn.MSELoss(reduction='mean')

        self.refresh(W)

    def optimize(self, method=Methods.Classic, lr=0.01, lrs=LearningRateScheduling.Classic, max_steps=1500):
        """Run ``max_steps`` full-batch optimizer steps on ``self.W`` and return it.

        Args:
            method: ``Methods`` member passed to ``optimizer_handler``.
            lr: learning rate passed to ``optimizer_handler``.
            lrs: accepted but currently not used by this implementation.
            max_steps: number of optimizer steps to perform.

        Returns:
            The weight tensor ``self.W`` (the same object that is optimised in place).
        """
        optimizer = optimizer_handler(method, [self.W], lr)

        for _ in range(max_steps):
            optimizer.zero_grad()
            # Computes the loss and back-propagates into self.W.grad.
            self.loss(self.W, is_no_grad=False)
            optimizer.step()
            self.W_points.append(self.W.clone().detach().numpy())

        return self.W

    def loss(self, W, is_no_grad=True):
        """Evaluate the loss of weights ``W`` on the stored data.

        Args:
            W: weights; a NumPy array is reshaped to ``(len(T_funcs), 1)`` and converted
                to a float64 tensor, anything else is used as given.
            is_no_grad: when True, evaluate without autograd and return the mean squared
                error multiplied by ``len(self.X)``; when False, call ``backward()`` on
                the mean squared error and return that mean value.

        Returns:
            The loss as a Python float (scaled as described above).
        """
        if isinstance(W, np.ndarray):
            W = np.array(W).reshape(len(self.T_funcs), 1)
            W = torch.tensor(W, dtype=float64, requires_grad=True)
        if is_no_grad:
            with torch.no_grad():
                loss_val = self.loss_f(self.T @ W, self.Y)
                return float(loss_val) * len(self.X)
        else:
            loss_val = self.loss_f(self.T @ W, self.Y)
            loss_val.backward()
            return float(loss_val)

    def refresh(self, W=None):
        """Reset the weights and the recorded trajectory.

        Args:
            W: new starting weights. ``None`` draws them from a standard normal
                distribution with shape ``(len(T_funcs), 1)``; a NumPy array is reshaped
                to that column shape; any other value (e.g. a tensor) keeps its shape.
        """
        if isinstance(W, np.ndarray):
            # The module-level ``float64`` is torch.float64, which NumPy cannot interpret
            # as a dtype, so the NumPy dtype has to be spelled out explicitly here.
            W = np.asarray(W, dtype=np.float64).reshape(len(self.T_funcs), 1)
        if W is None:
            W = torch.randn(len(self.T_funcs), 1, dtype=float64)
        # clone().detach() yields an independent float64 leaf tensor without the
        # copy-construct warning that torch.tensor(tensor) emits.
        self.W = torch.as_tensor(W, dtype=float64).clone().detach().requires_grad_(True)
        self.W_points = [self.W.clone().detach().numpy()]

    def analytical_solution(self):
        """Return the normal-equation solution ``(T^T T)^{-1} T^T Y`` as a tensor."""
        return (torch.linalg.inv(torch.t(self.T) @ self.T) @ torch.t(self.T)) @ self.Y

In [730]:
from matplotlib import pyplot as plt


def visualise(f, points, title, x_label="x", y_label="y"):
    """Draw contour lines of ``f`` together with the path described by ``points``.

    Args:
        f: callable taking a length-2 NumPy array and returning the height to contour.
        points: sequence of 2-component points visited, in order.
        title: figure title.
        x_label: label of the horizontal axis.
        y_label: label of the vertical axis.
    """
    values = np.array(points).T
    xs, ys = values[0], values[1]

    # The grid extends 10 units beyond the visited region on every side.
    X = np.linspace(min(xs) - 10, max(xs) + 10, 100)
    Y = np.linspace(min(ys) - 10, max(ys) + 10, 100)
    Z = [[f(np.array([x, y])) for x in X] for y in Y]
    plt.contour(X, Y, Z, 80)

    plt.plot(xs, ys, marker='.')
    plt.plot(xs[0], ys[0], 'og')      # first point in green
    plt.plot(xs[-1], ys[-1], 'or')    # last point in red
    plt.title(title)
    plt.legend(['Route', 'Start point', 'End point'])
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()


def print_errors(linreg, torch_linreg):
    """Print how far each model's current loss is from the loss of its analytical solution.

    Both arguments must provide ``loss(W)``, ``analytical_solution()`` and a ``W``
    attribute. For each model the reference loss, the current loss and their
    absolute and relative difference are printed.
    """
    separator = "-------------------------"

    def report(model, expected_label, received_label, absolute_label, relative_label):
        # Loss at the closed-form optimum versus loss at the weights reached so far.
        reference = model.loss(model.analytical_solution())
        reached = model.loss(model.W)
        gap = np.linalg.norm(reference - reached)
        print(expected_label + str(reference))
        print(received_label + str(reached))
        print(absolute_label + str(gap))
        print(relative_label + str(gap / np.linalg.norm(reference)))

    print(separator)

    report(
        linreg,
        "Excepted loss: ",
        "Received loss: ",
        "Absolute error loss: ",
        "Relative error loss: ",
    )

    print()

    report(
        torch_linreg,
        "Excepted torch loss: ",
        "Received linreg torch loss: ",
        "Absolute error torch loss: ",
        "Relative error torch loss: ",
    )

    print(separator)

In [None]:
import random
from linreg import gen_linear_reg, visualise_approximation, sgd_handler


def test_sgd_variants(errors=False):
    """Compare the hand-written SGD variants with their torch counterparts.

    For every generated regression problem, every ``Methods`` member and every
    applicable ``LearningRateScheduling`` member, both implementations are restarted
    from the same start point, optimised, and their trajectories are plotted.

    Args:
        errors: when True, also print the loss comparison for each configuration.
    """
    count_2_arity = 1
    count_other_arity = 0
    left_coeffs_border = -3.
    right_coeffs_border = 3.
    left_x_border = -2.
    right_x_border = 2.
    deviation = 4.

    for i in range(count_2_arity + count_other_arity):
        arity = 2 if i < count_2_arity else random.randint(3, 8)
        num_train_points = random.randint(50, 100)
        start_point = np.array([float(random.randint(15, 30)) for _ in range(arity)])
        linreg = gen_linear_reg(
            arity - 1, num_train_points,
            left_coeffs_border, right_coeffs_border,
            left_x_border, right_x_border,
            deviation
        )
        torch_linreg = TorchLinearRegression(linreg.T_funcs, linreg.X, linreg.Y, torch.tensor(start_point))
        # These methods get a larger base rate and are only run with the Classic schedule.
        self_scheduled = (Methods.AdaGrad, Methods.Adam, Methods.RMSprop)

        for method in Methods:
            lr = 0.01
            if method in self_scheduled:
                lr = 0.1
            for lrs in LearningRateScheduling:
                if lrs != LearningRateScheduling.Classic and method in self_scheduled:
                    continue
                linreg.refresh(start_point)
                sgd_handler(linreg, lambda *_: lr, method, lrs, store_points=True)
                title = 'OUR ' + method.name + ' | ' + lrs.name
                # Fixed: the comparison used to sit inside len(...), i.e. len(T_funcs == 2),
                # which never tested the number of basis functions.
                if len(linreg.T_funcs) == 2:
                    visualise_approximation(linreg, title)
                visualise(linreg.loss, linreg.W_points, title)

                title = 'ZAPADNOE ' + method.name + ' | ' + lrs.name
                torch_linreg.refresh(torch.tensor(start_point))
                torch_linreg.optimize(method, lr=lr, lrs=lrs)
                if len(torch_linreg.T_funcs) == 2:
                    visualise_approximation(torch_linreg, title)
                visualise(torch_linreg.loss, torch_linreg.W_points, title)

                if errors:
                    print_errors(linreg, torch_linreg)


# Run the comparison, printing the loss report for every configuration.
test_sgd_variants(errors=True)

In [None]:
# Example 3: recover polynomial coefficients with TorchLinearRegression

# Expected (ground-truth) coefficients, highest power first
excepted = np.array([-2.34, 8.987, 118.12, 103.1])
M = len(excepted)

# Parameters
N = 100
deviation = 0.01
noise = deviation * torch.randn(N, 1)
powers = list(range(M - 1, -1, -1))
# One monomial x ** powers[i] per coefficient; i is bound per lambda via the default argument.
Funcs = np.array([lambda x, i=i: x ** powers[i] for i in range(M)])
X1 = torch.randn(N, 1)
Y1 = sum(excepted[i] * Funcs[i](X1) for i in range(M)) + noise

# Calculations
lin_reg = TorchLinearRegression(Funcs, X1, Y1)
received = lin_reg.optimize().detach().numpy().reshape(-1)
print("Excepted: ", excepted, sep="")
print("Received: ", received, sep="")
print("Absolute error: ", np.linalg.norm(excepted - received), sep="")
print("Relative error: ", np.linalg.norm(excepted - received) / np.linalg.norm(excepted), sep="")

In [733]:
def derivative(f, x, i, delt=0.0001):
    """Central-difference estimate of the partial derivative of ``f`` along coordinate ``i``.

    Args:
        f: callable taking a 1-D NumPy array and returning a value that supports
            subtraction and division by a float.
        x: point at which to differentiate (array-like); it is not modified.
        i: index of the coordinate to perturb.
        delt: half-width of the symmetric difference step.

    Returns:
        ``(f(x + delt * e_i) - f(x - delt * e_i)) / (2 * delt)``.
    """
    # Perturb float64 copies: with an integer array the in-place ``+=`` of a float
    # step fails, and with float32 the small step would be applied in single precision.
    x_1 = np.array(x, dtype=float)
    x_2 = np.array(x, dtype=float)
    x_1[i] += delt
    x_2[i] -= delt
    y_1 = f(x_1)
    y_2 = f(x_2)
    return (y_1 - y_2) / (2 * delt)


def grad(f, delt=0.01):
    """Return a function that evaluates the finite-difference gradient of ``f``.

    Args:
        f: scalar function of a 1-D array.
        delt: step forwarded to ``derivative`` for every coordinate.

    Returns:
        A callable mapping a point ``x`` to a NumPy array with one partial
        derivative per coordinate of ``x``.
    """
    def grad_calc(x):
        return np.array([derivative(f, x, idx, delt) for idx in range(len(x))])

    return grad_calc


def hessian(f):
    """Return a function that evaluates a finite-difference Hessian of ``f``.

    Entry ``(i, j)`` is obtained by differentiating, along coordinate ``i``, the
    partial derivative of ``f`` along coordinate ``j``; both use ``derivative``
    with its default step.
    """
    def calc(x):
        size = len(x)
        B = np.asarray([[0.0] * size for _ in range(size)])
        for row in range(size):
            for col in range(size):
                def partial_along_col(x_tmp, col=col):
                    return derivative(f, x_tmp, col)

                B[row][col] = derivative(partial_along_col, x, row)
        return B

    return calc

In [734]:
Algorithms = Enum('Methods', ['Newton', 'DogLeg', 'BFGS', 'LBFGS'])


def optimize_handler(fun, x0, algorithm=Algorithms.Newton):
    match algorithm:
        case Algorithms.Newton:
            return least_squares(fun, x0)
        case Algorithms.DogLeg:
            return minimize(fun, x0, method='dogleg', jac=grad(fun), hess=hessian(fun))
        case Algorithms.BFGS:
            return minimize(fun, x0, method='BFGS')
        case Algorithms.LBFGS:
            return minimize(fun, x0, method='L-BFGS-B')

In [None]:
# Example 4: recover polynomial coefficients with a scipy optimiser (dogleg)

# Expected (ground-truth) coefficients, highest power first
excepted = np.array([-2.34, 8.987, 118.12, 103.1])
M = len(excepted)

# Parameters
N = 100
deviation = 0.01
noise = torch.randn(N, 1) * deviation
powers = [(M - 1 - i) for i in range(M)]
Funcs = np.array([lambda x, i=i: (x ** powers[i]) for i in range(M)])
X1 = torch.randn(N, 1)
Y1 = sum([excepted[i] * Funcs[i](X1) for i in range(M)]) + noise

# Loop-invariant parts of the objective, built once instead of on every evaluation:
# the optimiser's finite-difference gradient and Hessian call f1 many times.
T1 = torch.cat([Funcs[j](X1) for j in range(M)], dim=1).to(float64)  # shape (N, M)
Y1_target = torch.as_tensor(Y1, dtype=float64)
mse_loss = nn.MSELoss()


# Function
def f1(W):
    """Mean squared error of the polynomial model with coefficients ``W`` on (X1, Y1)."""
    W1 = torch.tensor(np.copy(W).reshape(len(Funcs), 1), dtype=float64)
    return mse_loss(T1.mm(W1), Y1_target).item()


# Calculations
# Start from a float64 point so that no finite-difference step is taken in single precision.
x0 = torch.randn(M).numpy().astype(np.float64)
received = optimize_handler(f1, x0, Algorithms.DogLeg).x
print("Excepted: " + str(excepted))
print("Received: " + str(received))
print("Absolute error: " + str(np.linalg.norm(excepted - received)))
print("Relative error: " + str(np.linalg.norm(excepted - received) / np.linalg.norm(excepted)))