In [None]:
import numpy as np
from enum import Enum
from math import exp

class Methods(Enum):
    """Optimiser variants selectable in ``sgd_handler``."""

    Classic = 1
    Momentum = 2
    AdaGrad = 3
    RMSprop = 4
    Adam = 5
    Nesterov = 6


class Regularization(Enum):
    """Penalty term added to the loss of ``LinearRegression``."""

    WithoutRegularization = 1
    L1 = 2
    L2 = 3
    Elastic = 4


class LearningRate(Enum):
    """How the base step size is chosen: a constant or a dichotomy line search."""

    Const = 1
    Dichotomy = 2


class LearningRateScheduling(Enum):
    """Schedule applied on top of the base step size (see ``lrs_handler``)."""

    Classic = 1
    Stepwise = 2
    Exponential = 3


def sign(x):
    """Return 1 for a positive scalar, 0 for zero and -1 for anything else."""
    if x == 0:
        return 0
    return 1 if x > 0 else -1


class LinearRegression:
    def __init__(self, T, W, X, Y, regularization=Regularization.WithoutRegularization, l1=0.1, l2=0.1):
        self.T = np.array([T[i % len(T)](X[i // len(T)]) for i in range(len(T) * len(X))]).reshape(len(X), len(T))
        self.W = W
        self.X = X
        self.Y = Y
        self.regularization = regularization
        self.l1 = l1
        self.l2 = l2

    def sse(self, W_Arg):
        val = sum([(np.dot(self.T[i], W_Arg) - self.Y[i]) ** 2 for i in range(len(self.X))])
        match self.regularization:
            case Regularization.L1:
                val += self.l1 * sum([abs(w) for w in self.W]) / len(self.W)
            case Regularization.L2:
                val += self.l2 * sum([w ** 2 for w in self.W]) / len(self.W)
            case Regularization.Elastic:
                val += (self.l1 * sum([abs(w) for w in self.W])) / len(self.W) + (
                        self.l2 * sum([w ** 2 for w in self.W])) / len(self.W)
        return val

    def grad_by_components(self, index_components, W_Arg):
        grad_with_batch = np.zeros(len(W_Arg))
        for i in index_components:
            grad_with_batch += 2 * (np.dot(self.T[i], W_Arg) - self.Y[i]) * self.T[i]
        match self.regularization:
            case Regularization.L1:
                grad_with_batch += self.l1 * np.array([sign(w) for w in self.W]) / len(self.W)
            case Regularization.L2:
                grad_with_batch += self.l2 * 2 * self.W / len(self.W)
            case Regularization.Elastic:
                grad_with_batch += (self.l1 * np.array([sign(w) for w in self.W])) / len(self.W) + (
                        self.l2 * 2 * self.W) / len(self.W)

        return grad_with_batch


def sgd(lin_reg, lr, lrs, eps, batch, max_num_of_step, beta_1, beta_2, eps_adam, is_corr_beta_1=True,
        is_corr_beta_2=True, is_nesterov=False, decay=0.99):
    """Run mini-batch gradient descent on ``lin_reg`` and update ``lin_reg.W`` in place.

    The update is the Adam-style rule ``W -= alpha * V / sqrt(S + eps_adam)``;
    the other optimisers are obtained through the choice of ``beta_1``,
    ``beta_2``, ``eps_adam`` and the flags below.

    Args:
        lin_reg: model exposing ``W``, ``X``, ``sse`` and ``grad_by_components``.
        lr: callable receiving a one-argument function (step size -> loss along
            the current gradient direction) and returning the base step size.
        lrs: schedule selector forwarded to ``lrs_handler``.
        eps: stop once the loss changes by less than this between iterations.
        batch: number of consecutive samples (wrapping around) per step.
        max_num_of_step: upper bound on the iteration index.
        beta_1: decay of the first-moment (momentum) average ``V``.
        beta_2: decay of the second-moment average ``S``.
        eps_adam: constant added to ``S`` under the square root.
        is_corr_beta_1: apply bias correction to ``V`` before the update.
        is_corr_beta_2: apply bias correction to ``S`` before the update.
        is_nesterov: evaluate the gradient at the look-ahead point
            ``W - alpha * beta_1 * V`` instead of at ``W``.
        decay: decay factor forwarded to ``lrs_handler``.

    Returns:
        Index of the last iteration that was executed.
    """
    n_samples = len(lin_reg.X)
    V = np.zeros(len(lin_reg.W))
    S = np.zeros(len(lin_reg.W))
    lrs_func = lrs_handler(lrs, decay)
    prev_loss = lin_reg.sse(lin_reg.W)
    i = 0

    while True:
        components = [(i * batch + j) % n_samples for j in range(batch)]
        grad_with_batch = lin_reg.grad_by_components(components, lin_reg.W)

        base_rate = lr(lambda a: lin_reg.sse(lin_reg.W - a * grad_with_batch))
        # The schedule is driven by the number of completed passes over the
        # data, so the decay keeps progressing instead of resetting each epoch.
        alpha = lrs_func(base_rate, (i * batch) // n_samples)

        if is_nesterov:
            # Build the look-ahead point as a new array: the weights themselves
            # must only move once, in the update below.
            look_ahead = lin_reg.W - alpha * beta_1 * V
            grad_with_batch = lin_reg.grad_by_components(components, look_ahead)

        V = (beta_1 * V) + (1 - beta_1) * grad_with_batch
        S = (beta_2 * S) + (1 - beta_2) * (grad_with_batch ** 2)

        # Bias correction applies to the values used in this step only; the
        # running averages V and S are carried over uncorrected.
        V_hat = V / (1 - (beta_1 ** (i + 1))) if is_corr_beta_1 else V
        S_hat = S / (1 - (beta_2 ** (i + 1))) if is_corr_beta_2 else S

        lin_reg.W -= alpha * (V_hat / ((S_hat + eps_adam) ** 0.5))

        loss = lin_reg.sse(lin_reg.W)
        if abs(loss - prev_loss) < eps or i >= max_num_of_step:
            break
        prev_loss = loss
        i += 1

    return i


def sgd_handler(lin_reg, lr, method, lrs=LearningRateScheduling.Classic, batch=1, beta_1=0.9, beta_2=0.999,
                eps_adam=10 ** -8,
                eps=0.001, max_num_of_step=10000):
    args = {}
    match method:
        case Methods.Classic:
            return sgd(lin_reg, lr, lrs, eps, batch, max_num_of_step, 0, 1, 1, False, False)
        case Methods.Momentum:
            return sgd(lin_reg, lr, lrs, eps, batch, max_num_of_step, beta_1, 1, 1, False, False)
        case Methods.AdaGrad:
            return sgd(lin_reg, lr, lrs, eps, batch, max_num_of_step, 0, 0, eps_adam, False, False)
        case Methods.RMSprop:
            return sgd(lin_reg, lr, lrs, eps, batch, max_num_of_step, 0, beta_2, eps_adam, False)
        case Methods.Adam:
            return sgd(lin_reg, lr, lrs, eps, batch, max_num_of_step, beta_1, beta_2, eps_adam)
        case Methods.Nesterov:
            return sgd(lin_reg, lr, lrs, eps, batch, max_num_of_step, beta_1, 1, 1, False, False, True)


def lr_dichotomy(eps, delt):
    """Return a step-size rule that minimises a one-argument function by dichotomy.

    The returned callable takes the function to minimise, brackets the search
    interval as ``[0, right_border_calc(func)]`` and hands it to ``dichotomy``
    together with ``eps`` and ``delt``.
    """

    def search(lin_reg):
        upper = right_border_calc(lin_reg)
        return dichotomy(lin_reg, 0, upper, eps, delt)

    return search


def right_border_calc(func, start=0.0000001, growth=1.3, max_steps=1000):
    """Find a right border at which ``func`` exceeds its value at zero.

    The candidate border starts at ``start`` and is multiplied by ``growth``
    until ``func(border)`` is no longer ``<= func(0)``.

    Args:
        func: one-argument function of the step size.
        start: first candidate border.
        growth: factor applied to the candidate after each failed check.
        max_steps: maximum number of enlargements; guards against an endless
            loop when ``func`` never rises above ``func(0)`` (for example a
            flat or monotonically decreasing function).

    Returns:
        The first candidate whose value exceeds ``func(0)``, or the last
        candidate reached once ``max_steps`` enlargements have been made.
    """
    baseline = func(0.)
    border = start
    for _ in range(max_steps):
        # Written as "not >=" so a NaN value also ends the search.
        if not baseline >= func(border):
            break
        border *= growth
    return border


def dichotomy(func, a_1, a_2, eps, delt):
    """Minimise ``func`` on ``[a_1, a_2]`` with the dichotomy method.

    Each iteration probes ``func`` at ``mid - delt`` and ``mid + delt`` and
    discards the part of the interval on the side of the larger value.

    Args:
        func: one-argument function to minimise.
        a_1, a_2: borders of the initial interval.
        eps: the search stops once the interval is shorter than this.
        delt: half-distance between the two probe points.

    Returns:
        Midpoint of the final interval.

    Note:
        The interval cannot shrink below about ``2 * delt``. When
        ``eps <= 2 * delt`` the search therefore stops as soon as an iteration
        no longer shortens the interval, instead of looping forever.
    """
    left, right = a_1, a_2
    while abs(left - right) >= eps:
        middle = (left + right) / 2
        probe_left = middle - delt
        probe_right = middle + delt
        value_left = func(probe_left)
        value_right = func(probe_right)

        new_left, new_right = left, right
        if value_right > value_left:
            new_right = probe_right
        elif value_right < value_left:
            new_left = probe_left
        else:
            new_left, new_right = probe_left, probe_right

        # No progress means the interval has reached its floor of ~2 * delt.
        if abs(new_left - new_right) >= abs(left - right):
            break
        left, right = new_left, new_right
    return (left + right) / 2


def lrs_exp(decay=0.99):
    """Return a schedule that scales the rate by ``decay`` raised to the step ``t``."""

    def schedule(lr, t):
        return lr * (decay ** t)

    return schedule


def lrs_step(decay=0.99):
    """Return a schedule that divides the rate by ``1 + decay * t`` at step ``t``."""

    def schedule(lr, t):
        return lr / (1 + decay * t)

    return schedule


def lrs_handler(lrs, decay=0.99):
    match lrs:
        case LearningRateScheduling.Classic:
            return lambda lr, t: lr
        case LearningRateScheduling.Stepwise:
            return lrs_step(decay)
        case LearningRateScheduling.Exponential:
            return lrs_exp(decay)



In [None]:
import matplotlib.pyplot as plt


def visualise_points(linear_reg):
    """Plot the fitted line over the samples, then print the weights and the loss.

    The line is drawn as ``W[0] * x + W[1]`` for ``x`` in ``[-10, 10]``, i.e.
    the first weight is used as the slope and the second as the intercept.
    """
    slope, intercept = linear_reg.W[0], linear_reg.W[1]
    grid = np.linspace(-10, 10, num=1000)
    fitted = slope * grid + intercept

    plt.plot(grid, fitted, '-r')
    plt.plot(linear_reg.X, linear_reg.Y, 'og', linestyle='None')
    plt.xlabel("x")
    plt.show()

    print(f'W: {linear_reg.W!s}')
    print(f'Loss: {linear_reg.sse(linear_reg.W)!s}')

In [None]:
current_t = np.array([lambda x: x, lambda x: 1.])
current_w = np.array([0., 0.])
current_x = np.array([1., 2., 9., -2., -10.])
current_y = np.array([1., 2., 9., -2., 5.])

for method in Methods:
    for regularization in Regularization:
        for lr in LearningRate:
            for lrs in LearningRateScheduling:
                linear_reg_const = LinearRegression(
                    current_t, current_w, current_x, current_y, regularization
                )
                print(str(method), str(regularization), str(lr), str(lrs))
                count_rates = 0
                if lr == LearningRate.Const:
                    count_rates = sgd_handler(linear_reg_const, lambda x: 0.01, method, lrs=lrs)
                elif lr == LearningRate.Dichotomy:
                    count_rates = sgd_handler(linear_reg_const, lr_dichotomy(0.001, 0.0001), method, lrs=lrs)
                visualise_points(linear_reg_const)
                print('Count rates: ' + str(count_rates))
                current_w = np.array([0., 0.])
                print("\n\n")