In [11]:
import numpy as np
from numpy.linalg import norm
from numpy.random import normal
from numpy.typing import NDArray
from typing import Callable

## Общая функция градиентного спуска

In [12]:
def GradientDescent(
    start: NDArray,
    f: Callable,
    f_grad: Callable,
    learning_rate: float = 0.01,
    max_iter: int = 1000,
    tol: float = 1e-7,
    **kwargs
) -> dict:
    """Minimise ``f`` with fixed-step gradient descent.

    Each iteration moves the current point by ``-learning_rate * gradient``.
    The loop stops as soon as one of the following holds:

    * ``max_iter`` steps have been taken;
    * the norm of the gradient at the current point is below ``tol``;
    * the objective changed by less than ``tol`` during the last step.

    The first two conditions are checked *before* every step, including the
    first one, so no step is taken when ``max_iter <= 0`` or when ``start`` is
    already stationary; in that case ``"point"`` is the ``start`` object itself.

    Parameters
    ----------
    start : NDArray
        Initial point.
    f : Callable
        Objective, called as ``f(point, **kwargs)``.
    f_grad : Callable
        Gradient of the objective, called as ``f_grad(point, **kwargs)``.
    learning_rate : float
        Step size multiplying the gradient.
    max_iter : int
        Upper bound on the number of descent steps.
    tol : float
        Threshold used both for the gradient norm and for the change of the
        objective between consecutive points.
    **kwargs
        Extra keyword arguments forwarded unchanged to ``f`` and ``f_grad``.

    Returns
    -------
    dict
        ``"point"`` (last point), ``"f_value"`` (objective there),
        ``"grad_value"`` (gradient there) and ``"iterations"`` (number of
        steps actually taken).
    """
    curr_point = start
    curr_value = f(curr_point, **kwargs)
    curr_grad = f_grad(curr_point, **kwargs)

    curr_iter = 0
    while curr_iter < max_iter and norm(curr_grad) >= tol:
        prev_value = curr_value
        # A new array is built on every step, so ``start`` is never mutated.
        curr_point = curr_point - learning_rate * curr_grad
        curr_value = f(curr_point, **kwargs)
        curr_grad = f_grad(curr_point, **kwargs)
        curr_iter += 1

        # Written as ``not (... >= tol)`` so that a NaN objective also stops
        # the loop: every comparison with NaN is False.
        if not abs(curr_value - prev_value) >= tol:
            break

    return {
        "point": curr_point,
        "f_value": curr_value,
        "grad_value": curr_grad,
        "iterations": curr_iter,
    }


### Тест градиентного спуска

In [22]:
def f(w, X, y):
    """Mean squared error of the linear model with an appended intercept column.

    A column of ones is stacked to the right of ``X``, so the last entry of
    ``w`` acts as the intercept. Returns ``||[X, 1] w - y||^2 / y.size``.
    """
    n_obs = y.size
    bias_column = np.ones((n_obs, 1))
    design = np.hstack([X, bias_column])
    residual = np.dot(design, w) - y
    scale = 1 / n_obs
    return scale * norm(residual) ** 2


def f_grad(w, X, y):
    """Gradient with respect to ``w`` of the mean squared error ``f``.

    Uses the same design matrix as ``f`` (``X`` with a trailing column of
    ones) and returns ``2 / y.size * [X, 1]^T ([X, 1] w - y)``.
    """
    n_obs = y.size
    bias_column = np.ones((n_obs, 1))
    design = np.hstack([X, bias_column])
    residual = np.dot(design, w) - y
    scale = 2 / n_obs
    return scale * np.dot(design.T, residual)

# Synthetic linear-regression problem with a fixed seed so the run is repeatable.
np.random.seed(42)
nrow, ncol = 15, 4

# The order of the random draws (features -> noise -> start point) is kept
# fixed, because every draw advances the same global generator.
X = normal(0, 1, size=(nrow, ncol))

# Last coefficient is the intercept: it multiplies the appended column of ones.
true_w = np.array([2, -3, 1, 0.5, 4])
X_with_bias = np.hstack([X, np.ones((nrow, 1))])
y = X_with_bias.dot(true_w) + normal(0, 1, nrow)

w_start = normal(0, 1, ncol + 1)

# X and y are forwarded to f and f_grad through **kwargs.
gd_res = GradientDescent(w_start, f, f_grad, X=X, y=y)

In [25]:
# Number of descent steps taken before a stopping rule fired.
print(f'Iterations: {gd_res["iterations"]}')
# Squared Euclidean distance between the estimated and the true weights;
# y contains noise, so this is not expected to be exactly zero.
print(f'||w_e-w_t||^2 = {norm(gd_res["point"] - true_w) ** 2}')

Iterations: 976
||w_e-w_t||^2 = 0.17407489786824498
