In [None]:
from typing import Callable

import numpy as np
from numpy.linalg import norm
from numpy.typing import ArrayLike

## Общая функция градиентного спуска

In [None]:
def GradientDescent(
    start: ArrayLike,
    f: Callable,
    f_grad: Callable,
    learning_rate: float = 0.01,
    max_iter: int = 1000,
    tol: float = 1e-7,
    **kwargs
) -> dict:
    """Minimise ``f`` with fixed-step gradient descent.

    Every iteration replaces the current point with
    ``point - learning_rate * f_grad(point, **kwargs)``. Iteration stops as
    soon as one of the following holds:

    * ``max_iter`` steps have been taken (``max_iter <= 0`` takes no step);
    * ``numpy.linalg.norm`` of the current gradient is below ``tol``
      (checked before the first step as well, so a stationary start point
      is returned untouched);
    * the objective value changed by less than ``tol`` during the last step.

    Parameters
    ----------
    start : ArrayLike
        Initial point. It is passed to ``f`` / ``f_grad`` as given and is
        returned unchanged when no step is taken.
    f : Callable
        Objective, called as ``f(point, **kwargs)``.
    f_grad : Callable
        Gradient of the objective, called as ``f_grad(point, **kwargs)``.
    learning_rate : float
        Step size multiplying the gradient.
    max_iter : int
        Upper bound on the number of steps.
    tol : float
        Threshold used for both the gradient-norm and the
        objective-change stopping criteria.
    **kwargs
        Extra keyword arguments forwarded to every ``f`` and ``f_grad`` call.

    Returns
    -------
    dict
        ``"point"`` (final point), ``"f_value"`` (objective there),
        ``"grad_value"`` (gradient there) and ``"iterations"`` (number of
        steps actually taken).
    """
    curr_point = start
    curr_value = f(curr_point, **kwargs)
    curr_grad = f_grad(curr_point, **kwargs)

    curr_iter = 0
    # The criteria are phrased as ``>= tol`` (not ``< tol``) on purpose:
    # a NaN compares False, so a diverged run stops instead of spinning
    # until ``max_iter``.
    while curr_iter < max_iter and norm(curr_grad) >= tol:
        prev_value = curr_value
        curr_point = curr_point - learning_rate * curr_grad
        curr_value = f(curr_point, **kwargs)
        curr_grad = f_grad(curr_point, **kwargs)
        curr_iter += 1

        if not abs(curr_value - prev_value) >= tol:
            break

    return {
        "point": curr_point,
        "f_value": curr_value,
        "grad_value": curr_grad,
        "iterations": curr_iter,
    }


### Тест градиентного спуска

In [None]:
def f(w, X, y):
    """Mean squared error of the linear model ``X.dot(w)`` against ``y``.

    Evaluated as the squared norm of the residual ``X.dot(w) - y``
    divided by ``y.size``.
    """
    residual = X.dot(w) - y
    return 1 / y.size * norm(residual) ** 2


def f_grad(w, X, y):
    """Gradient with respect to ``w`` of ``norm(X.dot(w) - y) ** 2 / y.size``.

    Evaluated as ``2 / y.size`` times ``X.T`` applied to the residual
    ``X.dot(w) - y``.
    """
    residual = X.dot(w) - y
    scale = 2 / y.size
    return scale * X.T.dot(residual)

# Reproducible synthetic least-squares problem: `nrow` samples with `ncol`
# normally distributed features plus a constant column of ones (bias term).
np.random.seed(42)
nrow, ncol = 15, 4
# Draw order (features, then y, then w_start) is kept fixed so the seeded
# results stay reproducible.
features = np.random.normal(0, 1, ncol * nrow).reshape(nrow, ncol)
X = np.hstack([features, np.ones((nrow, 1))])
y = np.random.normal(0, 1, nrow)
w_start = np.random.normal(0, 1, ncol + 1)

# Sanity check: the gradient must have one component per weight.
assert f_grad(w_start, X, y).shape == w_start.shape

GradientDescent(w_start, f, f_grad, X=X, y=y)

6.299163269775785 (5,)


{'point': array([-0.03087644,  0.31055409,  0.11018273, -0.13628583,  0.21170507]),
 'f_value': np.float64(1.1533208330601774),
 'grad_value': array([-7.78114477e-05, -2.25666402e-03,  3.51358041e-04,  7.67418060e-04,
        -1.97615386e-03]),
 'iterations': 632}