In [1]:
import numpy as np
from numpy.linalg import norm
from numpy.random import normal
from numpy.typing import NDArray
from typing import Callable

# Градиентный спуск

In [2]:
def GradientDescent(
    start: NDArray,
    f_grad: Callable,
    f: Callable | None = None,
    learning_rate: float = 0.01,
    max_iter=1000,
    tol=1e-7,
    **kwargs
) -> dict:
    curr_point = start
    curr_value = None
    curr_grad = f_grad(curr_point, **kwargs)

    curr_iter = 0
    while curr_iter == 0 or (curr_iter < max_iter and norm(curr_grad) >= tol):
        curr_point = curr_point - learning_rate * curr_grad
        curr_grad = f_grad(curr_point, **kwargs)
        curr_iter += 1

    if f is not None:
        curr_value = f(curr_point, **kwargs)

    return {
        "point": curr_point,
        "f_value": curr_value,
        "grad_value": curr_grad,
        "iterations": curr_iter,
    }

### Тест градиентного спуска

In [5]:
def f(w, X, y):
    """Mean squared error of the linear model ``X.dot(w)`` against ``y``.

    Returns the squared Euclidean norm of the residual ``X.dot(w) - y``
    divided by the number of elements in ``y``.
    """
    residual = X.dot(w) - y
    squared_error = norm(residual) ** 2
    return 1 / y.size * squared_error


def f_grad(w, X, y):
    """Gradient with respect to ``w`` of ``norm(X.dot(w) - y) ** 2 / y.size``.

    Computed as ``2 / y.size * X.T.dot(X.dot(w) - y)``. Only the arguments
    are used: the design matrix is taken from the ``X`` parameter rather than
    from any notebook-level variable, so the function gives the right result
    for whatever matrix the caller passes in.
    """
    return 2 / y.size * X.T.dot(X.dot(w) - y)

# Fix the global NumPy seed so the random draws below are reproducible.
# The order of the three `normal` calls matters: reordering them changes
# every generated value.
np.random.seed(42)
nrow, ncol = 500, 4
# Feature matrix: nrow x ncol draws from a normal distribution (mean 0, std 1).
X = normal(0, 1, ncol * nrow).reshape(nrow, ncol)
# Append a column of ones so the last weight acts as an intercept term.
X_ones = np.hstack([X, np.ones((nrow, 1))])
# Ground-truth weights: ncol feature coefficients followed by the intercept.
true_w = np.array([2, -3, 1, 0.5, 4])
# Targets: exact linear response plus normal noise (mean 0, std 1).
y = X_ones.dot(true_w) + normal(0, 1, nrow)
# Random starting point for the optimiser, one entry per weight.
w_start = normal(0, 1, ncol + 1)

# Run gradient descent with its default learning_rate, max_iter and tol.
# X and y are forwarded to f_grad through **kwargs. The objective `f` is not
# passed, so gd_res["f_value"] is None.
gd_res = GradientDescent(start=w_start, f_grad=f_grad, X = X_ones, y = y)

In [6]:
# Report how many steps gradient descent took and the squared Euclidean
# distance between the estimated weights and the ground-truth weights.
print(f'Iterations: {gd_res["iterations"]}')
print(f'||w_e-w_t||^2 = {norm(gd_res["point"] - true_w) ** 2}')

Iterations: 992
||w_e-w_t||^2 = 0.011808794591008321
