In [1]:
import numpy as np
import random

In [2]:
def gradient_descent(gradient_func, start_point, iterations, eps):
    '''
    Generic iterative descent loop driven by a pluggable update rule.

    :param gradient_func: callable invoked as ``gradient_func(point, previous_update)``;
        its return value is added to the current point as-is, so it must already
        carry the sign and step size of the move
    :param start_point: initial approximation (must support ``len`` and addition
        with the update returned by ``gradient_func``)
    :param iterations: maximum number of iterations to perform
    :param eps: stop as soon as the Euclidean length of a move drops below this value
    :return: tuple ``(point, count)``; on early stop ``point`` is the position
        before the too-small move and ``count`` is the zero-based index of that
        iteration, otherwise the last accepted position and ``iterations``
    '''
    position = start_point
    # The very first call receives an all-zero "previous update".
    previous_update = np.zeros(len(start_point))

    for step_index in range(iterations):
        update = gradient_func(position, previous_update)
        candidate = position + update

        # The move is too short: report the position we were at before it.
        if np.linalg.norm(position - candidate) < eps:
            return position, step_index

        position, previous_update = candidate, update

    return position, iterations

In [3]:
class MeanSquaredError:
    '''
    Mean squared error of a regression model over a collection of samples.

    Each sample ``p`` is indexed as ``p[0]`` (model input) and ``p[1]`` (observed
    value). The model object must provide ``function(state, x)`` and
    ``gradient(state, x)``.
    '''

    def function(self, regression, points, state):
        '''
        Average of the squared residuals ``(p[1] - regression.function(state, p[0])) ** 2``.

        :param regression: model object providing ``function(state, x)``
        :param points: non-empty sized iterable of samples
        :param state: model parameters forwarded to ``regression``
        :return: the mean squared error over ``points``
        '''
        sum_square_error = 0.0
        for p in points:
            sum_square_error += (p[1] - regression.function(state, p[0])) ** 2
        return sum_square_error / len(points)

    def gradient(self, regression, points, state):
        '''
        Gradient of the mean squared error with respect to ``state``.

        :param regression: model object providing ``function(state, x)`` and
            ``gradient(state, x)``
        :param points: non-empty sized iterable of samples
        :param state: model parameters (any sized sequence)
        :return: numpy array with one component per entry of ``state``
        '''
        # One accumulator slot per model parameter. Use len(state) directly:
        # len(state + 1) has the same value for an ndarray but raises TypeError
        # for a plain list or tuple.
        gradient_sum = np.zeros(len(state))
        for p in points:
            residual = p[1] - regression.function(state, p[0])
            # d/dstate (residual ** 2) = -2 * residual * d(prediction)/dstate
            gradient_sum -= 2 * residual * regression.gradient(state, p[0])
        return gradient_sum / len(points)

In [4]:
def standart_gradient(regression, points, n, error_func, step):
    '''
    Build the update rule for plain mini-batch gradient descent.

    :param regression: model object forwarded to ``error_func.gradient``
    :param points: population of samples the mini-batches are drawn from
    :param n: number of samples drawn with ``random.sample`` for every update
    :param error_func: object providing ``gradient(regression, batch, state)``
    :param step: step size multiplying the negated batch gradient
    :return: callable ``(current_point, current_gradient) -> update``;
        ``current_gradient`` is accepted but not used by this rule
    '''
    def next_gradient(current_point, current_gradient):
        batch = random.sample(points, n)
        batch_gradient = error_func.gradient(regression, batch, current_point)
        return -step * batch_gradient
    return next_gradient

def momentum_gradient(regression, points, n, error_func, mu, step):
    '''
    Build the update rule for mini-batch gradient descent with momentum.

    :param regression: model object forwarded to ``error_func.gradient``
    :param points: population of samples the mini-batches are drawn from
    :param n: number of samples drawn with ``random.sample`` for every update
    :param error_func: object providing ``gradient(regression, batch, state)``
    :param mu: weight applied to the previous update
    :param step: step size multiplying the batch gradient
    :return: callable ``(current_point, current_gradient) -> update`` computing
        ``mu * current_gradient - step * batch_gradient``
    '''
    def next_gradient(current_point, current_gradient):
        batch = random.sample(points, n)
        batch_gradient = error_func.gradient(regression, batch, current_point)
        inertia = mu * current_gradient
        return inertia - step * batch_gradient
    return next_gradient

def nesterov_gradient(regression, points, n, error_func, mu, step):
    '''
    Build the update rule for mini-batch gradient descent with look-ahead momentum.

    The batch gradient is evaluated at ``current_point + mu * current_gradient``
    rather than at ``current_point`` itself.

    :param regression: model object forwarded to ``error_func.gradient``
    :param points: population of samples the mini-batches are drawn from
    :param n: number of samples drawn with ``random.sample`` for every update
    :param error_func: object providing ``gradient(regression, batch, state)``
    :param mu: weight applied to the previous update
    :param step: step size multiplying the batch gradient
    :return: callable ``(current_point, current_gradient) -> update``
    '''
    def next_gradient(current_point, current_gradient):
        inertia = mu * current_gradient
        look_ahead = current_point + inertia
        batch = random.sample(points, n)
        batch_gradient = error_func.gradient(regression, batch, look_ahead)
        return inertia - step * batch_gradient
    return next_gradient

In [5]:
class LinearRegression:
    '''
    Linear model ``state[0] + state[1] * point[0] + state[2] * point[1] + ...``.
    '''

    def function(self, state, point):
        '''
        Evaluate the model at ``point``.

        :param state: parameters; ``state[0]`` is the intercept and ``state[k + 1]``
            multiplies ``point[k]``
        :param point: sequence of input coordinates
        :return: the model prediction
        '''
        prediction = state[0]
        for position, coordinate in enumerate(point, start=1):
            prediction += state[position] * coordinate
        return prediction

    def gradient(self, state, point):
        '''
        Derivative of the prediction with respect to every parameter.

        :param state: parameters (not used: the model is linear in them)
        :param point: sequence of input coordinates
        :return: numpy array ``[1.0, point[0], point[1], ...]``
        '''
        intercept_part = [1.0]
        return np.concatenate((intercept_part, point))

In [6]:
# Synthetic data set: every sample is (features, y) with
# y = 1 + 2 * x_0 + 3 * x_1 + ... and each x_j drawn uniformly from [-10, 10].
number_of_points = 10
number_of_dimensions = 1
points = []
for i in range(number_of_points):
    # Named `target`, not `sum`, so the builtin sum() stays usable in later cells.
    target = 1
    point = []
    for j in range(number_of_dimensions):
        x = random.uniform(-10, 10)
        point.append(x)
        target += (2 + j) * x
    # uniform(0, 0) always yields 0.0: a noise placeholder, widen the range to add noise.
    points.append((point, target + random.uniform(0, 0)))

In [7]:
# Run the three update rules on the same data, each from an all-zero start
# (one intercept plus one weight per input coordinate).
state_size = len(points[0][0]) + 1

update_rules = (
    standart_gradient(
        regression=LinearRegression(),
        points=points,
        n=1,
        error_func=MeanSquaredError(),
        step=0.01
    ),
    momentum_gradient(
        regression=LinearRegression(),
        points=points,
        n=3,
        error_func=MeanSquaredError(),
        step=0.01,
        mu=0.9
    ),
    nesterov_gradient(
        regression=LinearRegression(),
        points=points,
        n=3,
        error_func=MeanSquaredError(),
        step=0.01,
        mu=0.9
    ),
)

for update_rule in update_rules:
    # Each result is printed as (final state, iterations used).
    print(gradient_descent(
        gradient_func=update_rule,
        start_point=np.array([0.0] * state_size),
        iterations=1000,
        eps=1e-5
    ))

(array([0.99923235, 2.0001191 ]), 368)
(array([1.00010703, 2.00002142]), 373)
(array([1.00012471, 1.99997297]), 168)
