In [30]:
import numpy as np
from sympy import symbols, diff, sin

def gradient_descent(f, lr, x_t, max_iter, tol):
    #h = 1e-5  # Small step for numerical differentiation
    #y_diff = (f(x0 + h) - f(x0)) / h
    x = symbols('x')
    f_sym = f(x)
    y_diff = diff(f_sym, x)
    i = 0
    while i <= max_iter:
        grad = float(y_diff.subs(x, x_t))
        x_t1 = x_t - lr * grad
        if np.abs(grad) < tol:
            break
        x_t = x_t1
        i += 1

    return f_sym.subs(x, x_t), grad

gradient_descent(
    f=lambda x: x**2 + 5*sin(x),
    lr=0.1,
    x_t=-10,
    max_iter=100,
    tol=1e-1
)

(-3.24623879507347, -0.04492266828968239)

In [54]:
def gradient_descent_variables(f, lr, x_t, max_iter, tol):
    # define vector gradient for multiple variables
    x, y = symbols('x y')
    f_sym = f(x, y)
    grad_x = diff(f_sym, x)
    grad_y = diff(f_sym, y)
    i = 0
    while i <= max_iter:
        grad = [float(grad_x.subs({x: x_t[0], y: x_t[1]})), float(grad_y.subs({x: x_t[0], y: x_t[1]}))]
        x_t1 = [x_t[0] - lr * grad[0], x_t[1] - lr * grad[1]]
        if np.linalg.norm(grad) < tol:
            break
        x_t = x_t1
        i += 1

    return f_sym.subs({x: x_t[0], y: x_t[1]}), x_t[0], x_t[1]

gradient_descent_variables(
    f=lambda x, y: (x**2 + y - 7)**2 + (x - y + 1)**2,
    lr=0.015,
    x_t=[0, 0],
    max_iter=1000,
    tol=1e-3
)

(3.83185466235675e-7, -2.999855012457979, -1.9992471632382347)

In [64]:
def Accelerated_gradient_descent(f, lr, x_t, max_iter, tol):
    # define vector gradient for multiple variables
    x, y = symbols('x y')
    f_sym = f(x, y)
    grad_x = diff(f_sym, x)
    grad_y = diff(f_sym, y)
    i = 0
    x_t1 = x_t  # Initialize x_t1 with the initial value of x_t
    while i <= max_iter - 1:
        f = x_t + (i - 1)/(i + 2) * (np.array(x_t) - np.array(x_t1))
        grad = [float(grad_x.subs({x: x_t[0], y: x_t[1]})), float(grad_y.subs({x: x_t[0], y: x_t[1]}))]
        x_t2 = [f[0] - lr * grad[0], f[1] - lr * grad[1]]
        if np.linalg.norm(grad) < tol:
            break
        x_t = x_t1
        x_t1 = x_t2
        i += 1

    return f_sym.subs({x: x_t[0], y: x_t[1]}), x_t[0], x_t[1]

Accelerated_gradient_descent(
    f=lambda x, y: (x**2 + y - 7)**2 + (x - y + 1)**2,
    lr=0.015,
    x_t=[0, 0],
    max_iter=1000,
    tol=1e-3
)

  x_t2 = [f[0] - lr * grad[0], f[1] - lr * grad[1]]


(nan, np.float64(nan), np.float64(nan))

In [None]:
def Stochastic_gradient_descent(a, b, x_t, max_iter, tol):
    # define vector gradient for multiple variables
    x, y = symbols('x y')
    f_sym = f(x, y)
    grad_x = diff(f_sym, x)
    grad_y = diff(f_sym, y)
    i = 0
    while i <= max_iter - 1:
        grad = [float(grad_x.subs({x: x_t[0], y: x_t[1]})), float(grad_y.subs({x: x_t[0], y: x_t[1]}))]
        x_t1 = [x_t[0] - lr * grad[0], x_t[1] - lr * grad[1]]
        if np.linalg.norm(grad) < tol:
            break
        x_t = x_t1
        i += 1

    return f_sym.subs({x: x_t[0], y: x_t[1]}), x_t[0], x_t[1]

Stochastic_gradient_descent(
    f=lambda x, y: (x**2 + y - 7)**2 + (x - y + 1)**2,
    lr=0.015,
    x_t=[0, 0],
    max_iter=1000,
    tol=1e-3
)

(3.83185466235675e-7, -2.999855012457979, -1.9992471632382347)