In [1]:
import numpy as np
import matplotlib.pyplot as plt


# Regular Gradient Descent

In [53]:
def gradient_descent(point, gamma, gradient, n_iter=1, epsilon=0.001):
    p = point
    points = []
    for i in range(n_iter):
        p = p - gamma * gradient(*p)
        points.append(p)
        if i != 0 and np.linalg.norm(p - points[-2]) < epsilon:
            print(i)
            return p, points
    return p, points

# Polyak Gradient Descent

In [52]:
def polyak_gradient_descent(point, gamma, mu, gradient, n_iter=1, epsilon=0.001):
    p = point
    points = []
    for i in range(n_iter):
        if points != []:
            p = p - gamma * gradient(*p) + mu * (p - points[-1])
        else:
            p = p - gamma * gradient(*p)

        points.append(p)

        if i != 0 and np.linalg.norm(p - points[-2]) < epsilon:
            print(i)
            return p, points
    return p, points

# Nesterov Gradient Descent

In [51]:
def nesterov_gradient_descent(point, gamma, mu, gradient, n_iter=1, epsilon=0.001):
    p = point
    points = []
    for i in range(n_iter):
        if points != []:
            p = p - gamma * gradient(*(p + mu * (p - points[-1]))) + mu * (p - points[-1])
        else:
            p = p - gamma * gradient(*p)
        
        points.append(p)
        
        if i != 0 and np.linalg.norm(p - points[-2]) < epsilon:
            print(i)
            return p, points
    return p, points

# AdaGrad

In [49]:
def adaGrad(point, gamma, gradient, n_iter=1, epsilon=0.001):
    p = point
    points = []
    gradients = np.zeros((n_iter, point.shape[0]))
    for i in range(n_iter):
        grad = gradient(*p)
        squared_gradient = np.square(grad)
        gradients[i] = squared_gradient
        d = np.sum(gradients, axis=0)
        sqrt_d = np.sqrt(d) + 1e-7
        D = np.diag(sqrt_d)
        p = p - gamma * (D @ grad)
        points.append(p)

        if i != 0 and np.linalg.norm(p - points[-2]) < epsilon:
            print(i)
            return p, points
    return p, points

In [19]:
def func(x, y):
    return x**2 + np.e**x + y**2 -x*y

def grad_func(x, y):
    return np.array([2*x + np.e**x - y, 2*y -x])

In [None]:
print(gradient_descent(np.array([1,1]), 0.1, grad_func, n_iter=100, epsilon=0.001))
print(polyak_gradient_descent(np.array([1,1]), 0.5, 0.1, grad_func, n_iter=100, epsilon=0.001))
print(nesterov_gradient_descent(np.array([1,1]), 0.1, 0.1, grad_func, n_iter=100, epsilon=0.001))
print(adaGrad(np.array([1,1]), 0.1, grad_func, n_iter=100, epsilon=0.001))

39
(array([-0.42860024, -0.21082297]), [array([0.62817182, 0.9       ]), array([0.40511934, 0.78281718]), array([0.25242905, 0.66676568]), array([0.13990499, 0.55865545]), array([0.05277309, 0.46091486]), array([-0.01710909,  0.37400919]), array([-0.07458999,  0.29749645]), array([-0.12273474,  0.23053816]), array([-0.16358381,  0.17215705]), array([-0.19856087,  0.12136726]), array([-0.22870296,  0.07723772]), array([-0.25479508,  0.03891988]), array([-0.2774516,  0.0056564]), array([-0.29716687, -0.02322004]), array([-0.3143475 , -0.04829272]), array([-0.32933379, -0.07006893]), array([-0.34241421, -0.08898852]), array([-0.35383563, -0.10543224]), array([-0.36381076, -0.11972935]), array([-0.37252381, -0.13216456]), array([-0.38013483, -0.14298403]), array([-0.38678319, -0.15240071]), array([-0.39259046, -0.16059888]), array([-0.39766278, -0.16773815]), array([-0.40209289, -0.1739568 ]), array([-0.40596186, -0.17937473]), array([-0.40934052, -0.18409597]), array([-0.41229082, -0.1882