In [3]:
import numpy as np

In [4]:
# def calc_alpha(i):
#     return 1 / i

In [5]:
def gradient_descent(x0, f, gradient, alpha, num_iters, tol):
    """Minimize ``f`` with fixed-step gradient descent.

    Every iteration proposes ``x - alpha * gradient(x)``. The loop stops as
    soon as the objective value at the proposal differs from the value at
    the current point by less than ``tol``.

    Args:
        x0: Starting point; anything ``f`` and ``gradient`` accept and that
            supports ``x - alpha * gradient(x)`` (scalar or NumPy array).
        f: Objective function, called as ``f(x)``.
        gradient: Callable returning the gradient of ``f`` at ``x``.
        alpha: Step size.
        num_iters: Maximum number of iterations.
        tol: Threshold on ``abs(f(x_new) - f(x))`` used as stopping test.

    Returns:
        dict with three keys:
            ``'converged'``: ``True`` if the stopping test fired, else ``False``.
            ``'iter'``: zero-based index of the iteration at which the loop
                stopped (``num_iters - 1`` when the budget ran out, ``0``
                when ``num_iters <= 0``).
            ``'x'``: last accepted point. The proposal that triggered the
                stopping test is not adopted.
    """
    x = x0
    f_x = f(x)  # cached so the objective is evaluated once per point
    converged = False
    i = 0  # keeps 'iter' defined when the loop body never runs (num_iters <= 0)
    for i in range(num_iters):
        x_new = x - alpha * gradient(x)
        f_new = f(x_new)

        if abs(f_new - f_x) < tol:
            converged = True
            break

        x, f_x = x_new, f_new

    return {'converged': converged, 'iter': i, 'x': x}

In [6]:
def f(x):
    """Quadratic test objective ``0.5 * (x[0]**2 + 10 * x[1]**2)``."""
    first, second = x[0], x[1]
    weighted_squares = first**2 + 10 * second**2
    return 0.5 * weighted_squares

In [7]:
def gradient(x):
    """Return the array ``[x[0], 10 * x[1]]``, the gradient of the quadratic ``f``."""
    d_first = x[0]
    d_second = 10 * x[1]
    return np.array([d_first, d_second])

In [8]:
# Shared experiment setup: the starting point and stopping parameters below
# are reused by the other optimizer calls in this notebook.
x0 = np.array([700, 400])
alpha = 0.01
num_iters = 10000
tol = 1e-5

# Baseline: plain gradient descent on the quadratic objective.
gradient_descent(x0=x0, f=f, gradient=gradient,
                 alpha=alpha, num_iters=num_iters, tol=tol)

{'converged': True, 'iter': 996, 'x': array([3.14595031e-02, 1.06561271e-43])}

In [9]:
def momentum(x0, f, gradient, alpha, num_iters, tol, beta=0.9):
    """Minimize ``f`` with gradient descent plus a momentum term.

    Every iteration updates the accumulated step as
    ``inertia = beta * inertia - alpha * gradient(x)`` and proposes
    ``x + inertia``. The loop stops as soon as the objective value at the
    proposal differs from the value at the current point by less than
    ``tol``.

    Args:
        x0: Starting point; anything ``f`` and ``gradient`` accept and that
            supports the arithmetic above (scalar or NumPy array).
        f: Objective function, called as ``f(x)``.
        gradient: Callable returning the gradient of ``f`` at ``x``.
        alpha: Step size applied to the gradient.
        num_iters: Maximum number of iterations.
        tol: Threshold on ``abs(f(x_new) - f(x))`` used as stopping test.
        beta: Fraction of the previous step carried into the next one.

    Returns:
        dict with three keys:
            ``'converged'``: ``True`` if the stopping test fired, else ``False``.
            ``'iter'``: zero-based index of the iteration at which the loop
                stopped (``num_iters - 1`` when the budget ran out, ``0``
                when ``num_iters <= 0``).
            ``'x'``: last accepted point. The proposal that triggered the
                stopping test is not adopted.
    """
    x = x0
    f_x = f(x)  # cached so the objective is evaluated once per point
    inertia = 0  # scalar zero; takes the shape of the gradient on first update
    converged = False
    i = 0  # keeps 'iter' defined when the loop body never runs (num_iters <= 0)
    for i in range(num_iters):
        inertia = beta * inertia - alpha * gradient(x)
        x_new = x + inertia
        f_new = f(x_new)

        if abs(f_new - f_x) < tol:
            converged = True
            break

        x, f_x = x_new, f_new

    return {'converged': converged, 'iter': i, 'x': x}

In [10]:
# Run the momentum optimizer from the shared starting point and settings.
momentum(x0=x0, f=f, gradient=gradient,
         alpha=alpha, num_iters=num_iters, tol=tol, beta=0.9)

{'converged': True, 'iter': 194, 'x': array([-0.01804139,  0.01335318])}

In [11]:
def nesterov(x0, f, gradient, alpha, num_iters, tol, beta=0.9):
    """Minimize ``f`` with momentum whose gradient is taken at a look-ahead point.

    Every iteration evaluates the gradient at ``x + beta * inertia`` (the
    point the carried-over step alone would reach), updates
    ``inertia = beta * inertia - alpha * gradient(x + beta * inertia)`` and
    proposes ``x + inertia``. The loop stops as soon as the objective value
    at the proposal differs from the value at the current point by less
    than ``tol``.

    Args:
        x0: Starting point; anything ``f`` and ``gradient`` accept and that
            supports the arithmetic above (scalar or NumPy array).
        f: Objective function, called as ``f(x)``.
        gradient: Callable returning the gradient of ``f`` at a point.
        alpha: Step size applied to the gradient.
        num_iters: Maximum number of iterations.
        tol: Threshold on ``abs(f(x_new) - f(x))`` used as stopping test.
        beta: Fraction of the previous step carried into the next one.

    Returns:
        dict with three keys:
            ``'converged'``: ``True`` if the stopping test fired, else ``False``.
            ``'iter'``: zero-based index of the iteration at which the loop
                stopped (``num_iters - 1`` when the budget ran out, ``0``
                when ``num_iters <= 0``).
            ``'x'``: last accepted point. The proposal that triggered the
                stopping test is not adopted.
    """
    x = x0
    f_x = f(x)  # cached so the objective is evaluated once per point
    inertia = 0  # scalar zero; takes the shape of the gradient on first update
    converged = False
    i = 0  # keeps 'iter' defined when the loop body never runs (num_iters <= 0)
    for i in range(num_iters):
        lookahead = x + beta * inertia
        inertia = beta * inertia - alpha * gradient(lookahead)
        x_new = x + inertia
        f_new = f(x_new)

        if abs(f_new - f_x) < tol:
            converged = True
            break

        x, f_x = x_new, f_new

    return {'converged': converged, 'iter': i, 'x': x}

In [12]:
# Run the look-ahead (Nesterov) variant from the shared starting point and settings.
nesterov(x0=x0, f=f, gradient=gradient,
         alpha=alpha, num_iters=num_iters, tol=tol, beta=0.9)

{'converged': True,
 'iter': 172,
 'x': array([ 1.05179270e-03, -2.69569084e-06])}

In [33]:
def adam(x0, f, gradient, alpha, num_iters, tol, beta1, beta2, eps):
    """Minimize ``f`` with the Adam update rule.

    Keeps exponential moving averages of the gradient (``m``) and of the
    element-wise squared gradient (``v``), divides each by ``1 - beta**i``
    to correct the bias from the zero initialisation, and proposes
    ``x - alpha * m_hat / (sqrt(v_hat) + eps)``. The loop stops as soon as
    the objective value at the proposal differs from the value at the
    current point by less than ``tol``.

    Args:
        x0: Starting point; anything ``f`` and ``gradient`` accept and that
            supports the arithmetic above (scalar or NumPy array).
        f: Objective function, called as ``f(x)``.
        gradient: Callable returning the gradient of ``f`` at ``x``.
        alpha: Step size.
        num_iters: Maximum number of iterations.
        tol: Threshold on ``abs(f(x_new) - f(x))`` used as stopping test.
        beta1: Decay rate of the gradient average ``m``.
        beta2: Decay rate of the squared-gradient average ``v``.
        eps: Constant added to ``sqrt(v_hat)`` in the denominator.

    Returns:
        dict with three keys:
            ``'converged'``: ``True`` if the stopping test fired, else ``False``.
            ``'iter'``: one-based number of the iteration at which the loop
                stopped (``num_iters`` when the budget ran out, ``0`` when
                ``num_iters <= 0``).
            ``'x'``: last accepted point. The proposal that triggered the
                stopping test is not adopted.
    """
    x = x0
    f_x = f(x)  # cached so the objective is evaluated once per point
    m = 0
    v = 0
    converged = False
    i = 0  # keeps 'iter' defined when the loop body never runs (num_iters <= 0)
    # Counting starts at 1 so the bias-correction denominators are non-zero.
    for i in range(1, num_iters + 1):
        grad = gradient(x)
        m = beta1 * m + (1 - beta1) * grad
        v = beta2 * v + (1 - beta2) * grad**2

        m_hat = m / (1 - beta1**i)
        v_hat = v / (1 - beta2**i)

        x_new = x - alpha * m_hat / (np.sqrt(v_hat) + eps)
        f_new = f(x_new)

        if abs(f_new - f_x) < tol:
            converged = True
            break

        x, f_x = x_new, f_new

    return {'converged': converged, 'iter': i, 'x': x}

In [47]:
# In real applications alpha would be much smaller, e.g. 0.001.
adam(
    x0, f, gradient,
    alpha=0.5,
    num_iters=num_iters,
    tol=tol,
    beta1=0.9, beta2=0.999,
    eps=1e-6,
)

{'converged': True, 'iter': 4117, 'x': array([4.11236404e-02, 1.14925986e-09])}