In [1]:
import torch

def evaluate_f(x):
    """Evaluate the first test objective, x[0]^2 - 0.2 * x[1]^2.

    Args:
        x: indexable object whose first two entries are the coordinates.

    Returns:
        The objective value at ``x``.
    """
    first_term = x[0] ** 2
    second_term = 0.2 * x[1] ** 2
    return first_term - second_term
    
def evaluate_grad_x(x_a):
    """Return the gradient of x[0]^2 - 0.2 * x[1]^2 at ``x_a``.

    Args:
        x_a: indexable object whose first two entries are the coordinates.

    Returns:
        A new two-element tensor ``[2 * x_a[0], -0.4 * x_a[1]]``.
    """
    d_first = 2 * x_a[0]
    d_second = -0.4 * x_a[1]
    return torch.tensor([d_first, d_second])




def evaluate_f_2(x):
    """Evaluate the second test objective, x[0]^2 + x[1]^2.

    Args:
        x: indexable object whose first two entries are the coordinates.

    Returns:
        The objective value at ``x``.
    """
    first_sq = x[0] ** 2
    second_sq = x[1] ** 2
    return first_sq + second_sq
    
def evaluate_grad_x_2(x_a):
    """Return the gradient of x[0]^2 + x[1]^2 at ``x_a``.

    Args:
        x_a: indexable object whose first two entries are the coordinates.

    Returns:
        A new two-element tensor ``[2 * x_a[0], 2 * x_a[1]]``.
    """
    doubled = [2 * coordinate for coordinate in (x_a[0], x_a[1])]
    return torch.tensor(doubled)

def random_init_x():
    """Return the starting point used by the descent drivers.

    Despite the name, no randomness is involved: the result is always a
    two-element tensor built from the integer literal 6300.
    """
    start_coordinate = 6300
    return torch.tensor([start_coordinate, start_coordinate])
    
def update_x(x_t, evaluate_grad_x_n,  alpha = 0.01):
    """Take one fixed-step gradient descent step.

    Args:
        x_t: current point.
        evaluate_grad_x_n: callable returning the gradient at a point.
        alpha: step size multiplying the gradient.

    Returns:
        The next point, ``x_t - alpha * gradient(x_t)``.
    """
    step = alpha * evaluate_grad_x_n(x_t)
    return x_t - step

def update_x_adagrad(x_t, s_t, evaluate_grad_x_n,  rho = 1, iteration_frame = 5):
    """Take one adaptive step with an element-wise step size of rho / s_t.

    The squared gradient at ``x_t`` is added to ``s_t`` before the step size
    is computed. When ``s_t`` is a tensor the addition happens in place, so
    the caller's accumulator keeps growing from one call to the next even
    though only the new point is returned.

    Args:
        x_t: current point.
        s_t: running sum of squared gradients (expected to be a floating
            point tensor); updated in place when it is a tensor.
        evaluate_grad_x_n: callable returning the gradient at a point.
        rho: numerator of the element-wise step size.
        iteration_frame: accepted for compatibility; not used by this function.

    Returns:
        The next point, ``x_t - (rho / s_t) * gradient(x_t)``.
    """
    # Gradient at the current point.
    grad_x = evaluate_grad_x_n(x_t)
    # Augmented assignment mutates a tensor accumulator in place, which is what
    # lets the accumulated history survive between calls; a plain `s_t = s_t + ...`
    # would only rebind the local name and the caller would never see it.
    s_t += grad_x ** 2
    # NOTE(review): textbook AdaGrad scales by rho / sqrt(s_t); the plain
    # element-wise inverse is kept here as originally written -- confirm intent.
    alpha_t = rho / s_t
    # Move against the gradient using the per-coordinate step size.
    return x_t - alpha_t * grad_x

def run_gradient_descent(evaluate_f, evaluate_grad_x_n, T = 20):
    """Run T fixed-step gradient descent updates, printing each iterate.

    Args:
        evaluate_f: objective, called on every iterate only to report its value.
        evaluate_grad_x_n: callable returning the gradient at a point.
        T: number of updates to perform.

    Returns:
        The point reached after the last update.
    """
    x_t = random_init_x()
    print("Iteracion 0 ", x_t)
    for step in range(T):
        # Advance one step, then report the new point and its objective value.
        x_t = update_x(x_t, evaluate_grad_x_n)
        print("Iteracion ", step, " ",  x_t, "f(x_t): ", evaluate_f(x_t))
    return x_t


def run_adaptive_gradient_descent(evaluate_grad_x_n, T = 20, evaluate_f_n = None):
    """Run T adaptive updates via update_x_adagrad, printing each iterate.

    Args:
        evaluate_grad_x_n: callable returning the gradient at a point.
        T: number of updates to perform.
        evaluate_f_n: optional objective used only for the printed f(x_t)
            value. When omitted, the module-level ``evaluate_f`` is used, which
            matches the previous behavior; pass the objective that belongs to
            ``evaluate_grad_x_n`` to get a meaningful report for other functions.

    Returns:
        The point reached after the last update.
    """
    # Fall back to the module-level objective so existing calls print the same values.
    objective = evaluate_f if evaluate_f_n is None else evaluate_f_n
    x_t = random_init_x()
    epsilon = 0.00001
    # Small positive start for the squared-gradient accumulator; the same
    # tensor object is handed to every update_x_adagrad call.
    s_t = torch.ones(2) * epsilon
    print("Iteracion 0 ", x_t)
    for i in range(0, T):
        x_t = update_x_adagrad(x_t, s_t, evaluate_grad_x_n)
        print("Iteracion ", i, " ",  x_t, "f(x_t): ", objective(x_t))
    return x_t


def run_newton_rhapson(evaluate_grad_x_n, T = 20, evaluate_f_n = None):
    """Run T updates via update_x_adagrad, printing each iterate.

    NOTE(review): despite the name, no second-order (Hessian) information is
    used here; the loop performs the same update_x_adagrad steps as
    run_adaptive_gradient_descent. A real Newton-Raphson step still needs to
    be implemented.

    Args:
        evaluate_grad_x_n: callable returning the gradient at a point.
        T: number of updates to perform.
        evaluate_f_n: optional objective used only for the printed f(x_t)
            value. When omitted, the module-level ``evaluate_f`` is used, which
            matches the previous behavior.

    Returns:
        The point reached after the last update.
    """
    # Fall back to the module-level objective so existing calls print the same values.
    objective = evaluate_f if evaluate_f_n is None else evaluate_f_n
    x_t = random_init_x()
    epsilon = 0.00001
    # Small positive start for the squared-gradient accumulator; the same
    # tensor object is handed to every update_x_adagrad call.
    s_t = torch.ones(2) * epsilon
    print("Iteracion 0 ", x_t)
    for i in range(0, T):
        x_t = update_x_adagrad(x_t, s_t, evaluate_grad_x_n)
        print("Iteracion ", i, " ",  x_t, "f(x_t): ", objective(x_t))
    return x_t
    
def test_1():
    """Unit test 1: a single fixed-step update on the first objective.

    Starting from (10, 0) with the default step size 0.01, the gradient
    (20, 0) moves the point to (9.8, 0).
    """
    x_a = torch.tensor([10.0, 0])
    # update_x requires the gradient callable; omitting it raised a TypeError.
    x_b = update_x(x_a, evaluate_grad_x)
    print(x_b)
    assert torch.allclose(x_b, torch.tensor([9.8, 0.0]))

def test_2():
    """Run fixed-step gradient descent on the first objective."""
    # run_gradient_descent takes the objective first and its gradient second;
    # passing only the gradient raised a TypeError.
    run_gradient_descent(evaluate_f, evaluate_grad_x)
    
def test_3():
    """Run the adaptive descent driver with the first objective's gradient."""
    gradient_fn = evaluate_grad_x
    run_adaptive_gradient_descent(gradient_fn)

def test_4():
    """Run fixed-step gradient descent on the second objective, x1^2 + x2^2."""
    objective, gradient_fn = evaluate_f_2, evaluate_grad_x_2
    run_gradient_descent(objective, gradient_fn)

# Cell entry point: announce the experiment, then run fixed-step gradient
# descent on the second objective via test_4 (which prints every iterate).
print("Testing gradient descent with F2 = x1 ^2 + x2 ^2")
test_4()


Testing gradient descent with F2 = x1 ^2 + x2 ^2
Iteracion 0  tensor([6300, 6300])
Iteracion  0   tensor([6174., 6174.]) f(x_t):  tensor(76236552.)
Iteracion  1   tensor([6050.5200, 6050.5200]) f(x_t):  tensor(73217584.)
Iteracion  2   tensor([5929.5098, 5929.5098]) f(x_t):  tensor(70318176.)
Iteracion  3   tensor([5810.9194, 5810.9194]) f(x_t):  tensor(67533568.)
Iteracion  4   tensor([5694.7012, 5694.7012]) f(x_t):  tensor(64859244.)
Iteracion  5   tensor([5580.8071, 5580.8071]) f(x_t):  tensor(62290816.)
Iteracion  6   tensor([5469.1909, 5469.1909]) f(x_t):  tensor(59824100.)
Iteracion  7   tensor([5359.8071, 5359.8071]) f(x_t):  tensor(57455064.)
Iteracion  8   tensor([5252.6108, 5252.6108]) f(x_t):  tensor(55179840.)
Iteracion  9   tensor([5147.5586, 5147.5586]) f(x_t):  tensor(52994720.)
Iteracion  10   tensor([5044.6074, 5044.6074]) f(x_t):  tensor(50896128.)
Iteracion  11   tensor([4943.7153, 4943.7153]) f(x_t):  tensor(48880644.)
Iteracion  12   tensor([4844.8408, 4844.8408]) 