#Non linear programming

In this notebook we present three method to find the minimum of the function

$$f(x) = x^4 - 3x^3 +2 $$

This methods are: Quasi Newton Method (BFGS), gradient descent (slowest yet most used in deep learning) and Newton Raphson method (fastest method, commonly can not be used)

As an excercise, we first program the three methods, then use the corresponding libraries in python

#Function

In [10]:
import numpy as np
from scipy.optimize import minimize

def f(x):
  return x**4 - 3*x**3 + 2

def f_deriv(x):
  return 3*x**3 - 9*x**2

def f_seg_deriv(x):
  return 6*x**2 - 18*x

#Quasi Newton Method

In [19]:
def quasi_newton(f_deriv, x_0, tol=1e-7, max_iter=100):
  x = x_0
  H = 1.0 #extimacion inicial... recomendada por CHAT GPT :D

  for i in range(max_iter):
    grad = f_deriv(x)
    if abs(grad) < tol:
      break

    d = -H * grad
    x_new = x + d
    grad_new = f_deriv(x_new)

    s = x_new - x
    y = grad_new - grad

    if y!= 0: #Por si acaso
      H = (s/y)
    x = x_new

  return x

x_min = quasi_newton(f_deriv, 2.0)
print("Minimum found at x =", x_min)

Minimum found at x = 3.0000000000000386


In [11]:
result = minimize(f, x0 = 2.0, jac=f_deriv, method="BFGS") #It is actually BDFGS by deffect
print("Minimum with BFGS:", result.x)

Minimum with BFGS: [2.]


#Newton_Raphson Method

In [28]:
def newton_raphson(f_deriv, f_seg_deriv, x_0, tol = 1e-9, max_iter = 1000000): #... initial point, really sensible
  x = x_0
  for i in range(max_iter):
    grad = f_deriv(x)
    hess = f_seg_deriv(x)
    if abs(grad) < tol:
      break
    x -= grad/hess
  return x

x_min = newton_raphson(f_deriv, f_seg_deriv, 2.0)
print("Minimum found at x =", x_min)

Minimum found at x = 7.62939453125e-06


In [14]:
result_new_raph = minimize(f, x0=2.0, jac=f_deriv, hess=f_seg_deriv,method="Newton-CG")
print("Minimum with Newton Raphson method:", result_new_raph.x)

Minimum with Newton Raphson method: [2.]


#Gradient Descent Method

In [30]:
def gradiente(f_deriv, x_0, ta = 0.05, tol = 1e-06, max_iter=1000): #0.05 better than 0.01
  x = x_0
  for i in range(max_iter):
    grad = f_deriv(x)
    if abs(grad) < tol:
      break
    x = -ta * grad
  return x

x_min = gradiente(f_deriv, 2.0)
print("Minimum found at x =", x_min)

Minimum found at x = 2.347749264605701e-05


In [16]:
import torch
x = torch.tensor([2.0], requires_grad = True) #Inicial value, 2.0

optimizer = torch.optim.SGD([x], lr =0.01)

for j in range(100):
  optimizer.zero_grad()
  loss = x**4 - 3*x**3 + 2
  loss.backward() #Obtain a gradient
  optimizer.step()

print(x.item()) #Minimum found


2.249999523162842
