# Task 3. Algorithms for unconstrained nonlinear optimization. First- and secondorder methods

In [28]:
import pandas as pd
import numpy as np
from scipy.optimize import least_squares, minimize, curve_fit

import plotly.express as px
import plotly.graph_objects as go

!pip install autograd
from autograd import jacobian

You should consider upgrading via the '/opt/python/envs/default/bin/python -m pip install --upgrade pip' command.[0m


## Goal
The use of first- and second-order methods (Gradient Descent, Non-linear Conjugate Gradient Descent, Newton’s method and Levenberg-Marquardt algorithm) in the tasks of unconstrained nonlinear optimization

## Problems and methods

## I. Generate random numbers $\alpha ∈ (0,1)$ and $\beta ∈ (0,1)$

In [29]:
alpha = np.random.random()
beta = np.random.random()

Furthermore, generate the noisy data $\{x_k, y_k\}$, where $k = 0, \dots , 100$, according to the following rule:
\begin{align*}
y_k= \alpha x_k + \beta + \delta_k, x_k=\frac{k}{100}
\end{align*}
where $\delta_k \sim (0,1)$ are values of a random variable with standard normal
distribution. Approximate the data by the following linear and rational function

In [30]:
x_k = []
y_k = []
y_init = []
for k in range(101):
    x_k.append(k / 100)
    y_init.append(alpha * x_k[k] + beta)
    
    y_k.append(y_init[k] + np.random.normal(0, 1))

### $1. F(x, a, b) = ax + b$ (linear approximant),

In [31]:
# linear approximation function
def linear_approximant(x, a, b):
    y = []
    for i in range(101):
        y.append(a * x[i] + b)
    return np.array(y)

### $2. F(x, a, b) = \frac{a}{1+ bx}$ (rational approximant),

In [32]:
# rational approximation function
def rational_approximant(x, a, b):
    y = []
    for i in range(101):
        y.append(a / (1 + b * x[i]))
    return np.array(y)

by means of least squares through the numerical minimization (with precision $\epsilon = 0.001)$ of the following function:
\begin{align*}
D(a, b) = \sum^{100}_{k=0} \left(F(x_k, a,b) - y_k\right)^2
\end{align*}

In [33]:
epsilon = 0.001

def linear_means_of_least_squares(params, y_f, x):
    a, b = params
    return np.sum((linear_approximant(x, a, b) - y_f) ** 2)


def rational_means_of_least_squares(params, y_f, x):
    a, b = params
    return np.sum((rational_approximant(x, a, b) - y_f) ** 2)

To solve the minimization problem, use the methods of `Gradient Descent`, `Conjugate Gradient Descent`, `Newton’s method` and `Levenberg-Marquardt algorithm`. If necessary, set the initial approximations and other parameters of the methods. Visualize the data and the approximants obtained in a plot separately for each type of approximant so that one can compare the results for the numerical methods used. Analyze the results obtained (in terms of number of iterations, precision, number of function evaluations, etc.) and compare them with those from Task 2 for the same
dataset.

In [34]:
# # Minimization of function using the Gradient Descent
lin_gradient_descent = minimize(
        linear_means_of_least_squares,
        [0, 0],
        method='BFGS',
        args=(y_k, x_k),
        tol=epsilon
)

print("Linear optimization (gradient descent):\n", lin_gradient_descent)

Linear optimization (gradient descent):
       fun: 101.87981249572879
 hess_inv: array([[ 0.77788607, -0.41783871],
       [-0.41783871,  0.22947793]])
      jac: array([9.53674316e-07, 1.90734863e-06])
  message: 'Optimization terminated successfully.'
     nfev: 15
      nit: 2
     njev: 5
   status: 0
  success: True
        x: array([0.411897  , 0.08960455])


In [35]:
# Minimization of function using the conjugate gradient algorithm.
lin_conjugate_gradient = minimize(
    linear_means_of_least_squares,
    [0, 0], 
    method="CG", 
    args=(y_k, x_k),
    tol=epsilon
)

print("Linear optimization (conjugate gradient method):\n", lin_conjugate_gradient)

Linear optimization (conjugate gradient method):
      fun: 101.8798124957288
     jac: array([-9.53674316e-07,  0.00000000e+00])
 message: 'Optimization terminated successfully.'
    nfev: 15
     nit: 2
    njev: 5
  status: 0
 success: True
       x: array([0.411897  , 0.08960454])


In [36]:
# Newton’s method
lin_newtons_gradient = minimize(
    linear_means_of_least_squares,
    [0, 0], 
    method="Newton-CG", 
    args=(y_k, x_k), 
    jac=jacobian(linear_means_of_least_squares), 
    tol=epsilon
)

print("Linear optimization (conjugate Newton's gradient):\n", lin_newtons_gradient)                                

Linear optimization (conjugate Newton's gradient):
      fun: 101.87981249572876
     jac: array([-3.50439778,  2.16733022])
    nfev: 3
    nhev: 0
     nit: 2
    njev: 86
  status: 3
 success: False
       x: array([0.411897  , 0.08960455])


In [37]:
# Levenberg-Marquardt algorithm
"""
"lm" : Levenberg-Marquardt algorithm as implemented in scipy.optimize.curve_fit. 
Doesn’t handle bounds and sparse Jacobians. Usually the most efficient method for small 
unconstrained problems.
"""
lin_levenberg_marquardt = curve_fit(
    linear_approximant, 
    xdata=x_k,
    ydata=y_k,
    method="lm"
)

print("Linear optimization (Levenberg-Marquardt algorithm):\n", lin_levenberg_marquardt[0])                                

Linear optimization (Levenberg-Marquardt algorithm):
 [0.41189703 0.08960453]


## Helper functions to draw a beautiful plot

In [38]:
# print_optimization_plot is a helper function that drwas the plot
def print_optimization_plot(plot_title, legend_title, x_k, y_init, y_k, y_opt_gradient_descent, y_opt_conjugate_gradient, y_opt_newtons_gradient, y_opt_levenberg_marquardt):
    fig = go.Figure()
    fig = px.scatter(x=x_k, y=y_k)


    fig.add_trace(
        go.Scatter(
            x=x_k,
            y=y_init,
            name="Non noisy data"
        )
    )

    fig.add_trace(
        go.Scatter(
            x=x_k,
            y=y_opt_gradient_descent,
            name="Gradient desent method"
        )
    )

    fig.add_trace(
        go.Scatter(
            x=x_k,
            y=y_opt_conjugate_gradient,
            name="Conjugate gradient method"
        )
    )

    fig.add_trace(
        go.Scatter(
            x=x_k,
            y=y_opt_newtons_gradient,
            name="Newton method"
        )
    )

    fig.add_trace(
        go.Scatter(
            x=x_k,
            y=y_opt_levenberg_marquardt,
            name="Levenberg Marquardt method")
    )

    fig.update_layout(
        title=plot_title,
        legend_title=legend_title,
    )


    fig.show()

## Minimization of Linear Aapproximant

In [39]:
y_linear_opt_gradient_descent = []
for i in range(101):
    y_linear_opt_gradient_descent.append(lin_gradient_descent.x[0] * x_k[i] + lin_gradient_descent.x[1])

In [40]:
y_linear_opt_newtons_gradient = []
for i in range(101):
    y_linear_opt_newtons_gradient.append(lin_newtons_gradient.x[0] * x_k[i] + lin_newtons_gradient.x[1])

In [41]:
y_linear_opt_conjugate_gradient = []
for i in range(101):
    y_linear_opt_conjugate_gradient.append(lin_conjugate_gradient.x[0] * x_k[i] + lin_conjugate_gradient.x[1])

In [42]:
y_linear_opt_lin_levenberg_marquardt = []
for i in range(101):
    y_linear_opt_lin_levenberg_marquardt.append(lin_levenberg_marquardt[0] * x_k[i] + lin_levenberg_marquardt[1])

In [43]:
print_optimization_plot(
    "Minimization of Linear Aapproximant", "Methods", x_k, y_init, y_k, 
    y_linear_opt_gradient_descent, y_linear_opt_conjugate_gradient, y_linear_opt_newtons_gradient, y_linear_opt_lin_levenberg_marquardt
)

## Minimization of Rational Approximant

In [44]:
# # Minimization of function using the Gradient Descent
rat_gradient_descent = minimize(
        rational_means_of_least_squares,
        [0, 0],
        method='BFGS',
        args=(y_k, x_k),
        tol=epsilon
)

print("Rational optimization (gradient descent):\n", rat_gradient_descent)

Rational optimization (gradient descent):
       fun: 102.3464314455297
 hess_inv: array([[0.00590603, 0.0156364 ],
       [0.0156364 , 0.06368679]])
      jac: array([ 2.96592712e-04, -9.15527344e-05])
  message: 'Optimization terminated successfully.'
     nfev: 60
      nit: 10
     njev: 20
   status: 0
  success: True
        x: array([ 0.19757003, -0.59644702])


In [45]:
# Minimization of function using the conjugate gradient algorithm.
rat_conjugate_gradient = minimize(
    rational_means_of_least_squares,
    [0, 0], 
    method="CG", 
    args=(y_k, x_k),
    tol=epsilon
)

print("Rational optimization (conjugate gradient method):\n", rat_conjugate_gradient)

Rational optimization (conjugate gradient method):
      fun: 102.34643144548599
     jac: array([-2.26974487e-04,  6.77108765e-05])
 message: 'Optimization terminated successfully.'
    nfev: 72
     nit: 8
    njev: 24
  status: 0
 success: True
       x: array([ 0.19756946, -0.59644512])


In [46]:
# Newton’s method
rat_newtons_gradient = minimize(
    rational_means_of_least_squares,
    [0, 0], 
    method="Newton-CG", 
    args=(y_k, x_k), 
    jac=jacobian(linear_means_of_least_squares), 
    tol=epsilon
)

print("Rational optimization (conjugate Newton's gradient):\n", rat_newtons_gradient)                                

Rational optimization (conjugate Newton's gradient):
      fun: 104.57769233401922
     jac: array([-3.50439778,  2.16733022])
    nfev: 3
    nhev: 0
     nit: 2
    njev: 86
  status: 3
 success: False
       x: array([0.411897  , 0.08960455])


In [47]:
# Levenberg-Marquardt algorithm
"""
"lm" : Levenberg-Marquardt algorithm as implemented in MINPACK. 
Doesn’t handle bounds and sparse Jacobians. Usually the most efficient method for small 
unconstrained problems.
"""
rat_levenberg_marquardt = curve_fit(
    rational_approximant, 
    xdata=x_k,
    ydata=y_k,
    method="lm"
)

print("Rational optimization (Levenberg-Marquardt algorithm):\n", rat_levenberg_marquardt[0])                                

Rational optimization (Levenberg-Marquardt algorithm):
 [ 0.19754269 -0.59655525]


In [48]:
y_rational_opt_gradient_descent = []
for i in range(101):
    y_rational_opt_gradient_descent.append(lin_gradient_descent.x[0] / (1 + x_k[i] * rat_gradient_descent.x[1]))

In [49]:
y_rational_opt_conjugate_gradient = []
for i in range(101):
    y_rational_opt_conjugate_gradient.append(rat_conjugate_gradient.x[0] / (1 + x_k[i] * rat_conjugate_gradient.x[1]))

In [50]:
y_rational_opt_lin_newton = []
for i in range(101):
    y_rational_opt_lin_newton.append(rat_newtons_gradient.x[0] / (1 + x_k[i] * rat_newtons_gradient.x[1]))

In [51]:
y_rational_opt_lin_levenberg_marquardt = []
for i in range(101):
    y_rational_opt_lin_levenberg_marquardt.append(rat_levenberg_marquardt[0] / (1 + x_k[i] * rat_levenberg_marquardt[1]))

In [52]:
print_optimization_plot(
    "Minimization of Rational Aapproximant", "Methods", x_k, y_init, y_k, 
    y_rational_opt_gradient_descent, y_rational_opt_conjugate_gradient,
    y_rational_opt_lin_newton, y_rational_opt_lin_levenberg_marquardt
)