In [287]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [288]:
from lib import gauss_newton, LevenbergMarquardt
import numpy as np
from typing import Callable

## Example functions

In [289]:
xs = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
ys = np.array([1, 2, 3, 4])


def f(x: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert x.shape == (2,)
    assert p.shape == (3,)

    return p[0] * x[0] + p[1] * x[1] + p[2]


def df(x: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert x.shape == (2,)
    assert p.shape == (3,)

    return np.array([x[0], x[1], 1])


def residue(f: Callable, xs: np.ndarray, ys: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert xs.shape[1] == 2
    assert xs.shape[0] == ys.shape[0]
    assert p.shape == (3,)

    fs = np.array([f(x, p) for x in xs])

    return (fs - ys) ** 2


def residue_jacobian(
    f: Callable, df: Callable, ys: np.ndarray, xs: np.ndarray, p: np.ndarray
) -> np.ndarray:
    assert xs.shape[1] == 2
    assert xs.shape[0] == ys.shape[0]
    assert p.shape == (3,)

    fs = np.array([f(x, p) for x in xs])
    dfs = np.array([df(x, p) for x in xs])

    return np.array([2 * (fs[i] - ys[i]) * dfs[i] for i in range(len(xs))])


p0 = np.random.randn(3)
alpha = 0.01
max_iter = 1000

In [290]:
residue(f, xs, ys, p0)

array([ 1.03061735,  3.31510502,  6.89741341, 11.77754252])

In [291]:
residue_jacobian(f, df, ys, xs, p0)

array([[ -2.03038652,  -4.06077303,  -2.03038652],
       [ -7.28297194, -10.92445791,  -3.64148597],
       [-15.75775627, -21.01034169,  -5.25258542],
       [-27.45473951, -34.31842438,  -6.86368488]])

In [292]:
def F(p: np.ndarray) -> np.ndarray:
    return residue(f, xs, ys, p)


def DF(p: np.ndarray) -> np.ndarray:
    return residue_jacobian(f, df, ys, xs, p)

## Gauss-Newton method

In [293]:
p, err = gauss_newton(F, DF, p0, max_iter, silent=True)
print(f"{p=}, {err=}")

p=array([ 1.52416943, -0.62486315,  0.59865771]), err=0.2200502167286744


In [294]:
np.array([f(xs[i], p) for i in range(len(xs))])

array([0.87310084, 1.77240713, 2.67171341, 3.5710197 ])

## Levenberg-Marquardt method

#### 1. Sequence $\lambda_k = const$

In [295]:
optimizer = LevenbergMarquardt(F, DF, lambda_param=LevenbergMarquardt.LambdaParam(0.1, 2, lambda_fun=lambda F, next_point, p, i: 1e-3))
p, err = optimizer.optimize(p0, max_iter, silent=True)
print(f"{p=}, {err=}")

p=array([ 1.71334249, -0.71371759,  0.71473933]), err=4.830143130226005e-07


In [296]:
np.array([f(xs[i], p) for i in range(len(xs))])

array([1.00064664, 2.00027154, 2.99989643, 3.99952133])

#### 2. Sequence $\lambda_k$ decrasing when error is decreasing, and increasing otherwise

In [297]:
optimizer = LevenbergMarquardt(F, DF)
p, err = optimizer.optimize(p0, max_iter, silent=True)
print(f"{p=}, {err=}")

p=array([ 1.71393314, -0.71393314,  0.71393314]), err=5.069761746063441e-20


In [298]:
np.array([f(xs[i], p) for i in range(len(xs))])

array([1., 2., 3., 4.])