In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from lib.lib import gauss_newton, LevenbergMarquardt, DifferentiableFunction
import numpy as np
from typing import Callable

## Example functions

In [3]:
xs = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
ys = np.array([1, 2, 3, 4])


def f(x: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert x.shape == (2,)
    assert p.shape == (3,)

    return p[0] * x[0] + p[1] * x[1] + p[2]


def df(x: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert x.shape == (2,)
    assert p.shape == (3,)

    return np.array([x[0], x[1], 1])


def residue(f: Callable, xs: np.ndarray, ys: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert xs.shape[1] == 2
    assert xs.shape[0] == ys.shape[0]
    assert p.shape == (3,)

    fs = np.array([f(x, p) for x in xs])

    return (fs - ys) ** 2


def residue_jacobian(
    f: Callable, df: Callable, ys: np.ndarray, xs: np.ndarray, p: np.ndarray
) -> np.ndarray:
    assert xs.shape[1] == 2
    assert xs.shape[0] == ys.shape[0]
    assert p.shape == (3,)

    fs = np.array([f(x, p) for x in xs])
    dfs = np.array([df(x, p) for x in xs])

    return np.array([2 * (fs[i] - ys[i]) * dfs[i] for i in range(len(xs))])



p0 = np.random.randn(3)
alpha = 0.01
max_iter = 1000

In [4]:
residue(f, xs, ys, p0)

array([0.14656711, 1.03823   , 2.73912629, 5.24925599])

In [5]:
residue_jacobian(f, df, ys, xs, p0)

array([[ -0.76568167,  -1.53136334,  -0.76568167],
       [ -4.07574288,  -6.11361431,  -2.03787144],
       [ -9.93018361, -13.24024482,  -3.3100612 ],
       [-18.32900388, -22.91125485,  -4.58225097]])

In [6]:
def F_value(p: np.ndarray) -> np.ndarray:
    return residue(f, xs, ys, p)


def F_derivative(p: np.ndarray) -> np.ndarray:
    return residue_jacobian(f, df, ys, xs, p)

F = DifferentiableFunction(F_value, F_derivative)

## Gauss-Newton method

In [7]:
p, err = gauss_newton(F, p0, max_iter, silent=True)
print(f"{p=}, {err=}")

p=array([ 0.65366127,  0.34633873, -0.34633873]), err=5.093195125717395e-21


In [8]:
np.array([f(xs[i], p) for i in range(len(xs))])

array([1., 2., 3., 4.])

## Levenberg-Marquardt method

#### 1. Constant $\lambda_k$ sequence

In [9]:
lambda_param = 1e-3
optimizer = LevenbergMarquardt(F, lambda_param_fun=LevenbergMarquardt.LambdaParamConstant(lambda0=lambda_param))
p, err = optimizer.optimize(p0, max_iter, silent=True)
print(f"{p=}, {err=}")

p=array([ 1.32372541, -0.32412297,  0.32520588]), err=5.42643875939047e-07


In [10]:
np.array([f(xs[i], p) for i in range(len(xs))])

array([1.00068535, 2.00028779, 2.99989023, 3.99949268])

#### 2. Sequence $\lambda_k$ decrasing when error is decreasing, and increasing otherwise

In [11]:
optimizer = LevenbergMarquardt(F, lambda_param_fun=LevenbergMarquardt.LambdaParamDefaultOptimizer(lambda0=0.1, lambda_change=2))
p, err = optimizer.optimize(p0, max_iter, silent=True)
print(f"{p=}, {err=}")

p=array([ 1.32435142, -0.32435142,  0.32435142]), err=5.069742900190526e-20


In [12]:
np.array([f(xs[i], p) for i in range(len(xs))])

array([1., 2., 3., 4.])