In [499]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [500]:
from lib import gradient_descent, gauss_newton
import numpy as np

In [501]:
xs = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
ys = np.array([1, 2, 3, 4])

def f(x: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert x.shape == (2,)
    assert p.shape == (3,)

    return p[0] * x[0] + p[1] * x[1] + p[2]

def df(x: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert x.shape == (2,)
    assert p.shape == (3,)

    return np.array([x[0], x[1], 1])

def residue(f: callable, xs: np.ndarray, ys: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert xs.shape[1] == 2
    assert xs.shape[0] == ys.shape[0]
    assert p.shape == (3,)

    fs = np.array([f(x, p) for x in xs])

    return (fs - ys)**2

def residue_jacobian(f: callable, df: callable, ys: np.ndarray, xs: np.ndarray, p: np.ndarray) -> np.ndarray:
    assert xs.shape[1] == 2
    assert xs.shape[0] == ys.shape[0]
    assert p.shape == (3,)

    fs = np.array([f(x, p) for x in xs])
    dfs = np.array([df(x, p) for x in xs])

    return np.array([2 * (fs[i] - ys[i]) * dfs[i] for i in range(len(xs))])

p0 = np.random.randn(3)
alpha = 0.01
max_iter = 1000

In [502]:
residue(f, xs, ys, p0)

array([ 3.4059043 ,  8.40952191, 15.63669344, 25.08741888])

In [503]:
residue_jacobian(f, df, ys, xs, p0)

array([[ -3.69101845,  -7.3820369 ,  -3.69101845],
       [-11.59967028, -17.39950542,  -5.79983514],
       [-23.72595549, -31.63460732,  -7.90865183],
       [-40.06987408, -50.0873426 , -10.01746852]])

In [504]:
def F(p: np.ndarray) -> np.ndarray:
    return residue(f, xs, ys, p)

def DF(p: np.ndarray) -> np.ndarray:
    return residue_jacobian(f, df, ys, xs, p)

In [506]:
h = DF(p0).T @ DF(p0)
h

array([[2316.69174   , 2986.63055584,  669.93881584],
       [2986.63055584, 3866.72752578,  880.09696994],
       [ 669.93881584,  880.09696994,  210.1581541 ]])

In [507]:
np.linalg.inv(h)

array([[-4.39804651e+12,  4.39804651e+12, -4.39804651e+12],
       [ 4.39804651e+12, -4.39804651e+12,  4.39804651e+12],
       [-4.39804651e+12,  4.39804651e+12, -4.39804651e+12]])

In [508]:
p = gauss_newton(F, DF, p0, max_iter)
p

iter 0: p = [ 1.00897294 -1.06338128  0.27228041], ||F(p)|| = 30.9225647495773
iter 1: p = [ 1.33593917 -0.86314335  0.46759291], ||F(p)|| = 7.730641187394325
iter 2: p = [ 1.40176604 -0.66536813  0.46759291], ||F(p)|| = 1.9326602968485833
iter 3: p = [ 1.42842947 -0.56023052  0.46134291], ||F(p)|| = 0.48316507421214516
iter 4: p = [ 1.46637056 -0.53227109  0.48282728], ||F(p)|| = 0.12079126855303667
iter 5: p = [ 1.45374234 -0.4866926   0.4619707 ], ||F(p)|| = 0.03019781713825916
iter 6: p = [ 1.4451612  -0.46163633  0.44927538], ||F(p)|| = 0.007549454284564913
iter 7: p = [ 1.45410848 -0.46234605  0.45616557], ||F(p)|| = 0.0018873635711411764
iter 8: p = [ 1.45432323 -0.45844201  0.45535177], ||F(p)|| = 0.0004718408927853222


array([ 1.45432323, -0.45844201,  0.45535177])