# Imports

In [None]:
# Autoreload modules
%load_ext autoreload
%autoreload 2

In [None]:
from uom_project import poisson_solvers, streamfunction_vorticity_newton

import numpy as np
import scipy
from scipy import sparse

In [None]:
from functools import partial

from fastcore.test import test_eq, test_close

# Nonlinear solvers

## Root

In [None]:
# Helpers defining the nonlinear system F(x) = 0 and a finite-difference Jacobian for it
def get_standard_basis_vector(size, i):
    """Return the `i`-th standard basis vector of length `size`.

    The result is a fresh 1-D float array that is zero everywhere except
    at index `i`, where it holds 1.0.
    """
    basis = np.zeros(shape=(size,), dtype=float)
    basis[i] = 1.0
    return basis


def make_get_jacobian(f, sparse=False, eps=None):
    """Build a forward-difference Jacobian function for the residual `f`.

    Parameters
    ----------
    f : callable
        Called as `f(x=..., Re=..., kernel_matrix=...)`; must return a 1-D
        array (the residual vector).
    sparse : bool, default False
        If true, the Jacobian is returned as a `scipy.sparse.csr_matrix`
        instead of a dense array.
    eps : float or None, default None
        Finite-difference step. With `None` the step is `1 / N`, where `N`
        is recovered from the length of `x` as
        `int(sqrt(len(x) // 2 + 1))` (i.e. `len(x) == 2*N**2 - 2`).

    Returns
    -------
    callable
        `get_jacobian(x, Re, kernel_matrix)` returning the matrix whose
        column `i` is `(f(x + step * e_i) - f(x)) / step`.

    Notes
    -----
    A forward difference is only first-order accurate in general. It is
    exact (up to rounding) for any step when `f` is affine in each single
    coordinate of `x`; otherwise pass a smaller `eps`.
    """
    def get_jacobian(x, Re, kernel_matrix):
        n_unknowns = x.shape[0]

        if eps is None:
            N = int(np.sqrt(n_unknowns // 2 + 1))
            step = 1 / N
        else:
            step = eps
        if step == 0:
            raise ValueError("finite-difference step must be non-zero")

        f_evaluated = np.atleast_1d(f(x=x, Re=Re, kernel_matrix=kernel_matrix))

        jacobian = np.empty(
            (f_evaluated.shape[0], n_unknowns),
            dtype=np.result_type(f_evaluated.dtype, np.float64),
        )

        # One reusable perturbation vector: only entry `i` is non-zero while
        # column `i` is being computed, so `x + direction` equals x + step*e_i.
        direction = np.zeros(n_unknowns)
        for i in range(n_unknowns):
            direction[i] = step
            f_shifted = f(x=x + direction, Re=Re, kernel_matrix=kernel_matrix)
            jacobian[:, i] = (f_shifted - f_evaluated) / step
            direction[i] = 0.0

        if sparse:
            jacobian = scipy.sparse.csr_matrix(jacobian)

        return jacobian

    return get_jacobian


def f(x, Re, kernel_matrix, U_wall_top):
    """Residual of the discretised streamfunction-vorticity equations.

    The unknown vector `x` is split, in this order, into

    * `psi`      -- the first `(N-1)**2` entries (streamfunction values),
    * `w_left`, `w_right`, `w_bottom`, `w_top` -- four consecutive slices
      of `N-1` entries each (vorticity on the four walls),
    * `w_middle` -- the remaining `(N-1)**2` entries (interior vorticity),

    so `x` has `2*N**2 - 2` entries and `N` is recovered from its length.
    After reshaping to `(N-1, N-1)`, the first axis runs along x and the
    second along y: row 0 is adjacent to the wall x = 0 and column 0 is
    adjacent to the wall y = 0.

    Parameters
    ----------
    x : 1-D array of length `2*N**2 - 2`
        Current iterate, laid out as described above. It is not modified.
    Re : scalar
        Multiplies the nonlinear (psi times w) terms of the transport
        equation; with `Re = 0` the residual is affine in `x`.
    kernel_matrix : matrix
        Applied to the flattened `psi` in the Poisson part; the product
        must have `(N-1)**2` entries. The solvers in this notebook pass
        `poisson_solvers.construct_laplacian_kernel_matrix(N=N-1, h=1)`.
    U_wall_top : scalar or array broadcastable to length `N-1`
        Wall velocity at y = 1; enters the top-wall equation as
        `h * U_wall_top`.

    Returns
    -------
    1-D array with the same length as `x`: the concatenation
    `[f_poisson, f_w_left, f_w_right, f_w_bottom, f_w_top, f_w_middle]`.
    """
    # len(x) == 2*N**2 - 2, hence len(x) // 2 + 1 == N**2
    N = int(np.sqrt(x.shape[0] // 2 + 1))
    h = 1 / N

    # Unpack the unknowns (all of these are views into `x`)
    psi = x[:(N-1)**2]
    w_left   = x[(N-1)**2 + 0*(N-1) : (N-1)**2 + 1*(N-1)]
    w_right  = x[(N-1)**2 + 1*(N-1) : (N-1)**2 + 2*(N-1)]
    w_bottom = x[(N-1)**2 + 2*(N-1) : (N-1)**2 + 3*(N-1)]
    w_top    = x[(N-1)**2 + 3*(N-1) : (N-1)**2 + 4*(N-1)]
    w_middle = x[(N-1)**2 + 4*(N-1) :]

    # Calculate the equations coming from the Poisson equation
    f_poisson = kernel_matrix @ psi
    f_poisson = f_poisson + h ** 2 * w_middle

    psi = psi.reshape(N-1, N-1)

    # Calculate contributions coming from the vorticity transport equation
    w_middle = w_middle.reshape(N-1, N-1)
    
    # Calculate the sides first
    # y = 0, U_wall = 0
    f_w_bottom = h ** 2 * (w_middle[:, 0] + 3 * w_bottom) + 8 * psi[:, 0]
    # y = 1, U_wall is known here
    f_w_top = h ** 2 * (w_middle[:, -1] + 3 * w_top) + 8 * (
        h * U_wall_top + psi[:, -1]
    )
    # x = 0
    f_w_left = h ** 2 * (w_middle[0, :] + 3 * w_left) + 8 * psi[0, :]
    # x = 1
    f_w_right = h ** 2 * (w_middle[-1, :] + 3 * w_right) + 8 * psi[-1, :]

    # Linear part: 5-point stencil on the interior vorticity, i.e. -4*w plus
    # the four neighbours. `-4 * w_middle` allocates a new array, so the
    # in-place updates below never write into `x`. Neighbours that fall
    # outside the interior block are taken from the wall vectors.
    f_w_middle = -4 * w_middle
    f_w_middle[:-1, :] += w_middle[1:, :]
    f_w_middle[-1:, :] += w_right
    f_w_middle[1:, :] += w_middle[:-1, :]
    f_w_middle[:1, :] += w_left
    f_w_middle[:, :-1] += w_middle[:, 1:]
    f_w_middle[:, -1] += w_top
    f_w_middle[:, 1:] += w_middle[:, :-1]
    f_w_middle[:, 0] += w_bottom

    # Nonlinear part, for nodes with all four neighbours inside the block:
    # Re/4 * [ (psi difference along axis 0) * (w difference along axis 1)
    #        - (psi difference along axis 1) * (w difference along axis 0) ]
    f_w_middle[1:-1, 1:-1] += Re * (
        (psi[2:, 1:-1] - psi[:-2, 1:-1]) * (w_middle[1:-1, 2:] - w_middle[1:-1, :-2]) -
        (psi[1:-1, 2:] - psi[1:-1, :-2]) * (w_middle[2:, 1:-1] - w_middle[:-2, 1:-1])
    ) / 4
    # The remaining updates apply the same expression next to a wall: the psi
    # neighbour that would lie on the wall is dropped (taken as zero) and the
    # w neighbour on the wall is read from the corresponding wall vector.
    # Row next to x = 0
    f_w_middle[:1, 1:-1] += Re * (
        psi[1, 1:-1] * (w_middle[0, 2:] - w_middle[0, :-2]) -
        (psi[0, 2:] - psi[0, :-2]) * (w_middle[1, 1:-1] - w_left[1:-1])
    ) / 4
    # Row next to x = 1
    f_w_middle[-1:, 1:-1] -= Re * (
        psi[-2, 1:-1] * (w_middle[-1, 2:] - w_middle[-1, :-2]) +
        (psi[-1, 2:] - psi[-1, :-2]) * (w_right[1:-1] - w_middle[-2, 1:-1])
    ) / 4
    # Column next to y = 0
    f_w_middle[1:-1, 0] += Re * (
        (psi[2:, 0] - psi[:-2, 0]) * (w_middle[1:-1, 1] - w_bottom[1:-1]) -
        psi[1:-1, 1] * (w_middle[2:, 0] - w_middle[:-2, 0])
    ) / 4
    # Column next to y = 1
    f_w_middle[1:-1, -1] += Re * (
        (psi[2:, -1] - psi[:-2, -1]) * (w_top[1:-1] - w_middle[1:-1, -2]) +
        psi[1:-1, -2] * (w_middle[2:, -1] - w_middle[:-2, -1])
    ) / 4
    # Corner next to x = 0, y = 0
    f_w_middle[0, 0] += Re * (
        psi[1, 0] * (w_middle[0, 1] - w_bottom[0]) -
        psi[0, 1] * (w_middle[1, 0] - w_left[0])
    ) / 4
    # Corner next to x = 1, y = 0
    f_w_middle[-1, 0] -= Re * (
        psi[-2, 0] * (w_middle[-1, 1] - w_bottom[-1]) +
        psi[-1, 1] * (w_right[0] - w_middle[-2, 0])
    ) / 4
    # Corner next to x = 0, y = 1
    f_w_middle[0, -1] += Re * (
        psi[1, -1] * (w_top[0] - w_middle[0, -2]) +
        psi[0, -2] * (w_middle[1, -1] - w_left[-1])
    ) / 4
    # Corner next to x = 1, y = 1
    f_w_middle[-1, -1] -= Re * (
        psi[-2, -1] * (w_top[-1] - w_middle[-1, -2]) -
        psi[-1, -2] * (w_right[-1] - w_middle[-2, -1])
    ) / 4

    # Same block order as the unknowns in `x`
    return np.concatenate([
        f_poisson, f_w_left, f_w_right, f_w_bottom, f_w_top, f_w_middle.flatten()
    ], axis=0)


def make_f(U_wall_top):
    """Return the residual `f` with its `U_wall_top` argument pre-bound.

    The result is a `functools.partial`, so it can be called as
    `fun(x, Re, kernel_matrix)` by the scipy solvers.
    """
    residual_with_wall_velocity = partial(f, U_wall_top=U_wall_top)
    return residual_with_wall_velocity


In [None]:
def nonlinear_root_solver(f, N, Re, algorithm, **kwargs):
    """Solve `f(x, Re, kernel_matrix) = 0` with `scipy.optimize.root`.

    Parameters
    ----------
    f : callable
        Residual function, called by scipy as `f(x, Re, kernel_matrix)`.
    N : int
        Grid parameter; the unknown vector has
        `(N - 1)**2 + (N + 1)**2 - 4` entries and the iteration starts
        from all zeros.
    Re : scalar
        Forwarded to `f` (and to `jac`, if one is given) as an extra
        argument.
    algorithm : str
        Passed to `scipy.optimize.root` as `method`.
    **kwargs
        Forwarded unchanged to `scipy.optimize.root` (e.g. `jac`, `tol`,
        `options`).

    Returns
    -------
    w : array of shape `(N + 1, N + 1)`
        Vorticity unknowns after `streamfunction_vorticity_newton.reconstruct_w`.
    psi : array of shape `(N + 1, N + 1)`
        Streamfunction unknowns, zero-padded by one layer on every side.
    solution : scipy `OptimizeResult`
        The raw result object returned by `scipy.optimize.root`.
    """
    n_psi = (N - 1) ** 2
    initial_guess = np.zeros((n_psi + (N + 1) ** 2 - 4, ))
    kernel_matrix = poisson_solvers.construct_laplacian_kernel_matrix(N=N-1, h=1)

    solution = scipy.optimize.root(
        fun=f,
        x0=initial_guess,
        method=algorithm,
        args=(Re, kernel_matrix),
        **kwargs,
    )

    # The first (N - 1)**2 unknowns are psi, everything after is vorticity.
    psi_flat = solution.x[:n_psi]
    w_flat = solution.x[n_psi:]

    # Surround the psi block with a one-cell frame of zeros.
    psi = np.pad(
        psi_flat.reshape(N - 1, N - 1),
        (1, 1), mode="constant", constant_values=0,
    )

    # Project helper expects a column vector; its output is reshaped to the grid.
    w_full = streamfunction_vorticity_newton.reconstruct_w(
        w_tmp=w_flat[:, None], N=N
    )
    w = w_full.reshape(N + 1, N + 1)

    return w, psi, solution

In [None]:
%%time

# Re = 0 comparison: the project's Newton solver is the reference, scipy's
# "broyden2" root finder is the solver under test.
N = 20
Re = 0 # i.e. viscosity mu = inf
U_wall_top = np.sin(np.pi * np.arange(1, N) / N) ** 2

# Reference solution
w, psi, n_iter = streamfunction_vorticity_newton.newton_solver(
    f=partial(streamfunction_vorticity_newton.f, U_wall_top=U_wall_top),
    get_jacobian=streamfunction_vorticity_newton.get_jacobian, N=N, Re=Re,
)

# Same problem through scipy. `jac` is built here but not passed to the
# broyden2 call below.
fun = make_f(U_wall_top=U_wall_top)
jac = make_get_jacobian(f=fun)
options = {
    "line_search": None,
    "jac_options": {
        "reduction_method": "restart",
    },
}
w2, psi2, solution = nonlinear_root_solver(
    f=fun, N=N, Re=Re,
    algorithm="broyden2",
    options=options,
    tol=1e-12,
)
print(solution)

# Both solvers must agree on vorticity and streamfunction
test_eq(np.allclose(w, w2), True)
test_eq(np.allclose(psi, psi2), True)
test_close(w, w2, eps=1e-8)
test_close(psi, psi2, eps=1e-8)
# Newton is expected to finish in one iteration here (presumably because the
# system is linear when Re = 0 -- confirm against newton_solver)
test_eq(n_iter, 1)

 message: A solution was found at the specified tolerance.
 success: True
  status: 1
     fun: [-3.346e-13  2.769e-13 ...  6.111e-13  5.016e-13]
       x: [ 2.136e-06 -2.671e-05 ...  1.121e+00  5.920e-01]
     nit: 410
CPU times: user 529 ms, sys: 12.2 ms, total: 541 ms
Wall time: 625 ms


In [None]:
%%time

# Re = 10 comparison: project Newton solver (reference) vs scipy "broyden2".
N = 20
Re = 10
U_wall_top = np.sin(np.pi * np.arange(1, N) / N) ** 2

# Reference solution
w, psi, n_iter = streamfunction_vorticity_newton.newton_solver(
    f=partial(streamfunction_vorticity_newton.f, U_wall_top=U_wall_top),
    get_jacobian=streamfunction_vorticity_newton.get_jacobian, N=N, Re=Re,
)

# `fun` and `jac` are also picked up by the timing cells further down
fun = make_f(U_wall_top=U_wall_top)
jac = make_get_jacobian(f=fun)
options = {
    "line_search": None,
    "jac_options": {
        "reduction_method": "restart",
        # "reduction_method": "svd",
    },
}
w2, psi2, solution = nonlinear_root_solver(
    f=fun, N=N, Re=Re,
    # jac=jac,
    algorithm="broyden2", # the only other reasonable option is "hybr"
    options=options,
    tol=1e-11,
)
print(solution)

# Both solvers must agree (note the looser eps=1e-7 on the vorticity)
# test_eq(np.allclose(w, w2, atol=1e-7), True)
test_eq(np.allclose(w, w2), True)
test_eq(np.allclose(psi, psi2), True)
test_close(w, w2, eps=1e-7)
test_close(psi, psi2, eps=1e-8)

 message: A solution was found at the specified tolerance.
 success: True
  status: 1
     fun: [-4.566e-12  1.965e-12 ... -8.263e-12  1.034e-10]
       x: [ 1.869e-06 -2.748e-05 ...  1.171e+00  6.297e-01]
     nit: 464
CPU times: user 976 ms, sys: 20.8 ms, total: 997 ms
Wall time: 1.36 s


In [None]:
#|eval: false
# Baseline timing of the project's Newton solver. Uses the `U_wall_top`, `N`
# and `Re` currently defined in the kernel by an earlier cell.
f_tmp = partial(streamfunction_vorticity_newton.f, U_wall_top=U_wall_top)
get_jac_tmp = streamfunction_vorticity_newton.get_jacobian
%timeit -r 30 streamfunction_vorticity_newton.newton_solver(f_tmp, get_jac_tmp, N, Re)

605 ms ± 220 ms per loop (mean ± std. dev. of 30 runs, 1 loop each)


In [None]:
#|eval: false
# Timing of scipy's "hybr" root finder with the finite-difference Jacobian.
# Relies on `fun`, `jac`, `N` and `Re` defined by an earlier cell.
%timeit -r 30 nonlinear_root_solver(f=fun, N=N, Re=Re, jac=jac, algorithm="hybr")

982 ms ± 198 ms per loop (mean ± std. dev. of 30 runs, 1 loop each)


In [None]:
#|eval: false

# Timing of scipy's "broyden2" root finder (no Jacobian passed), with the
# same options as the comparison cell above. Relies on `fun`, `N` and `Re`
# defined by an earlier cell.
options = {
    "line_search": None,
    "jac_options": {
        "reduction_method": "restart",
        # "reduction_method": "svd",
    },
}
%timeit -r 30  nonlinear_root_solver(f=fun, N=N, Re=Re, algorithm="broyden2", options=options, tol=1e-11)

447 ms ± 116 ms per loop (mean ± std. dev. of 30 runs, 1 loop each)


In [None]:
#| eval: false
# Larger grid (N = 40): project Newton solver (reference) vs scipy "hybr"
# with the finite-difference Jacobian.
N = 40
Re = 10
U_wall_top = np.sin(np.pi * np.arange(1, N) / N) ** 2

# Reference solution
w, psi, n_iter = streamfunction_vorticity_newton.newton_solver(
    f=partial(streamfunction_vorticity_newton.f, U_wall_top=U_wall_top),
    get_jacobian=streamfunction_vorticity_newton.get_jacobian, N=N, Re=Re,
)

fun = make_f(U_wall_top=U_wall_top)
jac = make_get_jacobian(f=fun)
w2, psi2, solution = nonlinear_root_solver(
    f=fun, N=N, Re=Re,
    jac=jac,
    algorithm="hybr", # "broyden2" does not converge
)
print(solution)

# Both solvers must agree on vorticity and streamfunction
test_eq(np.allclose(w, w2), True)
test_eq(np.allclose(psi, psi2), True)
test_close(w, w2, eps=1e-8)
test_close(psi, psi2, eps=1e-8)

 message: The solution converged.
 success: True
  status: 1
     fun: [-2.118e-22  0.000e+00 ... -2.220e-12 -4.133e-13]
       x: [ 1.155e-06  1.692e-06 ...  9.333e-01  4.837e-01]
    nfev: 11
    njev: 1
    fjac: [[-3.310e-01  8.276e-02 ... -1.230e-12 -4.308e-13]
           [ 8.593e-02 -4.342e-01 ... -1.484e-12 -5.293e-13]
           ...
           [ 4.384e-07  4.585e-07 ... -5.992e-04  2.236e-04]
           [ 2.706e-07  5.204e-07 ... -1.505e-05 -3.251e-04]]
       r: [ 1.208e+01 -6.621e-01 ... -1.813e-03  2.011e-03]
     qtf: [ 1.210e-17 -1.656e-17 ... -6.055e-13 -1.300e-13]


In [None]:
#|eval: false
# Baseline timing of the project's Newton solver. Uses the `U_wall_top`, `N`
# and `Re` currently defined in the kernel by an earlier cell.
f_tmp = partial(streamfunction_vorticity_newton.f, U_wall_top=U_wall_top)
get_jac_tmp = streamfunction_vorticity_newton.get_jacobian
%timeit streamfunction_vorticity_newton.newton_solver(f_tmp, get_jac_tmp, N, Re)

2.89 s ± 247 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Least squares

In [None]:
def nonlinear_lstsq_solver(f, N, Re, algorithm, **kwargs):
    """Solve the system in the least-squares sense with `scipy.optimize.least_squares`.

    Parameters
    ----------
    f : callable
        Residual function, called by scipy as `f(x, Re, kernel_matrix)`.
    N : int
        Grid parameter; the unknown vector has
        `(N - 1)**2 + (N + 1)**2 - 4` entries and the iteration starts
        from all zeros.
    Re : scalar
        Forwarded to `f` (and to `jac`, if one is given) as an extra
        argument.
    algorithm : str
        Passed to `scipy.optimize.least_squares` as `method`.
    **kwargs
        Forwarded unchanged to `scipy.optimize.least_squares` (e.g. `jac`).

    Returns
    -------
    w : array of shape `(N + 1, N + 1)`
        Vorticity unknowns after `streamfunction_vorticity_newton.reconstruct_w`.
    psi : array of shape `(N + 1, N + 1)`
        Streamfunction unknowns, zero-padded by one layer on every side.
    solution : scipy `OptimizeResult`
        The raw result object returned by `scipy.optimize.least_squares`.
    """
    interior_count = (N - 1) ** 2
    start = np.zeros((interior_count + (N + 1) ** 2 - 4, ))
    extra_args = (
        Re, poisson_solvers.construct_laplacian_kernel_matrix(N=N-1, h=1),
    )

    solution = scipy.optimize.least_squares(
        fun=f, x0=start, method=algorithm, args=extra_args, **kwargs,
    )

    unknowns = solution.x

    # Streamfunction: first block of unknowns, framed with zeros
    psi = unknowns[:interior_count].reshape(N - 1, N - 1)
    psi = np.pad(psi, (1, 1), mode="constant", constant_values=0)

    # Vorticity: remaining unknowns, expanded by the project helper
    w_column = unknowns[interior_count:][:, None]
    w = streamfunction_vorticity_newton.reconstruct_w(w_tmp=w_column, N=N)
    w = w.reshape(N + 1, N + 1)

    return w, psi, solution

In [None]:
%%time

# Project Newton solver (reference) vs scipy least squares ("lm") with the
# finite-difference Jacobian.
N = 20 # N = 40 too slow for the least squares solver
Re = 10
U_wall_top = np.sin(np.pi * np.arange(1, N) / N) ** 2

# Reference solution
w, psi, n_iter = streamfunction_vorticity_newton.newton_solver(
    f=partial(streamfunction_vorticity_newton.f, U_wall_top=U_wall_top),
    get_jacobian=streamfunction_vorticity_newton.get_jacobian, N=N, Re=Re,
)

fun = make_f(U_wall_top=U_wall_top)
jac = make_get_jacobian(f=fun)
w2, psi2, solution = nonlinear_lstsq_solver(
    f=fun, N=N, Re=Re,
    jac=jac,
    algorithm="lm",
)
print(solution)

# Both solvers must agree on vorticity and streamfunction
test_eq(np.allclose(w, w2), True)
test_eq(np.allclose(psi, psi2), True)
test_close(w, w2, eps=1e-8)
test_close(psi, psi2, eps=1e-8)

     message: `xtol` termination condition is satisfied.
     success: True
      status: 3
         fun: [-1.355e-20  2.711e-20 ...  1.839e-16 -1.596e-16]
           x: [ 1.869e-06 -2.748e-05 ...  1.171e+00  6.297e-01]
        cost: 4.7132742121658445e-29
         jac: [[-4.000e+00  1.000e+00 ...  0.000e+00  0.000e+00]
               [ 1.000e+00 -4.000e+00 ...  0.000e+00  0.000e+00]
               ...
               [ 0.000e+00  0.000e+00 ... -4.000e+00  1.021e+00]
               [ 0.000e+00  0.000e+00 ...  9.880e-01 -4.000e+00]]
        grad: [-7.036e-18  1.299e-17 ... -9.936e-16  8.329e-16]
  optimality: 3.2106471208879866e-14
 active_mask: [0 0 ... 0 0]
        nfev: 5
        njev: 4
CPU times: user 2.64 s, sys: 59.2 ms, total: 2.7 s
Wall time: 3.45 s


In [None]:
#|eval: false
# Baseline timing of the project's Newton solver. Uses the `U_wall_top`, `N`
# and `Re` currently defined in the kernel by an earlier cell.
f_tmp = partial(streamfunction_vorticity_newton.f, U_wall_top=U_wall_top)
get_jac_tmp = streamfunction_vorticity_newton.get_jacobian
# %timeit -r 30 streamfunction_vorticity_newton.newton_solver(f_tmp, get_jac_tmp, N, Re)
%timeit streamfunction_vorticity_newton.newton_solver(f_tmp, get_jac_tmp, N, Re)

700 ms ± 116 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
#|eval: false
# Timing of the least-squares ("lm") solver with the finite-difference
# Jacobian. Relies on `fun`, `N` and `Re` defined by an earlier cell.
jac = make_get_jacobian(f=fun)
%timeit nonlinear_lstsq_solver(f=fun, N=N, Re=Re, jac=jac, algorithm="lm")

2.27 s ± 208 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Minimization solvers

In [None]:
# NOTE: very slow, not even worth testing
def nonlinear_minimization_solver(f, N, Re, algorithm, **kwargs):
    """Run `scipy.optimize.minimize` on `f` and unpack the result onto the grid.

    Parameters
    ----------
    f : callable
        Objective, called by scipy as `f(x, Re, kernel_matrix)`.
        NOTE(review): `scipy.optimize.minimize` documents its objective as
        returning a float, so a vector-valued residual has to be reduced to
        a scalar (e.g. a sum of squares) by the caller before being passed
        here.
    N : int
        Grid parameter; the unknown vector has
        `(N - 1)**2 + (N + 1)**2 - 4` entries and the iteration starts
        from all zeros.
    Re : scalar
        Forwarded to `f` as an extra argument.
    algorithm : str
        Passed to `scipy.optimize.minimize` as `method`.
    **kwargs
        Forwarded unchanged to `scipy.optimize.minimize`.

    Returns
    -------
    w : array of shape `(N + 1, N + 1)`
        Vorticity unknowns after `streamfunction_vorticity_newton.reconstruct_w`.
    psi : array of shape `(N + 1, N + 1)`
        Streamfunction unknowns, zero-padded by one layer on every side.
    solution : scipy `OptimizeResult`
        The raw result object returned by `scipy.optimize.minimize`.
    """
    solution = scipy.optimize.minimize(
        # Use the objective passed in by the caller rather than whatever
        # `fun` happens to be defined in the notebook's global namespace.
        fun=f,
        x0=np.zeros(((N - 1) ** 2 + (N + 1) ** 2 - 4, )),
        method=algorithm,
        args=(
            Re, poisson_solvers.construct_laplacian_kernel_matrix(N=N-1, h=1),
        ),
        **kwargs,
    )

    # The first (N - 1)**2 unknowns are psi, everything after is vorticity.
    psi, w = solution.x[:(N - 1) ** 2], solution.x[(N - 1) ** 2:]

    # Get final psi
    psi = psi.reshape(N - 1, N - 1)
    psi = np.pad(psi, (1, 1), mode="constant", constant_values=0)

    # Get final w
    w = streamfunction_vorticity_newton.reconstruct_w(w_tmp=w[:, None], N=N)
    w = w.reshape(N + 1, N + 1)

    return w, psi, solution