# Direct Solvers

This notebook compares commonly available sparse direct solvers that can be called from Python. We focus on multi-threaded solvers that run on a single node with several cores.
For now, we compare SuperLU, UMFPACK and Intel MKL PARDISO. SuperLU is the default solver in SciPy, UMFPACK requires the installation of scikit-umfpack, and Intel MKL PARDISO requires PyPardiso.

In [1]:
# Load the autoreload extension and use mode 2, which re-imports edited modules
# before each cell is executed.
%load_ext autoreload
%autoreload 2
# Show matplotlib figures inline in the notebook, rendered as SVG.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse.linalg as spla

import ibmos as ib

In [3]:
import scipy.linalg as la

# Case initialization
The case setup below is taken from [Unsteady flow around cylinder (Re=200)](../1-Basic/CylinderRe200.ipynb).

In [4]:
# Parameters shared by all three ib.stretching calls; only the first argument
# (256 / 128 / 192) differs between them. Keeping them in one place prevents
# the x and y grids from silently drifting apart.
# NOTE(review): the meaning of each value is defined by ib.stretching -- confirm there.
stretch_args = (0.033, 0.20, int(0.5/0.033+16), 16, 16, 0.04)

# x grid: the 128-based profile mirrored onto the negative side, followed by the
# 256-based profile. s1[1:] skips the first point of the positive side,
# presumably because it coincides with the last mirrored point -- TODO confirm.
s1 = ib.stretching(256, *stretch_args)
s2 = ib.stretching(128, *stretch_args)
x = np.r_[-s2[::-1], s1[1:]]

# y grid: one 192-based profile mirrored about its first point.
s = ib.stretching(192, *stretch_args)
y = np.r_[-s[::-1], s[1:]]

# iRe = 1/200 matches the Re=200 case this setup is taken from.
# NOTE(review): Co is presumably the Courant number -- confirm in ib.Solver.
solver = ib.Solver(x, y, iRe=1/200, Co=0.4)

# Drop every grid-construction temporary (including `s` and the shared
# parameter tuple) so none of them lingers in the kernel namespace.
del x, y, s1, s2, s, stretch_args

# Single immersed solid named "cylinder"; the name shows up in the step log
# columns (cylinder_fx, cylinder_fy).
# NOTE(review): 0, 0, 0.5 are presumably centre coordinates and radius -- confirm.
solver.set_solids(ib.shapes.cylinder("cylinder", 0, 0, 0.5, solver.dxmin))

# Start from all-zero boundary conditions, then set the first four entries of
# uBC to 1.
# NOTE(review): presumably a unit u-velocity on the four domain boundaries -- confirm.
uBC, vBC = solver.zero_boundary_conditions()
for k in range(4):
    uBC[k][:] = 1

# One pair of zero arrays, each of length solver.solids[0].l, for the single solid.
sBC = ((np.zeros(solver.solids[0].l), np.zeros(solver.solids[0].l)), )

In [5]:
# Number of time steps requested by each multi-step `solver.steps(..., number=n)` timing run below.
n = 50

# Basic tests

In [6]:
# Build the propagator with the fractional step method disabled. A[0] is the
# matrix that the solver cells below factorize and solve against.
A, B = solver.propagator(fractionalStep=False)
# Right-hand side assembled from the velocity and solid boundary conditions;
# used as the vector `bc` in every solve and residual check below.
bc = solver.boundary_condition_terms(uBC, vBC, *sBC)

### Factorization

In [7]:
%%time 
splu, =ib.tools.solver_superlu(A[0].copy())
x=splu(bc)
x[solver.pStart:solver.pEnd]-=np.mean(x[solver.pStart:solver.pEnd])
print(la.norm(A[0]@x-bc)/la.norm(bc))

1.1608912400598384e-11
CPU times: user 9min 7s, sys: 12.4 s, total: 9min 19s
Wall time: 28.9 s


In [8]:
%%time 
umfp, = ib.tools.solver_umfpack(A[0].copy())
x=umfp(bc)
x[solver.pStart:solver.pEnd]-=np.mean(x[solver.pStart:solver.pEnd])
print(la.norm(A[0]@x-bc)/la.norm(bc))

3.689651412028829e-12
CPU times: user 2min 28s, sys: 4.31 s, total: 2min 33s
Wall time: 6.13 s




In [9]:
%%time 
pdiso, pdisoSolver = ib.tools.solver_pardiso(A[0].copy())
pdisoSolver.set_statistical_info_on()
x=pdiso(bc); 
x[solver.pStart:solver.pEnd]-=np.mean(x[solver.pStart:solver.pEnd])
print(la.norm(A[0]@x-bc)/la.norm(bc))

3.66303758278839e-12
CPU times: user 17.4 s, sys: 1.52 s, total: 18.9 s
Wall time: 2.19 s


### Solve

In [10]:
%%timeit -n 10
# Time the solve alone, reusing the `splu` callable built in the factorization cell (10 loops per run).
splu(bc);

263 ms ± 600 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [11]:
%%timeit -n 10
# Time the solve alone, reusing the `umfp` callable built in the factorization cell (10 loops per run).
umfp(bc);



520 ms ± 11.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
%%timeit -n 10
# Time the solve alone, reusing the `pdiso` callable built in the factorization cell (10 loops per run).
pdiso(bc);

78 ms ± 1.07 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# SuperLU

In [13]:
# Hand the SuperLU factory to the solver so the `solver.steps` runs in this section use it.
solver.set_solver(ib.tools.solver_superlu)

## Without fractional step method

In [14]:
# Turn the fractional step method off for the two timed runs that follow.
solver.set_fractional_step(False)

In [15]:
# Single step from the zero state (SuperLU, fractional step off). The recorded wall time is
# close to the standalone SuperLU factorization cell, so it is presumably dominated by the factorization.
%time x0, *_ = solver.steps(solver.zero(), uBC, vBC, sBC, number=1, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) 
       1  1.32000e-02  4.38832e+05  3.32449e+07  2.64415e+02  2.17386e-10  1.05398e-11 
CPU times: user 9min 9s, sys: 12.7 s, total: 9min 22s
Wall time: 28.9 s


In [16]:
# Run n steps starting from x0 (SuperLU, fractional step off); only the final state is kept in `x`.
%time x, *_ = solver.steps(x0, uBC, vBC, sBC, number=n, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) 
       1  1.32000e-02  3.94981e+02  3.32451e+07  2.13865e+00 -2.17614e-10  9.90400e-16 
       2  2.64000e-02  3.91043e+02  1.28153e+03  2.01721e+00 -1.16852e-12  9.01481e-16 
       3  3.96000e-02  3.87951e+02  1.21412e+03  1.88682e+00  1.01575e-13  9.10295e-16 
       4  5.28000e-02  3.86173e+02  8.45545e+02  1.77396e+00 -4.27474e-13  9.04931e-16 
       5  6.60000e-02  3.85360e+02  4.44242e+02  1.67815e+00  7.35167e-13  8.02413e-16 
       6  7.92000e-02  3.84993e+02  2.18545e+02  1.59763e+00 -4.53804e-13  8.82856e-16 
       7  9.24000e-02  3.84712e+02  1.76330e+02  1.53003e+00 -7.77416e-13  9.26377e-16 
       8  1.05600e-01  3.84390e+02  2.12265e+02  1.47272e+00  2.52420e-13  9.10329e-16 
       9  1.18800e-01  3.84073e+02  2.23155e+02  1.42340e+00 -3.37206e-13  9.07201e-16 
      10  1.32000e-01  3.83840e+02  1.74508e+02  1.38050e+00 -6.01815e-13  1.00226e-15 
      11  1.45200e-01  3.83709e+

## With fractional step method

In [17]:
# Turn the fractional step method on; the step log below then also reports rel.error(C).
solver.set_fractional_step(True)

In [18]:
# Single step from the zero state (SuperLU, fractional step on); timed separately from the
# n-step run, presumably to isolate the one-off setup/factorization cost -- TODO confirm.
%time x0, *_ = solver.steps(solver.zero(), uBC, vBC, sBC, number=1, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) rel.error(C) 
       1  1.32000e-02  4.38832e+05  3.32449e+07  2.64407e+02 -1.20436e-10  1.45943e-16  6.62222e-12 
CPU times: user 9min 25s, sys: 11.1 s, total: 9min 36s
Wall time: 24.9 s


In [19]:
# Run n steps starting from x0 (SuperLU, fractional step on); only the final state is kept in `x`.
%time x, *_ = solver.steps(x0, uBC, vBC, sBC, number=n, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) rel.error(C) 
       1  1.32000e-02  3.94922e+02  3.32450e+07  2.13966e+00  1.19956e-10  2.11280e-16  1.42337e-14 
       2  2.64000e-02  3.91006e+02  1.27691e+03  2.01807e+00 -2.80616e-13  2.11000e-16  1.27584e-14 
       3  3.96000e-02  3.87929e+02  1.21065e+03  1.88749e+00  1.06561e-13  2.11323e-16  1.15251e-14 
       4  5.28000e-02  3.86158e+02  8.43403e+02  1.77447e+00  5.20365e-14  2.10892e-16  9.89307e-15 
       5  6.60000e-02  3.85349e+02  4.43083e+02  1.67854e+00  4.43395e-14  2.11410e-16  9.68149e-15 
       6  7.92000e-02  3.84983e+02  2.17891e+02  1.59793e+00  4.28789e-14  2.10766e-16  9.43231e-15 
       7  9.24000e-02  3.84704e+02  1.75782e+02  1.53026e+00  1.63966e-14  2.11478e-16  9.42983e-15 
       8  1.05600e-01  3.84383e+02  2.11712e+02  1.47291e+00  1.65805e-14  2.10847e-16  9.25760e-15 
       9  1.18800e-01  3.84067e+02  2.22690e+02  1.42355e+00  3.51108e-14  2.11551e-16  9.5

# UMFPACK

In [20]:
# Hand the UMFPACK factory to the solver so the `solver.steps` runs in this section use it.
solver.set_solver(ib.tools.solver_umfpack)

## Without fractional step method

In [21]:
# Turn the fractional step method off for the two timed runs that follow.
solver.set_fractional_step(False)

In [22]:
# Single step from the zero state (UMFPACK, fractional step off). The recorded wall time is
# close to the standalone UMFPACK factorization cell, so it is presumably dominated by the factorization.
%time x0, *_ = solver.steps(solver.zero(), uBC, vBC, sBC, number=1, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) 
       1  1.32000e-02  4.38832e+05  3.32449e+07  2.64415e+02  2.06501e-14  3.60567e-12 
CPU times: user 2min 35s, sys: 4.41 s, total: 2min 40s
Wall time: 6.44 s




In [23]:
# Run n steps starting from x0 (UMFPACK, fractional step off); only the final state is kept in `x`.
%time x, *_ = solver.steps(x0, uBC, vBC, sBC, number=n, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) 




       1  1.32000e-02  3.94981e+02  3.32451e+07  2.13865e+00 -2.43659e-14  9.91994e-16 
       2  2.64000e-02  3.91043e+02  1.28153e+03  2.01721e+00  6.32758e-14  1.83988e-15 
       3  3.96000e-02  3.87951e+02  1.21412e+03  1.88682e+00  5.90066e-14  2.01201e-15 
       4  5.28000e-02  3.86173e+02  8.45545e+02  1.77396e+00 -3.48315e-14  2.68090e-15 
       5  6.60000e-02  3.85360e+02  4.44242e+02  1.67815e+00  2.47025e-14  1.86963e-15 
       6  7.92000e-02  3.84993e+02  2.18545e+02  1.59763e+00 -4.43257e-14  1.39626e-15 
       7  9.24000e-02  3.84712e+02  1.76330e+02  1.53003e+00  6.45699e-14  9.05537e-16 
       8  1.05600e-01  3.84390e+02  2.12265e+02  1.47272e+00 -3.68455e-14  4.67296e-16 
       9  1.18800e-01  3.84073e+02  2.23155e+02  1.42340e+00  5.91714e-14  1.59089e-15 
      10  1.32000e-01  3.83840e+02  1.74508e+02  1.38050e+00 -1.85390e-14  1.79298e-15 
      11  1.45200e-01  3.83709e+02  1.02901e+02  1.34311e+00  5.07754e-15  9.03072e-16 
      12  1.58400e-01  3.83642e+

## With fractional step method

In [24]:
# Turn the fractional step method on; the step log below then also reports rel.error(C).
solver.set_fractional_step(True)

In [25]:
# Single step from the zero state (UMFPACK, fractional step on); timed separately from the
# n-step run, presumably to isolate the one-off setup/factorization cost -- TODO confirm.
%time x0, *_ = solver.steps(solver.zero(), uBC, vBC, sBC, number=1, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) rel.error(C) 
       1  1.32000e-02  4.38832e+05  3.32449e+07  2.64407e+02  8.84648e-11  1.19136e-16  2.71190e-11 
CPU times: user 2min 2s, sys: 3.28 s, total: 2min 5s
Wall time: 5.13 s




In [26]:
# Run n steps starting from x0 (UMFPACK, fractional step on); only the final state is kept in `x`.
%time x, *_ = solver.steps(x0, uBC, vBC, sBC, number=n, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) rel.error(C) 




       1  1.32000e-02  3.94922e+02  3.32450e+07  2.13966e+00 -8.44622e-11  1.20649e-16  1.77748e-14 
       2  2.64000e-02  3.91006e+02  1.27691e+03  2.01807e+00 -2.06142e-12  1.20656e-16  1.60072e-14 
       3  3.96000e-02  3.87929e+02  1.21065e+03  1.88749e+00  1.80097e-13  1.20272e-16  1.49889e-14 
       4  5.28000e-02  3.86158e+02  8.43403e+02  1.77447e+00 -1.90009e-13  1.20495e-16  1.35775e-14 
       5  6.60000e-02  3.85349e+02  4.43083e+02  1.67854e+00 -3.07764e-13  1.20568e-16  1.37256e-14 
       6  7.92000e-02  3.84983e+02  2.17891e+02  1.59793e+00 -2.60616e-13  1.20236e-16  1.30229e-14 
       7  9.24000e-02  3.84704e+02  1.75782e+02  1.53026e+00 -1.34823e-13  1.20938e-16  1.25491e-14 
       8  1.05600e-01  3.84383e+02  2.11712e+02  1.47291e+00 -6.14422e-14  1.21036e-16  1.34195e-14 
       9  1.18800e-01  3.84067e+02  2.22690e+02  1.42355e+00 -6.56315e-14  1.20771e-16  1.26875e-14 
      10  1.32000e-01  3.83835e+02  1.74215e+02  1.38061e+00 -9.08596e-14  1.20690e-16  1.2

# PARDISO

In [27]:
# Hand the PARDISO factory to the solver so the `solver.steps` runs in this section use it.
solver.set_solver(ib.tools.solver_pardiso)

## Without fractional step method

In [28]:
# Turn the fractional step method off for the two timed runs that follow.
solver.set_fractional_step(False)

In [29]:
# Single step from the zero state (PARDISO, fractional step off). The recorded wall time is
# close to the standalone PARDISO factorization cell, so it is presumably dominated by the factorization.
%time x0, *_ = solver.steps(solver.zero(), uBC, vBC, sBC, number=1, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) 
       1  1.32000e-02  4.38832e+05  3.32449e+07  2.64415e+02  4.07907e-12  3.27767e-12 
CPU times: user 25.1 s, sys: 1.71 s, total: 26.8 s
Wall time: 2.1 s


In [30]:
# Run n steps starting from x0 (PARDISO, fractional step off); only the final state is kept in `x`.
%time x, *_ = solver.steps(x0, uBC, vBC, sBC, number=n, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) 
       1  1.32000e-02  3.94981e+02  3.32451e+07  2.13865e+00 -4.43174e-12  1.49293e-16 
       2  2.64000e-02  3.91043e+02  1.28153e+03  2.01721e+00 -8.85889e-14  1.48661e-16 
       3  3.96000e-02  3.87951e+02  1.21412e+03  1.88682e+00 -4.79113e-14  1.49012e-16 
       4  5.28000e-02  3.86173e+02  8.45545e+02  1.77396e+00 -3.85785e-14  1.49075e-16 
       5  6.60000e-02  3.85360e+02  4.44242e+02  1.67815e+00 -2.72369e-14  1.49100e-16 
       6  7.92000e-02  3.84993e+02  2.18545e+02  1.59763e+00 -3.60493e-14  1.49120e-16 
       7  9.24000e-02  3.84712e+02  1.76330e+02  1.53003e+00 -3.43128e-14  1.49583e-16 
       8  1.05600e-01  3.84390e+02  2.12265e+02  1.47272e+00 -6.70124e-15  1.49273e-16 
       9  1.18800e-01  3.84073e+02  2.23155e+02  1.42340e+00 -3.72011e-14  1.49553e-16 
      10  1.32000e-01  3.83840e+02  1.74508e+02  1.38050e+00 -1.89120e-14  1.48897e-16 
      11  1.45200e-01  3.83709e+

## With fractional step method

In [31]:
# Turn the fractional step method on; the step log below then also reports rel.error(C).
solver.set_fractional_step(True)

In [32]:
# Single step from the zero state (PARDISO, fractional step on); timed separately from the
# n-step run, presumably to isolate the one-off setup/factorization cost -- TODO confirm.
%time x0, *_ = solver.steps(solver.zero(), uBC, vBC, sBC, number=1, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) rel.error(C) 
       1  1.32000e-02  4.38832e+05  3.32449e+07  2.64407e+02 -1.58088e-10  1.49191e-16  1.18592e-11 
CPU times: user 47.5 s, sys: 3.79 s, total: 51.3 s
Wall time: 2.8 s


In [33]:
# Run n steps starting from x0 (PARDISO, fractional step on); only the final state is kept in `x`.
%time x, *_ = solver.steps(x0, uBC, vBC, sBC, number=n, checkSolvers=True)

       k            t          x_2       dxdt_2  cylinder_fx  cylinder_fy rel.error(A) rel.error(C) 
       1  1.32000e-02  3.94922e+02  3.32450e+07  2.13966e+00  1.56712e-10  1.89534e-16  1.19582e-14 
       2  2.64000e-02  3.91006e+02  1.27691e+03  2.01807e+00  1.28061e-13  1.90728e-16  1.06997e-14 
       3  3.96000e-02  3.87929e+02  1.21065e+03  1.88749e+00  8.86045e-14  1.89887e-16  9.73292e-15 
       4  5.28000e-02  3.86158e+02  8.43403e+02  1.77447e+00  9.06931e-14  1.90528e-16  8.67888e-15 
       5  6.60000e-02  3.85349e+02  4.43083e+02  1.67854e+00  8.10341e-14  1.89998e-16  8.44158e-15 
       6  7.92000e-02  3.84983e+02  2.17891e+02  1.59793e+00  3.28505e-14  1.89871e-16  8.92931e-15 
       7  9.24000e-02  3.84704e+02  1.75782e+02  1.53026e+00  3.41116e-14  1.89020e-16  8.77002e-15 
       8  1.05600e-01  3.84383e+02  2.11712e+02  1.47291e+00  3.16795e-14  1.90681e-16  8.04593e-15 
       9  1.18800e-01  3.84067e+02  2.22690e+02  1.42355e+00  1.30573e-14  1.89326e-16  6.9