In [1]:
import ipyparallel as ipp

from ipyparallel import Cluster
c = await Cluster(engines="mpi", profile="myprofile").start_and_connect(n=4, activate=True)
c.ids

Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'>


  0%|          | 0/4 [00:00<?, ?engine/s]

[0, 1, 2, 3]

## Smoothers

Various Gauss-Seidel-type, MPI-parallel, multiplicative smoothers that overlap MPI communication with computation:
* Regular Gauss-Seidel
* Block-Gauss-Seidel
* Dynamic-Block-Gauss-Seidel (quite fast for high-order FEM matrices)

Let us set up a Stokes problem on the unit cube to demonstrate them.

In [2]:
%%px
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from ngsolve import *
from ngsolve.webgui import Draw
import ngsolve as ngs
import netgen as ng
import NgsAMG as amg
import mpi4py.MPI as MPI
from netgen.csg import unit_cube
from usrMtgStuffPar import StokesHDGDiscretization, MakeFacetBlocks 

def gen_ref_mesh(geo, maxh, nref, comm):
    """Generate a mesh of ``geo`` on rank 0, hand it to the other ranks, refine it.

    Rank 0 of ``comm`` generates the Netgen mesh with mesh size ``maxh`` and,
    when more than one rank takes part, distributes it; every other rank
    receives its mesh through ``comm``. Each rank then attaches ``geo`` to its
    mesh and calls ``Refine()`` ``nref`` times.

    Side effect: sets ``ngs.ngsglobals.msg_level`` to 1.

    Returns:
        The tuple ``(geo, ngs.comp.Mesh(ngm))``.
    """
    ngs.ngsglobals.msg_level = 1
    if comm.rank == 0:
        ngm = geo.GenerateMesh(maxh=maxh)
        # A communicator never has fewer than one rank, so the former
        # `comm.size > 0` guard was always true. Only distribute when there
        # is at least one other rank to receive the mesh.
        if comm.size > 1:
            ngm.Distribute(comm)
    else:
        ngm = ng.meshing.Mesh.Receive(comm)
    ngm.SetGeometry(geo)
    for _ in range(nref):
        ngm.Refine()
    return geo, ngs.comp.Mesh(ngm)

# Unit-cube mesh with maxh=0.2, refined once, on the world communicator.
geo, mesh = gen_ref_mesh(
    unit_cube,
    maxh=0.2,
    nref=1,
    comm=MPI.COMM_WORLD,
)

# Order-4 Stokes HDG discretization; the helper returns the space, the
# bilinear form and two further objects (presumably the right-hand side and
# the solution function — verify against usrMtgStuffPar).
V, a, f, u = StokesHDGDiscretization(
    mesh,
    order=4,
    wall="",
    inlet="",
    outlet=".*",
    nu=1e0,
    div_div_pen=0,
)
a.Assemble()

# blocks to use for smoothers later
blocks = MakeFacetBlocks(V, V.FreeDofs(True))

%px:   0%|          | 0/4 [00:00<?, ?tasks/s]

[stdout:0]  Start Findpoints
 Find edges
 Start Findpoints
 Find edges
 Start Findpoints
 Find edges
 Surface 1 / 6
 Surface 2 / 6
 Surface 3 / 6
 Surface 4 / 6
 Surface 5 / 6
 Surface 6 / 6
 Delaunay meshing
 Volume Optimization
 Send/Receive mesh
 update parallel topology
 update parallel topology
 update parallel topology
 update parallel topology
(globally) created 12356 facet-blocks of average size 32.9799287795403


#### How to use NgsAMG smoothers

Drop-in replacement for the NGSolve **multiplicative** GS/BGS smoothers. Additive smoothing is not supported; `Smooth`/`SmoothBack` accept some additional optional parameters.


In [3]:
from ngsolve import *
import NgsAMG as amg

# NOTE(review): this cell is not marked %%px, so the class is defined only in
# the local kernel and not on the engines — confirm that this is intended.
class SmootherAsPrecond (BaseMatrix):
    """Use one forward plus one backward smoother sweep as a preconditioner.

    ``Mult(b, x)`` zeroes ``x`` and then applies ``smoother.Smooth`` followed
    by ``smoother.SmoothBack``. The transposed operations simply delegate to
    the non-transposed ones, i.e. the operator is treated as its own
    transpose.

    Parameters
    ----------
    smoother :
        Object providing ``Smooth``, ``SmoothBack``, ``height``, ``width``,
        ``CreateColVector`` and ``CreateRowVector``.
    mat :
        The system matrix; stored as ``self.A`` but not used by this class.
    ngsSmoother : bool
        If True, call the plain ``Smooth(x, b)`` / ``SmoothBack(x, b)``
        interface. If False, use the extended interface that also passes a
        residual vector and the ``x_zero`` / ``res_updated`` / ``update_res``
        flags.
    """

    def __init__(self, smoother, mat, ngsSmoother=True):
        super().__init__()
        # True: plain Smooth(x, b) interface; False: residual-based interface
        self.ngsSmoother = ngsSmoother
        self.A = mat
        self.S = smoother
        # residual vector handed to the residual-based interface
        self.res = self.S.CreateColVector()
        # Scratch vector for MultAdd. It was never allocated before, so every
        # call to MultAdd / MultTransAdd raised AttributeError.
        self.xtemp = self.S.CreateColVector()

    def IsComplex(self):
        """This operator is always reported as real-valued."""
        return False

    def Height(self):
        """Return the height of the wrapped smoother."""
        return self.S.height

    def Width(self):
        """Return the width of the wrapped smoother."""
        return self.S.width

    def CreateColVector(self):
        """Create a column vector via the wrapped smoother."""
        return self.S.CreateColVector()

    def CreateRowVector(self):
        """Create a row vector via the wrapped smoother."""
        return self.S.CreateRowVector()

    def MultAdd(self, scal, b, x):
        """Compute ``x += scal * M b`` where ``M`` is this operator."""
        # Mult overwrites its output, so go through the scratch vector.
        self.Mult(b, self.xtemp)
        x.data += scal * self.xtemp

    def MultTransAdd(self, scal, b, x):
        """Same as ``MultAdd``; the transpose is treated as the operator itself."""
        self.MultAdd(scal, b, x)

    def MultTrans(self, b, x):
        """Same as ``Mult``; the transpose is treated as the operator itself."""
        self.Mult(b, x)

    def Mult(self, b, x):
        """Overwrite ``x`` with one forward and one backward sweep applied to ``b``."""
        x[:] = 0.0
        if not self.ngsSmoother:
            # With x == 0 the initial residual equals b. The forward sweep
            # keeps the residual up to date so the backward sweep can use it.
            self.res.data = b
            self.S.Smooth(x, b, self.res, x_zero=True, res_updated=True, update_res=True)
            self.S.SmoothBack(x, b, self.res, x_zero=False, res_updated=True, update_res=False)
        else:
            self.S.Smooth(x, b)
            self.S.SmoothBack(x, b)

#### Gauss-Seidel & Block-Gauss-Seidel

Limitations for block smoothers in parallel:
 * Blocks may not cross MPI subdomain boundaries
 * Each DOF is owned by its master rank and is only included in blocks on that rank

Some examples:
 * Works: blocks of all cell/face/edge/vertex-DOFs
 * Does not work: face/edge-patch, facet-plus-cells
 * Does not work as expected: element blocks, i.e. cell-plus-face-plus-edge-plus-vertex DOFs; each DOF is owned by its master rank and receives no update from other ranks!



In [5]:
%%px
from usrMtgStuffPar import TestSmoother

# The NGSolve built-in smoothers are only created and tested in a
# single-rank run.
serial_run = MPI.COMM_WORLD.size == 1

if serial_run:
    # NGSolve built-in smoothers
    gs = a.mat.CreateSmoother(V.FreeDofs(True))
    bgs = a.mat.CreateBlockSmoother(blocks)

# NgsAMG hybrid smoothers - MPI-parallel & communication overlapping
hybGS = amg.CreateHybridGSS(mat=a.mat, freedofs=V.FreeDofs(True))
hybBGS = amg.CreateHybridBlockGSS(mat=a.mat, blocks=blocks)

# Collect (smoother, label) pairs in the order they are to be tested:
# point smoothers first, then block smoothers, built-in before NgsAMG.
smoother_cases = []
if serial_run:
    smoother_cases.append((gs, "NgSolve-GS"))
smoother_cases.append((hybGS, "NgsAMG-GS"))
if serial_run:
    smoother_cases.append((bgs, "NgSolve-Block-GS"))
smoother_cases.append((hybBGS, "NgsAMG-Block-GS"))

for smoother, label in smoother_cases:
    TestSmoother(smoother, a.mat, True, label)

%px:   0%|          | 0/4 [00:00<?, ?tasks/s]

[stdout:0] 
Testing smoother NgsAMG-GS:
  If used as preconditioner:
      lam min:   0.002401380524102075
      lam max:   0.9985004813724339
      condition: 415.80268989055514
  sec per smooth forward:   0.03508602346276525
  sec per smooth backward:  0.04652794687316115

Testing smoother NgsAMG-Block-GS:
  If used as preconditioner:
      lam min:   0.0025141236281681347
      lam max:   0.9994916304393018
      condition: 397.55070881998
  sec per smooth forward:   0.03744652218957853
  sec per smooth backward:  0.048645276690638256


#### Dynamic Block-Gauss-Seidel

Sparse-matrix and smoother implementations that exploit the repetitive sparsity pattern of high-order (HO) matrices.


In [7]:
%%px
from usrMtgStuffPar import TestSPMV

# Assembled matrix and its NgsAMG dynamic-block counterpart.
A = a.mat
dynA = amg.ConvertDynBlock(A)

# Time a matrix-vector product for each format, assembled matrix first.
for matrix, label in (
    (A, "assembled SparseMatrix"),
    (dynA, "NgsAMG Dyn-Block matrix"),
):
    TestSPMV(matrix, label)


[stdout:0] 
Testing SPMV assembled SparseMatrix:
  sec per spmv:   0.03525040695670906

Testing SPMV NgsAMG Dyn-Block matrix:
  sec per spmv:   0.013413746875118959


In [9]:
%%px
from usrMtgStuffPar import TestSmoother

# The NGSolve block smoother is only created and tested in a single-rank run.
serial_run = MPI.COMM_WORLD.size == 1

if serial_run:
    bgs = a.mat.CreateBlockSmoother(blocks)

# NgsAMG dynamic-block smoother, built from the matrix and the free DOFs.
dynSM = amg.CreateDynBlockSmoother(a.mat, V.FreeDofs(True))

if serial_run:
    TestSmoother(bgs, a.mat, True, "NgSolve-Block-GS")
TestSmoother(dynSM, a.mat, True, "NgsAMG DynBlockSmoother")

%px:   0%|          | 0/4 [00:00<?, ?tasks/s]

[stdout:0] 
Testing smoother NgsAMG DynBlockSmoother:
  If used as preconditioner:
      lam min:   0.0025171851559938896
      lam max:   0.9996998223605251
      condition: 397.14989577943936
  sec per smooth forward:   0.015558838653623733
  sec per smooth backward:  0.016741156077549727
