# rocALUTION GPU solvers

### Description 
Test of different iterative linear solvers and preconditioners from the rocALUTION library (https://github.com/ROCmSoftwarePlatform/rocALUTION). These solvers run either on an AMD GPU or on CPU, and will be fully available in TRUST version 1.9.1.

In this sheet, we test all the different solvers/preconditioners for non-regression. The parallel scalability of these solvers will be tested in another sheet.

In [None]:
from trustutils import run
run.TRUST_parameters()
run.introduction('Pierre LEDAC (CEA/DES/ISAS/DM2S/STMF/LCAN)')

# --- Data set generation ---
# NP: number of MPI cores; a PAR_<case> variant is only added when NP > 1.
NP=1
# MESH: text substituted for the _MESH_ placeholder of base.data.
# Larger alternatives kept for reference:
#MESH="31 31 10" # 2 592 000
#MESH="11 11 3"  #    64 000
MESH="7 7 2"
# Threshold keywords inserted into every solver block below.
seuil="seuil 1.e-4 impr"

# One entry per test case: (directory/case name, plot label, solver template).
# The "%s" placeholder of each template receives `seuil`.
solver_templates = [
    ("trust_gcp_ssor", "Trust CG/SSOR",   "           gcp  { precond ssor { omega 1.6 } %s }"),
    ("gcp_ssor",       "CG/SSOR",         "rocalution gcp  { precond ssor { omega 1.0 } %s }"),
    # Disabled cases:
    #("gcp_sgs1.6",    "CG/SGS1.6",       "rocalution gcp  { precond ssor { omega 1.6 } %s }"),
    #("gcp_diag",      "Petsc CG/Jacobi", "petsc      gcp { precond diag   { } %s }"),
    #("gcp_jacobi",    "CG/Jacobi",       "rocalution gcp { precond jacobi { } %s }"),
    ("gcp_ilu0",       "CG/ILU(0)",       "rocalution gcp { precond ilu    { level 0 } %s }"),
    ("bicgstab_ilu0",  "BiCGStab/ILU(0)", "rocalution bicgstab { precond ilu    { level 0 } %s }"),
    # Disabled: the flexible CG variant crashes.
    #("fgcp_pairwiseamg", "FCG/Pairwise-AMG", "rocalution fgcp { precond pairwiseamg { } %s }"),
    ("gcp_pwamg",      "CG/PW-AMG",       "rocalution gcp  { precond pw-amg { } %s }"),
    ("gcp_uaamg",      "CG/UA-AMG",       "rocalution gcp  { precond ua-amg { } %s }"),
    ("gcp_saamg",      "CG/SA-AMG",       "rocalution gcp  { precond sa-amg { } %s }"),
    ("gcp_camg",       "CG/C-AMG",        "rocalution gcp  { precond c-amg  { } %s }"),
    ("bicgstab_camg",  "BiCGStab/C-AMG",  "rocalution bicgstab  { precond c-amg  { } %s }"),
]
# `cases` is read again by the plotting cells further down.
cases = [(name, legend, template % seuil) for name, legend, template in solver_templates]

run.reset()
run.initCaseSuite()

# Shell steps creating the case directory, copying base.data into it as
# <case>.data and linking the shared post_run script.
prepare_cmd = ";".join([
    "cas=%s",
    "mkdir -p $cas",
    "cd $cas",
    "cp ../base.data $cas.data",
    "ln -s -f ../post_run .",
])
# Shell steps calling make_PAR.data for a case; "exit 0" forces a zero exit
# status whatever make_PAR.data returns.
partition_cmd = "cas=%s;cd $cas;make_PAR.data $cas %s;exit 0"

for name, _, solver in cases:
    # Sequential test case
    run.executeCommand(prepare_cmd % name, verbose=False)
    sequential = run.addCase(name, "%s.data" % name)
    sequential.substitute("_solveur_", solver)
    sequential.substitute("_MESH_", MESH)
    # Parallel test case
    if NP>1:
        run.executeCommand(partition_cmd % (name, NP), verbose=False)
        run.addCase(name, "PAR_%s.data" % name, NP)

run.printCases()

In [None]:
# Launch the test-case suite built with run.addCase() in the setup cell.
run.runCases()

# Convergence

In [None]:
from trustutils import plot

# Convergence plot: one curve per test case, read from <case>/<case>.res
# (column 0 on the x axis, column 1 on the y axis).
# NOTE(review): the .res files are presumably written by the post_run script
# linked into each case directory - confirm.
a = plot.Graph("Relative residual ||Ax(it)-b||/||Ax(0)-b|| during the first time step:","",1,1,[10,5])

for case,label,syntax in cases:
    # Sequential run: plain line.
    cols = plot.loadText(case+"/%s.res" % case)
    a.add(cols[0],cols[1],label=label, marker='-')
    if NP>1:
        # Parallel run of the same case: circle markers, NP shown in the legend.
        cols = plot.loadText(case+"/PAR_%s.res" % case)
        a.add(cols[0],cols[1],label="%s (%s MPI cores)" % (label,NP), marker='o')

a.label("Iteration","Residual")
# Log scale on the residual axis.
a.subplot.set_yscale('log')

The fastest convergence is obtained with multigrid preconditioners. BiCGStab converges better than CG even though the matrix is symmetric...

# Memory used

In [None]:
# Memory plot: one curve per test case, read from <case>/<case>.ram
# (column 0 on the x axis, column 1 on the y axis).
a = plot.Graph("Max RAM per core used during calculation:","",1,1,[10,5])
for name, legend, _ in cases:
    sequential = plot.loadText("%s/%s.ram" % (name, name))
    a.add(sequential[0], sequential[1], label=legend)
    if NP <= 1:
        continue  # no parallel variant of this case
    parallel = plot.loadText("%s/PAR_%s.ram" % (name, name))
    a.add(parallel[0], parallel[1], label="%s (%s MPI cores)" % (legend, NP), marker='-o')
a.label("Time [s]","RAM [MB]")

# CPU time evolution

In [None]:
# CPU time plot: one curve per test case, read from <case>/<case>.cpu
# (column 0 on the x axis, column 1 on the y axis).
a = plot.Graph("CPU time of pressure solve during calculation:","",1,1,[10,5])
for name, legend, _ in cases:
    sequential = plot.loadText("%s/%s.cpu" % (name, name))
    a.add(sequential[0], sequential[1], label=legend)
    if NP <= 1:
        continue  # no parallel variant of this case
    parallel = plot.loadText("%s/PAR_%s.cpu" % (name, name))
    a.add(parallel[0], parallel[1], label="%s (%s MPI cores)" % (legend, NP), marker='-o')
a.label("Time step","CPU [s]")
# Optional tweaks, currently disabled:
#a.scale(yscale='log')
#a.subplot.set_xticks(range(1,6))

The fastest solvers in sequential mode are CG with AMG preconditioners. Warning: these results should not be generalized to all meshes or to parallel computations.