From 8a5944b506f00a80e27e5205b7390fc4e09bc6b3 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 21 Mar 2026 21:27:00 +0100 Subject: [PATCH] let him cook --- .../applyAscOrtho.inl | 132 +++++++-- .../buildInnerBoundaryAsc.inl | 197 +++++++++++++ ...scMatrices.inl => buildTridiagonalAsc.inl} | 276 +++--------------- .../extrapolatedSmootherTake.h | 127 ++++---- .../extrapolatedSmootherTake.inl | 56 +--- .../initializeMumps.inl | 114 -------- .../solveAscSystem.inl | 30 +- .../extrapolatedSmoother.h | 3 +- 8 files changed, 426 insertions(+), 509 deletions(-) create mode 100644 include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl rename include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/{buildAscMatrices.inl => buildTridiagonalAsc.inl} (65%) delete mode 100644 include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.inl diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl index 1d6cc45c..f59ecc33 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl @@ -461,51 +461,127 @@ static inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const Polar } // namespace extrapolated_smoother_take template -void ExtrapolatedSmootherTake::applyAscOrthoCircleSection(int i_r, ConstVector x, - ConstVector rhs, Vector temp) +void ExtrapolatedSmootherTake::applyAscOrthoBlackCircleSection(ConstVector x, + ConstVector rhs, + Vector temp) { - const PolarGrid& grid = ExtrapolatedSmoother::grid_; - const LevelCache& level_cache = ExtrapolatedSmoother::level_cache_; + using extrapolated_smoother_take::nodeApplyAscOrthoCircleTake; - assert(i_r >= 0 && i_r < grid.numberSmootherCircles()); + const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; + const LevelCache& level_cache = ExtrapolatedSmootherTake::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); - const auto& arr = level_cache.arr(); - const auto& att = level_cache.att(); - const auto& art = level_cache.art(); - const auto& detDF = level_cache.detDF(); - const auto& coeff_beta = level_cache.coeff_beta(); + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - extrapolated_smoother_take::nodeApplyAscOrthoCircleTake(i_r, i_theta, grid, - ExtrapolatedSmoother::DirBC_Interior_, - x, rhs, temp, arr, att, art, detDF, coeff_beta); + /* The outer most circle next to the radial section is defined to be black. */ + const int start_black_circles = (grid.numberSmootherCircles() % 2 == 0) ? 1 : 0; + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_r = start_black_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } } } template -void ExtrapolatedSmootherTake::applyAscOrthoRadialSection(int i_theta, ConstVector x, - ConstVector rhs, Vector temp) +void ExtrapolatedSmootherTake::applyAscOrthoWhiteCircleSection(ConstVector x, + ConstVector rhs, + Vector temp) { - const PolarGrid& grid = ExtrapolatedSmoother::grid_; - const LevelCache& level_cache = ExtrapolatedSmoother::level_cache_; + using extrapolated_smoother_take::nodeApplyAscOrthoCircleTake; - assert(i_theta >= 0 && i_theta < grid.ntheta()); + const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; + const LevelCache& level_cache = ExtrapolatedSmootherTake::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); - const auto& arr = level_cache.arr(); - const auto& att = level_cache.att(); - const auto& art = level_cache.art(); - const auto& detDF = level_cache.detDF(); - const auto& coeff_beta = level_cache.coeff_beta(); + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - extrapolated_smoother_take::nodeApplyAscOrthoRadialTake(i_r, i_theta, grid, - ExtrapolatedSmoother::DirBC_Interior_, - x, rhs, temp, arr, att, art, detDF, coeff_beta); + /* The outer most circle next to the radial section is defined to be black. */ + const int start_white_circles = (grid.numberSmootherCircles() % 2 == 0) ? 0 : 1; + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_r = start_white_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } } } + +template +void ExtrapolatedSmootherTake::applyAscOrthoBlackRadialSection(ConstVector x, + ConstVector rhs, + Vector temp) +{ + using extrapolated_smoother_take::nodeApplyAscOrthoRadialTake; + + const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; + const LevelCache& level_cache = ExtrapolatedSmootherTake::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + + assert(level_cache.cacheDensityProfileCoefficients()); + assert(level_cache.cacheDomainGeometry()); + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta += 2) { + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } + } +} + +template +void ExtrapolatedSmootherTake::applyAscOrthoWhiteRadialSection(ConstVector x, + ConstVector rhs, + Vector temp) +{ + using extrapolated_smoother_take::nodeApplyAscOrthoRadialTake; + + const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; + const LevelCache& level_cache = ExtrapolatedSmootherTake::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + + assert(level_cache.cacheDensityProfileCoefficients()); + assert(level_cache.cacheDomainGeometry()); + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_theta = 1; i_theta < grid.ntheta(); i_theta += 2) { + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } + } +} \ No newline at end of file diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl new file mode 100644 index 00000000..9faa7a76 --- /dev/null +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl @@ -0,0 +1,197 @@ +#pragma once + +namespace extrapolated_smoother_take +{ + +#ifdef GMGPOLAR_USE_MUMPS +// When using the MUMPS solver, the matrix is assembled in COO format. +static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, + int column, double value) +{ + matrix.row_index(ptr + offset) = row; + matrix.col_index(ptr + offset) = column; + matrix.value(ptr + offset) = value; +} +#else +// When using the in-house solver, the matrix is stored in CSR format. +static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, + int column, double value) +{ + matrix.row_nz_index(row, offset) = column; + matrix.row_nz_entry(row, offset) = value; +} +#endif + +} // namespace extrapolated_smoother_take + +template +void ExtrapolatedSmootherTake::nodeBuildInteriorBoundarySolverMatrix( + int i_theta, const PolarGrid& grid, bool DirBC_Interior, InnerBoundaryMatrix& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) +{ + using extrapolated_smoother_take::update_CSR_COO_MatrixElement; + + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + /* ------------------------------------------ */ + /* Circle Section: Node in the inner boundary */ + /* ------------------------------------------ */ + const int i_r = 0; + + int ptr, offset; + int row, column; + double value; + + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + const int center_index = i_theta; + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + // h1 gets replaced with 2 * R0. + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). + // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. + double h1 = 2.0 * grid.radius(0); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2)); + + const int center_index = i_theta; + const int left_index = i_theta_AcrossOrigin; + const int right_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); + const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); + const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); + + int nz_index; + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + if (i_theta & 1) { + /* i_theta % 2 == 1 */ + /* -| x | o | x | */ + /* -| | | | */ + /* -| O | o | o | */ + /* -| | | | */ + /* -| x | o | x | */ + + const int left = grid.index(i_r, i_theta_AcrossOrigin); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + const double left_value = -coeff1 * (arr[center] + arr[left]); + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = center_value; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Left]; + column = left_index; + value = left_value; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* i_theta % 2 == 0 */ + /* -| o | o | o | */ + /* -| | | | */ + /* -| X | o | x | */ + /* -| | | | */ + /* -| o | o | o | */ + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + } +} + +template +typename ExtrapolatedSmootherTake::InnerBoundaryMatrix +ExtrapolatedSmootherTake::buildInteriorBoundarySolverMatrix() +{ + const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; + const LevelCache& level_cache = ExtrapolatedSmootherTake::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + + const int i_r = 0; + const int ntheta = grid.ntheta(); + +#ifdef GMGPOLAR_USE_MUMPS + const int nnz = getNonZeroCountCircleAsc(i_r); + SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); + inner_boundary_solver_matrix.is_symmetric(true); +#else + std::function nnz_per_row = [&](int i_theta) { + if (DirBC_Interior) + return 1; + else + return i_theta % 2 == 0 ? 1 : 2; + }; + SparseMatrixCSR inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row); +#endif + + assert(level_cache.cacheDensityProfileCoefficients()); + assert(level_cache.cacheDomainGeometry()); + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads) + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + nodeBuildInteriorBoundarySolverMatrix(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, + art, detDF, coeff_beta); + } + + return inner_boundary_solver_matrix; +} diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl similarity index 65% rename from include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.inl rename to include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl index 89461a9d..3f7efaaa 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl @@ -3,7 +3,6 @@ namespace extrapolated_smoother_take { -/* Tridiagonal matrices */ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, double value) { @@ -15,34 +14,15 @@ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, solver.cyclic_corner(batch) = value; } -/* Inner Boundary COO/CSR matrix */ -#ifdef GMGPOLAR_USE_MUMPS -static inline void updateCOOCSRMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_index(ptr + offset) = row; - matrix.col_index(ptr + offset) = col; - matrix.value(ptr + offset) = val; -} -#else -static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_nz_index(row, offset) = col; - matrix.row_nz_entry(row, offset) = val; -} -#endif - } // namespace extrapolated_smoother_take template -void ExtrapolatedSmootherTake::nodeBuildAscTake( - int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& inner_boundary_circle_matrix, +void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrices( + int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, BatchedTridiagonalSolver& circle_tridiagonal_solver, BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) { - using extrapolated_smoother_take::updateCOOCSRMatrixElement; using extrapolated_smoother_take::updateMatrixElement; assert(i_r >= 0 && i_r < grid.nr()); @@ -168,117 +148,34 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake( /* Circle Section: Node in the inner boundary */ /* ------------------------------------------ */ else if (i_r == 0) { - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - if (DirBC_Interior) { - auto& matrix = inner_boundary_circle_matrix; - const int center_index = i_theta; - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). - // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. - double h1 = 2.0 * grid.radius(0); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2)); - - const int center_index = i_theta; - const int left_index = i_theta_AcrossOrigin; - const int right_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); - const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); - const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); - - auto& matrix = inner_boundary_circle_matrix; - - int nz_index; - const Stencil& CenterStencil = getStencil(i_r, i_theta); + // The inner boundary circle line are is handled by the inner_boundary_solver, so we fill in the identity matrix. + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - if (i_theta & 1) { - /* i_theta % 2 == 1 */ - /* -| x | o | x | */ - /* -| | | | */ - /* -| O | o | o | */ - /* -| | | | */ - /* -| x | o | x | */ - - const int left = grid.index(i_r, i_theta_AcrossOrigin); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - const double left_value = -coeff1 * (arr[center] + arr[left]); - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = center_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Left]; - col = left_index; - val = left_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - else { - /* i_theta % 2 == 0 */ - /* -| o | o | o | */ - /* -| | | | */ - /* -| X | o | x | */ - /* -| | | | */ - /* -| o | o | o | */ - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - } + auto& solver = circle_tridiagonal_solver; + const int batch = i_r; + + const int center_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Bottom */ + row = center_index; + column = bottom_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Top */ + row = center_index; + column = top_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); } /* ------------------------------------------ */ /* Node in the interior of the Radial Section */ @@ -619,33 +516,12 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake( } template -void ExtrapolatedSmootherTake::buildAscCircleSection(int i_r) -{ - const PolarGrid& grid = ExtrapolatedSmoother::grid_; - const LevelCache& level_cache = ExtrapolatedSmoother::level_cache_; - - assert(level_cache.cacheDensityProfileCoefficients()); - assert(level_cache.cacheDomainGeometry()); - - ConstVector arr = level_cache.arr(); - ConstVector att = level_cache.att(); - ConstVector art = level_cache.art(); - ConstVector detDF = level_cache.detDF(); - ConstVector coeff_beta = level_cache.coeff_beta(); - - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - // Build Asc at the current node - nodeBuildAscTake(i_r, i_theta, grid, ExtrapolatedSmoother::DirBC_Interior_, - inner_boundary_circle_matrix_, circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, - att, art, detDF, coeff_beta); - } -} - -template -void ExtrapolatedSmootherTake::buildAscRadialSection(int i_theta) +void ExtrapolatedSmootherTake::buildTridiagonalSolverMatrices() { - const PolarGrid& grid = ExtrapolatedSmoother::grid_; - const LevelCache& level_cache = ExtrapolatedSmoother::level_cache_; + const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; + const LevelCache& level_cache = ExtrapolatedSmootherTake::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -656,90 +532,22 @@ void ExtrapolatedSmootherTake::buildAscRadialSection(int i_theta ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - // Build Asc at the current node - nodeBuildAscTake(i_r, i_theta, grid, ExtrapolatedSmoother::DirBC_Interior_, - inner_boundary_circle_matrix_, circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, - att, art, detDF, coeff_beta); - } -} - -template -void ExtrapolatedSmootherTake::buildAscMatrices() -{ - const PolarGrid& grid = ExtrapolatedSmoother::grid_; - const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; - const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; - - /* -------------------------------------- */ - /* Part 1: Allocate Asc Smoother matrices */ - /* -------------------------------------- */ - // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. - // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - -#ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int inner_i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); - const int num_circle_nodes = grid.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); - inner_boundary_circle_matrix_.is_symmetric(false); -#else - std::function nnz_per_row = [&](int i_theta) { - if (DirBC_Interior) - return 1; - else - return i_theta % 2 == 0 ? 1 : 2; - }; - const int num_circle_nodes = grid.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); -#endif - - /* ---------------------------------- */ - /* Part 2: Fill Asc Smoother matrices */ - /* ---------------------------------- */ - #pragma omp parallel num_threads(num_omp_threads) { #pragma omp for nowait for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - buildAscCircleSection(i_r); + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + } } #pragma omp for nowait for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - buildAscRadialSection(i_theta); - } - } - - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - -#ifdef GMGPOLAR_USE_MUMPS - /* ------------------------------------------------------------------- */ - /* Part 3: Convert inner_boundary_circle_matrix_ to a symmetric matrix */ - /* ------------------------------------------------------------------- */ - - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); - - const int nnz = full_matrix.non_zero_size(); - const int numRows = full_matrix.rows(); - const int numColumns = full_matrix.columns(); - const int symmetric_nnz = nnz - (nnz - numRows) / 2; - - inner_boundary_circle_matrix_ = SparseMatrixCOO(numRows, numColumns, symmetric_nnz); - inner_boundary_circle_matrix_.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) { - int current_row = full_matrix.row_index(nz_index); - int current_col = full_matrix.col_index(nz_index); - if (current_row <= current_col) { - inner_boundary_circle_matrix_.row_index(current_nz) = current_row; - inner_boundary_circle_matrix_.col_index(current_nz) = current_col; - inner_boundary_circle_matrix_.value(current_nz) = std::move(full_matrix.value(nz_index)); - current_nz++; + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + } } } -#endif } diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h index de0c615f..bb2b242d 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h @@ -62,49 +62,19 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - // If MUMPS is enabled, this cleans up the inner boundary solver. - ~ExtrapolatedSmootherTake() override; - // Performs one full coupled extrapolated smoothing sweep: // BC -> WC -> BR -> WR // using temp as RHS workspace. void extrapolatedSmoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Tridiagonal solvers */ - /* ------------------- */ - - // Batched solver for cyclic-tridiagonal circle line A_sc matrices. - BatchedTridiagonalSolver circle_tridiagonal_solver_; - - // Batched solver for tridiagonal radial circle line A_sc matrices. - BatchedTridiagonalSolver radial_tridiagonal_solver_; - - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. - // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. -#ifdef GMGPOLAR_USE_MUMPS - using MatrixType = SparseMatrixCOO; - DMUMPS_STRUC_C inner_boundary_mumps_solver_; -#else - using MatrixType = SparseMatrixCSR; - SparseLUSolver inner_boundary_lu_solver_; -#endif - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - - // Note: - // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. - // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. - // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. - /* ------------------- */ /* Stencil definitions */ /* ------------------- */ + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the mumps solver will be build in the member initializer of the Smoother class. + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // It is only used in the construction of COO/CSR matrices. // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. @@ -126,6 +96,48 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother }; // clang-format on + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------------ */ + /* Interior boundary solver */ + /* ------------------------ */ + + // The inner circle matrix (i_r = 0) is NOT tridiagonal due to across-origin coupling. + // It is solved using a general-purpose sparse solver. + // - MUMPS: matrix assembled in COO format; solver owns the matrix internally. + // - In-house: matrix stored in CSR; solver does not own the matrix. + +#ifdef GMGPOLAR_USE_MUMPS + using InnerBoundaryMatrix = SparseMatrixCOO; + using InnerBoundarySolver = CooMumpsSolver; +#else + using InnerBoundaryMatrix = SparseMatrixCSR; + using InnerBoundarySolver = SparseLUSolver; + + // Stored only for the in-house solver (CSR). + InnerBoundaryMatrix inner_boundary_circle_matrix_; +#endif + + // Solver object (owns matrix if MUMPS, references if in-house solver). + InnerBoundarySolver inner_boundary_solver_; + + /* -------------- */ + /* Stencil access */ + /* -------------- */ + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r, int i_theta) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. @@ -137,20 +149,23 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Build all A_sc matrices for circle and radial smoothers. - void buildAscMatrices(); - // Build A_sc matrix block for a single circular line. - void buildAscCircleSection(int i_r); - // Build A_sc matrix block for a single radial line. - void buildAscRadialSection(int i_theta); - // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildAscTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, - ConstVector& att, ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); + void buildTridiagonalSolverMatrices(); + // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) + void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, + ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); + + // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. + InnerBoundaryMatrix buildInteriorBoundarySolverMatrix(); + // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. + void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + InnerBoundaryMatrix& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, + ConstVector& detDF, ConstVector& coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -158,8 +173,10 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(int i_r, ConstVector x, ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(int i_theta, ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, Vector temp); /* ----------------- */ /* Line-wise solvers */ @@ -177,21 +194,11 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother void solveWhiteCircleSection(Vector x, Vector temp); void solveBlackRadialSection(Vector x, Vector temp); void solveWhiteRadialSection(Vector x, Vector temp); - - /* ----------------------------------- */ - /* Initialize and destroy MUMPS solver */ - /* ----------------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - // Initialize sparse MUMPS solver with assembled COO matrix. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Release MUMPS internal memory and MPI structures. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); -#endif }; #include "extrapolatedSmootherTake.inl" #include "smootherStencil.inl" -#include "buildAscMatrices.inl" +#include "buildInnerBoundaryAsc.inl" +#include "buildTridiagonalAsc.inl" #include "applyAscOrtho.inl" #include "solveAscSystem.inl" -#include "initializeMumps.inl" diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.inl index efbef404..b6e541e3 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.inl @@ -9,21 +9,16 @@ ExtrapolatedSmootherTake::ExtrapolatedSmootherTake( DirBC_Interior, num_omp_threads) , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) -{ - buildAscMatrices(); #ifdef GMGPOLAR_USE_MUMPS - initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_); + , inner_boundary_solver_(buildInteriorBoundarySolverMatrix()) #else - inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); + , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_solver_(inner_boundary_circle_matrix_) #endif -} - -template -ExtrapolatedSmootherTake::~ExtrapolatedSmootherTake() { -#ifdef GMGPOLAR_USE_MUMPS - finalizeMumpsSolver(inner_boundary_mumps_solver_); -#endif + buildTridiagonalSolverMatrices(); + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); } // The smoothing solves linear systems of the form: @@ -56,46 +51,15 @@ void ExtrapolatedSmootherTake::extrapolatedSmoothing(Vector::level_cache_.cacheDensityProfileCoefficients()); - assert(ExtrapolatedSmoother::level_cache_.cacheDomainGeometry()); - - const PolarGrid& grid = ExtrapolatedSmoother::grid_; - const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; - - /* The outer most circle next to the radial section is defined to be black. */ - /* Priority: Black -> White. */ - const int start_black_circles = (grid.numberSmootherCircles() % 2 == 0) ? 1 : 0; - const int start_white_circles = (grid.numberSmootherCircles() % 2 == 0) ? 0 : 1; - - /* Black Circle Section */ -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_r = start_black_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, x, rhs, temp); - } /* Implicit barrier */ - + applyAscOrthoBlackCircleSection(x, rhs, temp); solveBlackCircleSection(x, temp); - /* White Circle Section */ -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_r = start_white_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, x, rhs, temp); - } /* Implicit barrier */ - + applyAscOrthoWhiteCircleSection(x, rhs, temp); solveWhiteCircleSection(x, temp); - /* Black Radial Section */ -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, x, rhs, temp); - } /* Implicit barrier */ - + applyAscOrthoBlackRadialSection(x, rhs, temp); solveBlackRadialSection(x, temp); - /* White Radial Section*/ -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_theta = 1; i_theta < grid.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, x, rhs, temp); - } /* Implicit barrier */ - + applyAscOrthoWhiteRadialSection(x, rhs, temp); solveWhiteRadialSection(x, temp); } diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.inl deleted file mode 100644 index ca5b4e74..00000000 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.inl +++ /dev/null @@ -1,114 +0,0 @@ -#pragma once - -#ifdef GMGPOLAR_USE_MUMPS - -template -void ExtrapolatedSmootherTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. - ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout << "Warning: ExtrapolatedSmoother inner boundary matrix is not positive definite: Negative pivots in " - "the factorization phase." - << std::endl; - } -} - -template -void ExtrapolatedSmootherTake::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.inl index d284a9cd..2ea092cc 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.inl @@ -22,19 +22,8 @@ void ExtrapolatedSmootherTake::solveBlackCircleSection(Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); + inner_boundary_solver_.solveInPlace(inner_boundary); } // Move updated values to x @@ -69,19 +58,8 @@ void ExtrapolatedSmootherTake::solveWhiteCircleSection(Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); + inner_boundary_solver_.solveInPlace(inner_boundary); } // Move updated values to x diff --git a/include/ExtrapolatedSmoother/extrapolatedSmoother.h b/include/ExtrapolatedSmoother/extrapolatedSmoother.h index 5e697e60..81f13614 100644 --- a/include/ExtrapolatedSmoother/extrapolatedSmoother.h +++ b/include/ExtrapolatedSmoother/extrapolatedSmoother.h @@ -5,6 +5,7 @@ #include #include "../InputFunctions/domainGeometry.h" +#include "../InputFunctions/densityProfileCoefficients.h" template class LevelCache; @@ -12,7 +13,6 @@ class LevelCache; template class Level; -#include "../InputFunctions/densityProfileCoefficients.h" #include "../Level/level.h" #include "../PolarGrid/polargrid.h" #include "../Definitions/global_definitions.h" @@ -22,6 +22,7 @@ class Level; #include "../LinearAlgebra/Matrix/coo_matrix.h" #include "../LinearAlgebra/Matrix/csr_matrix.h" #include "../LinearAlgebra/Solvers/csr_lu_solver.h" +#include "../LinearAlgebra/Solvers/coo_mumps_solver.h" #include "../Stencil/stencil.h" #ifdef GMGPOLAR_USE_MUMPS