From 4edf3691078afa38d7eab861492f9fdf87f24579 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 1 May 2026 23:07:18 +0200 Subject: [PATCH 01/20] Use Kokkos:Parallel in SmootherTake and Prepare COO/CSR --- include/LinearAlgebra/Matrix/coo_matrix.h | 57 ++++++++- include/LinearAlgebra/Matrix/csr_matrix.h | 117 +++++++++++++++--- .../Solvers/tridiagonal_solver.h | 33 ++++- .../SmootherTake/buildInnerBoundaryAsc.inl | 60 +++++---- .../SmootherTake/buildTridiagonalAsc.inl | 75 ++++++----- .../Smoother/SmootherTake/matrixStencil.inl | 52 ++++++-- include/Smoother/SmootherTake/smootherTake.h | 55 +------- 7 files changed, 301 insertions(+), 148 deletions(-) diff --git a/include/LinearAlgebra/Matrix/coo_matrix.h b/include/LinearAlgebra/Matrix/coo_matrix.h index c3c5c5c8..38d2fe78 100644 --- a/include/LinearAlgebra/Matrix/coo_matrix.h +++ b/include/LinearAlgebra/Matrix/coo_matrix.h @@ -37,19 +37,36 @@ class SparseMatrixCOO SparseMatrixCOO& operator=(const SparseMatrixCOO& other); SparseMatrixCOO& operator=(SparseMatrixCOO&& other) noexcept; + // ---------------------------- // + // Size queries (host + device) // + // ---------------------------- // int rows() const; int columns() const; int non_zero_size() const; + // --------------------------- // + // Read access (host + device) // + // --------------------------- // const int& row_index(int nz_index) const; - int& row_index(int nz_index); - const int& col_index(int nz_index) const; - int& col_index(int nz_index); - const T& value(int nz_index) const; + + // ------------------------------------------------------------- // + // Backwards-compatible mutable reference access (host only) // + // Deprecated: prefer set_nz_index / set_nz_entry / add_nz_entry // + // ------------------------------------------------------------- // + int& row_index(int nz_index); + int& col_index(int nz_index); T& value(int nz_index); + // -------------------------------------------------------------- // + // Const setters — safe for Kokkos device lambdas (host + device) // + // -------------------------------------------------------------- // + void set_row_index(int nz_index, int column) const; + void set_col_index(int nz_index, int row) const; + void set_value(int nz_index, T value) const; + void add_value(int nz_index, T value) const; + bool is_symmetric() const; void is_symmetric(bool value); @@ -318,6 +335,38 @@ const T& SparseMatrixCOO::value(int nz_index) const return values_(nz_index); } +template +void SparseMatrixCOO::set_row_index(int nz_index, int column) const +{ + assert(nz_index >= 0); + assert(nz_index < nnz_); + row_indices_(nz_index) = column; +} + +template +void SparseMatrixCOO::set_col_index(int nz_index, int row) const +{ + assert(nz_index >= 0); + assert(nz_index < nnz_); + column_indices_(nz_index) = row; +} + +template +void SparseMatrixCOO::set_value(int nz_index, T value) const +{ + assert(nz_index >= 0); + assert(nz_index < nnz_); + values_(nz_index) = value; +} + +template +void SparseMatrixCOO::add_value(int nz_index, T value) const +{ + assert(nz_index >= 0); + assert(nz_index < nnz_); + values_(nz_index) += value; +} + template bool SparseMatrixCOO::is_symmetric() const { diff --git a/include/LinearAlgebra/Matrix/csr_matrix.h b/include/LinearAlgebra/Matrix/csr_matrix.h index 911f9199..6ae267aa 100644 --- a/include/LinearAlgebra/Matrix/csr_matrix.h +++ b/include/LinearAlgebra/Matrix/csr_matrix.h @@ -47,18 +47,39 @@ class SparseMatrixCSR SparseMatrixCSR& operator=(const SparseMatrixCSR& other); SparseMatrixCSR& operator=(SparseMatrixCSR&& other) noexcept; + // ---------------------------- // + // Size queries (host + device) // + // ---------------------------- // int rows() const; int columns() const; int non_zero_size() const; - int row_nz_size(int row) const; - const int& row_nz_index(int row, int nz_index) const; - int& row_nz_index(int row, int nz_index); - + // --------------------------- // + // Read access (host + device) // + // --------------------------- // + int row_nz_index(int row, int nz_index) const; const T& row_nz_entry(int row, int nz_index) const; + + // ------------------------------------------------------------- // + // Backwards-compatible mutable reference access (host only) // + // Deprecated: prefer set_nz_index / set_nz_entry / add_nz_entry // + // ------------------------------------------------------------- // + int& row_nz_index(int row, int nz_index); T& row_nz_entry(int row, int nz_index); + // -------------------------------------------------------------- // + // Const setters — safe for Kokkos device lambdas (host + device) // + // -------------------------------------------------------------- // + void set_nz_index(int row, int nz_index, int column) const; + void set_nz_entry(int row, int nz_index, T value) const; + void add_nz_entry(int row, int nz_index, T value) const; + + // --------------------------------------------------------------- // + // Raw pointer access (host only, for external solvers e.g. MUMPS) // + // TODO: Return Kokkos::View instead of raw pointers. // + // The COO_MUMPS_Solver is responsible for copying data to host // + // --------------------------------------------------------------- // T* values_data() const; int* column_indices_data() const; int* row_start_indices_data() const; @@ -74,6 +95,7 @@ class SparseMatrixCSR AllocatableVector column_indices_; AllocatableVector row_start_indices_; + // Only used in testing bool is_sorted_entries(const std::vector>& entries) { for (size_t i = 1; i < entries.size(); ++i) { @@ -89,14 +111,19 @@ class SparseMatrixCSR template std::ostream& operator<<(std::ostream& stream, const SparseMatrixCSR& matrix) { + // Create host mirrors and deep_copy from device if necessary. + // If the View is already on host, deep_copy is a no-op. + auto h_values = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, matrix.values_); + auto h_column_indices = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, matrix.column_indices_); + auto h_row_start_indices = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, matrix.row_start_indices_); + stream << "SparseMatrixCSR: " << matrix.rows_ << " x " << matrix.columns_ << "\n"; stream << "Number of non-zeros (nnz): " << matrix.nnz_ << "\n"; stream << "Non-zero elements (row, column, value):\n"; - for (int row = 0; row < matrix.rows_; ++row) { - for (int nnz = matrix.row_start_indices_(row); nnz < matrix.row_start_indices_(row + 1); ++nnz) { - stream << "(" << row << ", " << matrix.column_indices_(nnz) << ", " << matrix.values_(nnz) << ")\n"; - } - } + for (int row = 0; row < matrix.rows_; ++row) + for (int nnz = h_row_start_indices(row); nnz < h_row_start_indices(row + 1); ++nnz) + stream << "(" << row << ", " << h_column_indices(nnz) << ", " << h_values(nnz) << ")\n"; + return stream; } @@ -196,8 +223,8 @@ SparseMatrixCSR::SparseMatrixCSR(int rows, int columns, std::function("CSR values", nnz_); column_indices_ = Vector("CSR column indices", nnz_); - assign(values_, T(0)); - assign(column_indices_, 0); + Kokkos::deep_copy(values_, T(0)); + Kokkos::deep_copy(column_indices_, 0); } template @@ -246,12 +273,23 @@ SparseMatrixCSR::SparseMatrixCSR(int rows, int columns, const std::vector& assert(row_start_indices.size() == static_cast(rows + 1)); assert(values.size() == column_indices.size()); - // Copy data to internal storage - std::copy(values.begin(), values.end(), values_.data()); - std::copy(column_indices.begin(), column_indices.end(), column_indices_.data()); - std::copy(row_start_indices.begin(), row_start_indices.end(), row_start_indices_.data()); + // Wrap the std::vector data in unmanaged host Views, then deep_copy. + // deep_copy handles host->device transfer automatically; it is a no-op + // when both sides are already in HostSpace. + Kokkos::View h_values(values.data(), nnz_); + Kokkos::View h_column_indices(column_indices.data(), nnz_); + Kokkos::View h_row_start_indices(row_start_indices.data(), + rows + 1); + + Kokkos::deep_copy(values_, h_values); + Kokkos::deep_copy(column_indices_, h_column_indices); + Kokkos::deep_copy(row_start_indices_, h_row_start_indices); } +// ============================================================================ +// Size queries +// ============================================================================ + template int SparseMatrixCSR::rows() const { @@ -268,7 +306,6 @@ template int SparseMatrixCSR::non_zero_size() const { assert(this->nnz_ >= 0); - assert(static_cast(this->nnz_) <= static_cast(this->rows_) * static_cast(this->columns_)); return this->nnz_; } @@ -279,14 +316,30 @@ int SparseMatrixCSR::row_nz_size(int row) const return row_start_indices_(row + 1) - row_start_indices_(row); } +// ============================================================================ +// Read access (host + device) +// ============================================================================ + template -const int& SparseMatrixCSR::row_nz_index(int row, int nz_index) const +int SparseMatrixCSR::row_nz_index(int row, int nz_index) const { assert(row >= 0 && row < rows_); assert(nz_index >= 0 && nz_index < row_nz_size(row)); return column_indices_(row_start_indices_(row) + nz_index); } +template +const T& SparseMatrixCSR::row_nz_entry(int row, int nz_index) const +{ + assert(row >= 0 && row < rows_); + assert(nz_index >= 0 && nz_index < row_nz_size(row)); + return values_(row_start_indices_(row) + nz_index); +} + +// ============================================================================ +// Backwards-compatible mutable reference access (host only) +// ============================================================================ + template int& SparseMatrixCSR::row_nz_index(int row, int nz_index) { @@ -296,21 +349,45 @@ int& SparseMatrixCSR::row_nz_index(int row, int nz_index) } template -const T& SparseMatrixCSR::row_nz_entry(int row, int nz_index) const +T& SparseMatrixCSR::row_nz_entry(int row, int nz_index) { assert(row >= 0 && row < rows_); assert(nz_index >= 0 && nz_index < row_nz_size(row)); return values_(row_start_indices_(row) + nz_index); } +// ============================================================================ +// Const setters (host + device) +// ============================================================================ + template -T& SparseMatrixCSR::row_nz_entry(int row, int nz_index) +void SparseMatrixCSR::set_nz_index(int row, int nz_index, int column) const { assert(row >= 0 && row < rows_); assert(nz_index >= 0 && nz_index < row_nz_size(row)); - return values_(row_start_indices_(row) + nz_index); + column_indices_(row_start_indices_(row) + nz_index) = column; } +template +void SparseMatrixCSR::set_nz_entry(int row, int nz_index, T value) const +{ + assert(row >= 0 && row < rows_); + assert(nz_index >= 0 && nz_index < row_nz_size(row)); + values_(row_start_indices_(row) + nz_index) = value; +} + +template +void SparseMatrixCSR::add_nz_entry(int row, int nz_index, T value) const +{ + assert(row >= 0 && row < rows_); + assert(nz_index >= 0 && nz_index < row_nz_size(row)); + values_(row_start_indices_(row) + nz_index) += value; +} + +// ============================================================================ +// Raw pointer access (host only) +// ============================================================================ + template T* SparseMatrixCSR::values_data() const { diff --git a/include/LinearAlgebra/Solvers/tridiagonal_solver.h b/include/LinearAlgebra/Solvers/tridiagonal_solver.h index 4e83ad02..509c5115 100644 --- a/include/LinearAlgebra/Solvers/tridiagonal_solver.h +++ b/include/LinearAlgebra/Solvers/tridiagonal_solver.h @@ -54,6 +54,16 @@ class BatchedTridiagonalSolver { return main_diagonal_(batch_idx * matrix_dimension_ + index); } + KOKKOS_INLINE_FUNCTION + void set_main_diagonal(const int batch_idx, const int index, const T value) const + { + main_diagonal_(batch_idx * matrix_dimension_ + index) = value; + } + KOKKOS_INLINE_FUNCTION + void add_main_diagonal(const int batch_idx, const int index, const T value) const + { + main_diagonal_(batch_idx * matrix_dimension_ + index) += value; + } KOKKOS_INLINE_FUNCTION const T& sub_diagonal(const int batch_idx, const int index) const @@ -65,18 +75,37 @@ class BatchedTridiagonalSolver { return sub_diagonal_(batch_idx * matrix_dimension_ + index); } + KOKKOS_INLINE_FUNCTION + void set_sub_diagonal(const int batch_idx, const int index, const T value) const + { + sub_diagonal_(batch_idx * matrix_dimension_ + index) = value; + } + KOKKOS_INLINE_FUNCTION + void add_sub_diagonal(const int batch_idx, const int index, const T value) const + { + sub_diagonal_(batch_idx * matrix_dimension_ + index) += value; + } KOKKOS_INLINE_FUNCTION const T& cyclic_corner(const int batch_idx) const { return sub_diagonal_(batch_idx * matrix_dimension_ + (matrix_dimension_ - 1)); } - KOKKOS_INLINE_FUNCTION T& cyclic_corner(const int batch_idx) { return sub_diagonal_(batch_idx * matrix_dimension_ + (matrix_dimension_ - 1)); } + KOKKOS_INLINE_FUNCTION + void set_cyclic_corner(const int batch_idx, const T value) const + { + sub_diagonal_(batch_idx * matrix_dimension_ + (matrix_dimension_ - 1)) = value; + } + KOKKOS_INLINE_FUNCTION + void add_cyclic_corner(const int batch_idx, const T value) const + { + sub_diagonal_(batch_idx * matrix_dimension_ + (matrix_dimension_ - 1)) += value; + } /* ---------------------------------------------- */ /* Setup: Cholesky Decomposition: A = L * D * L^T */ @@ -229,7 +258,7 @@ class BatchedTridiagonalSolver rhs(offset + 0) + cyclic_corner_element / gamma(batch_idx) * rhs(offset + matrix_dimension - 1); const T dot_product_u_v = buffer(offset + 0) + cyclic_corner_element / gamma(batch_idx) * buffer(offset + matrix_dimension - 1); - const T factor = dot_product_x_v / (1.0 + dot_product_u_v); + const T factor = dot_product_x_v / (1.0 + dot_product_u_v); for (int i = 0; i < matrix_dimension; i++) { rhs(offset + i) -= factor * buffer(offset + i); diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index 978acdc2..92d7d2a3 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -2,33 +2,36 @@ namespace smoother_take { +#include "matrixStencil.inl" #ifdef GMGPOLAR_USE_MUMPS // When using the MUMPS solver, the matrix is assembled in COO format. -static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, - int column, double value) +static inline void update_CSR_COO_MatrixElement(const SparseMatrixCOO& matrix, const int ptr, const int offset, + const int row, const int column, const double value) { - matrix.row_index(ptr + offset) = row; - matrix.col_index(ptr + offset) = column; - matrix.value(ptr + offset) = value; + matrix.set_row_index(ptr + offset, row); + matrix.set_col_index(ptr + offset, column); + matrix.set_value(ptr + offset, value); } #else // When using the in-house solver, the matrix is stored in CSR format. -static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, - int column, double value) +static inline void update_CSR_COO_MatrixElement(const SparseMatrixCSR& matrix, const int ptr, const int offset, + const int row, const int column, const double value) { - matrix.row_nz_index(row, offset) = column; - matrix.row_nz_entry(row, offset) = value; + matrix.set_nz_index(row, offset, column); + matrix.set_nz_entry(row, offset, value); } #endif -} // namespace smoother_take - -template -void SmootherTake::nodeBuildInteriorBoundarySolverMatrix( - int i_theta, const PolarGrid& grid, bool DirBC_Interior, InnerBoundaryMatrix& matrix, ConstVector& arr, - ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) +// Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. +template +static inline void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + const MatrixType& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, + ConstVector& detDF, ConstVector& coeff_beta) { + using smoother_take::getCircleAscIndex; + using smoother_take::getStencil; using smoother_take::update_CSR_COO_MatrixElement; assert(i_theta >= 0 && i_theta < grid.ntheta()); @@ -47,13 +50,13 @@ void SmootherTake::nodeBuildInteriorBoundarySolverMatrix( /* ------------------------------------------------ */ if (DirBC_Interior) { const int center_index = i_theta; - const int center_nz_index = getCircleAscIndex(i_r, i_theta); + const int center_nz_index = getCircleAscIndex(i_r, i_theta, DirBC_Interior); /* Fill matrix row of (i,j) */ row = center_index; ptr = center_nz_index; - const Stencil& CenterStencil = getStencil(i_r); + const Stencil& CenterStencil = getStencil(i_r, DirBC_Interior); offset = CenterStencil[StencilPosition::Center]; column = center_index; @@ -92,7 +95,7 @@ void SmootherTake::nodeBuildInteriorBoundarySolverMatrix( const int bottom_index = i_theta_M1; const int top_index = i_theta_P1; - const int center_nz_index = getCircleAscIndex(i_r, i_theta); + const int center_nz_index = getCircleAscIndex(i_r, i_theta, DirBC_Interior); const double left_value = -coeff1 * (arr[center] + arr[left]); const double right_value = -coeff2 * (arr[center] + arr[right]); @@ -106,7 +109,7 @@ void SmootherTake::nodeBuildInteriorBoundarySolverMatrix( row = center_index; ptr = center_nz_index; - const Stencil& CenterStencil = getStencil(i_r); + const Stencil& CenterStencil = getStencil(i_r, DirBC_Interior); offset = CenterStencil[StencilPosition::Center]; column = center_index; @@ -130,10 +133,14 @@ void SmootherTake::nodeBuildInteriorBoundarySolverMatrix( } } +} // namespace smoother_take + template typename SmootherTake::InnerBoundaryMatrix SmootherTake::buildInteriorBoundarySolverMatrix() { + using smoother_take::nodeBuildInteriorBoundarySolverMatrix; + const PolarGrid& grid = Smoother::grid_; const LevelCacheType& level_cache = Smoother::level_cache_; const bool DirBC_Interior = Smoother::DirBC_Interior_; @@ -143,7 +150,8 @@ SmootherTake::buildInteriorBoundarySolverMatrix() const int ntheta = grid.ntheta(); #ifdef GMGPOLAR_USE_MUMPS - const int nnz = getNonZeroCountCircleAsc(i_r); + using smoother_take::getNonZeroCountCircleAsc; + const int nnz = getNonZeroCountCircleAsc(i_r, grid, DirBC_Interior); SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); inner_boundary_solver_matrix.is_symmetric(true); #else @@ -163,11 +171,13 @@ SmootherTake::buildInteriorBoundarySolverMatrix() ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_theta = 0; i_theta < ntheta; i_theta++) { - nodeBuildInteriorBoundarySolverMatrix(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, - art, detDF, coeff_beta); - } + Kokkos::parallel_for( + "Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) { + nodeBuildInteriorBoundarySolverMatrix( + i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + }); + + Kokkos::fence(); return inner_boundary_solver_matrix; } diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl index 81d8d027..45d19446 100644 --- a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -3,25 +3,24 @@ namespace smoother_take { -static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, +static inline void updateMatrixElement(const BatchedTridiagonalSolver& solver, int batch, int row, int column, double value) { if (row == column) - solver.main_diagonal(batch, row) = value; + solver.set_main_diagonal(batch, row, value); else if (row == column - 1) - solver.sub_diagonal(batch, row) = value; + solver.set_sub_diagonal(batch, row, value); else if (row == 0 && column == solver.matrixDimension() - 1) - solver.cyclic_corner(batch) = value; + solver.set_cyclic_corner(batch, value); } -} // namespace smoother_take - -template -void SmootherTake::nodeBuildTridiagonalSolverMatrices( - int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, ConstVector& att, - ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) +// Build the tridiagonal solver matrices for a specific node (i_r, i_theta) +static inline void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + const BatchedTridiagonalSolver& circle_tridiagonal_solver, + const BatchedTridiagonalSolver& radial_tridiagonal_solver, + ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta) { using smoother_take::updateMatrixElement; @@ -302,9 +301,13 @@ void SmootherTake::nodeBuildTridiagonalSolverMatrices( } } +} // namespace smoother_take + template void SmootherTake::buildTridiagonalSolverMatrices() { + using smoother_take::nodeBuildTridiagonalSolverMatrices; + const PolarGrid& grid = Smoother::grid_; const LevelCacheType& level_cache = Smoother::level_cache_; const bool DirBC_Interior = Smoother::DirBC_Interior_; @@ -319,22 +322,34 @@ void SmootherTake::buildTridiagonalSolverMatrices() ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); -#pragma omp parallel num_threads(num_omp_threads) - { -#pragma omp for nowait - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, - radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); - } - } - -#pragma omp for nowait - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, - radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); - } - } - } + /* We split the loops into two regions to better respect the */ + /* access patterns of the smoother and improve cache locality. */ + + // The For loop matches circular access pattern */ + Kokkos::parallel_for( + "Smoother Take: BuildTridiagonalAsc (Circular)", + Kokkos::MDRangePolicy>( // Rank of the index space + {0, 0}, // Starting point of the index space + {grid.numberSmootherCircles(), grid.ntheta()} // Ending point of the index space + ), + // Kokkos lambda function to execute for each point in the index space + KOKKOS_LAMBDA(const int i_r, const int i_theta) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + }); + + /* For loop matches radial access pattern */ + Kokkos::parallel_for( + "Smoother Take: BuildTridiagonalAsc (Radial)", + Kokkos::MDRangePolicy>( // Rank of the index space + {0, grid.numberSmootherCircles()}, // Starting point of the index space + {grid.ntheta(), grid.nr()} // Ending point of the index space + ), + // Kokkos lambda function to execute for each point in the index space + KOKKOS_LAMBDA(const int i_theta, const int i_r) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + }); + + Kokkos::fence(); } \ No newline at end of file diff --git a/include/Smoother/SmootherTake/matrixStencil.inl b/include/Smoother/SmootherTake/matrixStencil.inl index a2135d66..080e2597 100644 --- a/include/Smoother/SmootherTake/matrixStencil.inl +++ b/include/Smoother/SmootherTake/matrixStencil.inl @@ -1,7 +1,11 @@ #pragma once -template -const Stencil& SmootherTake::getStencil(int i_r) const +/* -------------- */ +/* Stencil access */ +/* -------------- */ + +// Select correct stencil depending on the grid position. +static inline const Stencil& getStencil(int i_r, bool DirBC_Interior) { // Only i_r = 0 is implemented. // Stencils are only used to obtain offsets to index into COO/CSR matrices. @@ -9,13 +13,39 @@ const Stencil& SmootherTake::getStencil(int i_r) const // because it has an additional across-origin coupling. assert(i_r == 0); - const bool DirBC_Interior = Smoother::DirBC_Interior_; + /* ------------------- */ + /* Stencil definitions */ + /* ------------------- */ + + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the mumps solver will be build in the member initializer of the Smoother class. + + // Stencils encode neighborhood connectivity for A_sc matrix assembly. + // It is only used in the construction of COO/CSR matrices. + // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. + // The Stencil class stores the offset for each position. + // - Non-zero matrix indicesare obtained via `ptr + offset` + // - A offset value of `-1` means the position is not included in the stencil pattern. + // - Other values (0, 1, 2, ..., stencil_size - 1) correspond to valid stencil indices. - return DirBC_Interior ? stencil_DB_ : circle_stencil_across_origin_; + // clang-format off + static const Stencil stencil_DB = { + -1, -1, -1, + -1, 0, -1, + -1, -1, -1 + }; + static const Stencil circle_stencil_across_origin = { + -1, 3, -1, + 1, 0, -1, + -1, 2, -1 + }; + // clang-format on + + return DirBC_Interior ? stencil_DB : circle_stencil_across_origin; } -template -int SmootherTake::getNonZeroCountCircleAsc(int i_r) const +// Number of nonzero A_sc entries. +static inline int getNonZeroCountCircleAsc(int i_r, const PolarGrid& grid, bool DirBC_Interior) { // Only i_r = 0 is implemented. // The number of nonzero elements is only needed to construct COO matrices. @@ -23,15 +53,13 @@ int SmootherTake::getNonZeroCountCircleAsc(int i_r) const // because it has an additional across-origin coupling. assert(i_r == 0); - const PolarGrid& grid = Smoother::grid_; - const bool DirBC_Interior = Smoother::DirBC_Interior_; - const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 4; return size_stencil_inner_boundary * grid.ntheta(); } -template -int SmootherTake::getCircleAscIndex(int i_r, int i_theta) const +// Obtain a ptr to index into COO matrices. +// It accumulates all stencil sizes within a line up to, but excluding the current node. +static inline int getCircleAscIndex(int i_r, int i_theta, bool DirBC_Interior) { // Only i_r = 0 is implemented. // getCircleAscIndex accumulates all stencil sizes within a line up to, but excluding the current node. @@ -40,8 +68,6 @@ int SmootherTake::getCircleAscIndex(int i_r, int i_theta) const // because it has an additional across-origin coupling. assert(i_r == 0); - const bool DirBC_Interior = Smoother::DirBC_Interior_; - const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 4; return size_stencil_inner_boundary * i_theta; } diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index e872a19b..52d54725 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -62,34 +62,6 @@ class SmootherTake : public Smoother void smoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Stencil definitions */ - /* ------------------- */ - - // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, - // since the mumps solver will be build in the member initializer of the Smoother class. - - // Stencils encode neighborhood connectivity for A_sc matrix assembly. - // It is only used in the construction of COO/CSR matrices. - // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. - // The Stencil class stores the offset for each position. - // - Non-zero matrix indicesare obtained via `ptr + offset` - // - A offset value of `-1` means the position is not included in the stencil pattern. - // - Other values (0, 1, 2, ..., stencil_size - 1) correspond to valid stencil indices. - - // clang-format off - const Stencil stencil_DB_ = { - -1, -1, -1, - -1, 0, -1, - -1, -1, -1 - }; - const Stencil circle_stencil_across_origin_ = { - -1, 3, -1, - 1, 0, -1, - -1, 2, -1 - }; - // clang-format on - /* ------------------- */ /* Tridiagonal solvers */ /* ------------------- */ @@ -128,38 +100,13 @@ class SmootherTake : public Smoother // Solver object (owns matrix if MUMPS, references if in-house solver). InnerBoundarySolver inner_boundary_solver_; - /* -------------- */ - /* Stencil access */ - /* -------------- */ - - // Select correct stencil depending on the grid position. - const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */ - // Number of nonzero A_sc entries. - int getNonZeroCountCircleAsc(int i_r) const; /* Only i_r = 0 implemented */ - // Obtain a ptr to index into COO matrices. - // It accumulates all stencil sizes within a line up to, but excluding the current node. - int getCircleAscIndex(int i_r, int i_theta) const; /* Only i_r = 0 implemented */ - /* --------------- */ /* Matrix assembly */ /* --------------- */ // Build all A_sc matrices for circle and radial smoothers. void buildTridiagonalSolverMatrices(); - // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) - void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, - ConstVector& arr, ConstVector& att, - ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); - // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. InnerBoundaryMatrix buildInteriorBoundarySolverMatrix(); - // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. - void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, - InnerBoundaryMatrix& matrix, ConstVector& arr, - ConstVector& att, ConstVector& art, - ConstVector& detDF, ConstVector& coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -195,5 +142,5 @@ class SmootherTake : public Smoother #include "buildTridiagonalAsc.inl" #include "applyAscOrtho.inl" #include "solveAscSystem.inl" -#include "matrixStencil.inl" + } // namespace gmgpolar From 081e70d5ba838a4fbe080444fb702d73be171d64 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Fri, 1 May 2026 23:11:41 +0200 Subject: [PATCH 02/20] Formatting --- include/LinearAlgebra/Solvers/tridiagonal_solver.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/LinearAlgebra/Solvers/tridiagonal_solver.h b/include/LinearAlgebra/Solvers/tridiagonal_solver.h index 509c5115..1aa72aa6 100644 --- a/include/LinearAlgebra/Solvers/tridiagonal_solver.h +++ b/include/LinearAlgebra/Solvers/tridiagonal_solver.h @@ -258,7 +258,8 @@ class BatchedTridiagonalSolver rhs(offset + 0) + cyclic_corner_element / gamma(batch_idx) * rhs(offset + matrix_dimension - 1); const T dot_product_u_v = buffer(offset + 0) + cyclic_corner_element / gamma(batch_idx) * buffer(offset + matrix_dimension - 1); - const T factor = dot_product_x_v / (1.0 + dot_product_u_v); + + const T factor = dot_product_x_v / (1.0 + dot_product_u_v); for (int i = 0; i < matrix_dimension; i++) { rhs(offset + i) -= factor * buffer(offset + i); From 26dff4ebb788078cc38310acb2add52299894bd6 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 1 May 2026 23:32:52 +0200 Subject: [PATCH 03/20] Investigate invalid matrix --- include/LinearAlgebra/Matrix/coo_matrix.h | 12 ++++++------ src/LinearAlgebra/Solvers/coo_mumps_solver.cpp | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/LinearAlgebra/Matrix/coo_matrix.h b/include/LinearAlgebra/Matrix/coo_matrix.h index 38d2fe78..ce4c3112 100644 --- a/include/LinearAlgebra/Matrix/coo_matrix.h +++ b/include/LinearAlgebra/Matrix/coo_matrix.h @@ -62,8 +62,8 @@ class SparseMatrixCOO // -------------------------------------------------------------- // // Const setters — safe for Kokkos device lambdas (host + device) // // -------------------------------------------------------------- // - void set_row_index(int nz_index, int column) const; - void set_col_index(int nz_index, int row) const; + void set_row_index(int nz_index, int row) const; + void set_col_index(int nz_index, int column) const; void set_value(int nz_index, T value) const; void add_value(int nz_index, T value) const; @@ -336,19 +336,19 @@ const T& SparseMatrixCOO::value(int nz_index) const } template -void SparseMatrixCOO::set_row_index(int nz_index, int column) const +void SparseMatrixCOO::set_row_index(int nz_index, int row) const { assert(nz_index >= 0); assert(nz_index < nnz_); - row_indices_(nz_index) = column; + row_indices_(nz_index) = row; } template -void SparseMatrixCOO::set_col_index(int nz_index, int row) const +void SparseMatrixCOO::set_col_index(int nz_index, int column) const { assert(nz_index >= 0); assert(nz_index < nnz_); - column_indices_(nz_index) = row; + column_indices_(nz_index) = column; } template diff --git a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp index 96abc80b..12c78cd4 100644 --- a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -9,6 +9,8 @@ using namespace gmgpolar; CooMumpsSolver::CooMumpsSolver(SparseMatrixCOO matrix) { + std::cout << matrix << std::endl; + if (matrix.is_symmetric()) { matrix_ = extractUpperTriangle(matrix); } From 8158495aac153f35f81234cb7f50ba6b6d33a014 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 2 May 2026 00:02:57 +0200 Subject: [PATCH 04/20] test empty matrix --- .../SmootherTake/buildInnerBoundaryAsc.inl | 17 ++++++++----- .../SmootherTake/buildTridiagonalAsc.inl | 24 +++++++++++++------ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index 92d7d2a3..15b98409 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -171,13 +171,18 @@ SmootherTake::buildInteriorBoundarySolverMatrix() ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); - Kokkos::parallel_for( - "Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) { - nodeBuildInteriorBoundarySolverMatrix( - i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); - }); + // Kokkos::parallel_for( + // "Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) { + // nodeBuildInteriorBoundarySolverMatrix( + // i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + // }); - Kokkos::fence(); + // Kokkos::fence(); + + for (int i_theta = 0; i_theta < ntheta; ++i_theta) { + nodeBuildInteriorBoundarySolverMatrix( + i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + } return inner_boundary_solver_matrix; } diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl index 45d19446..f35db61a 100644 --- a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -6,12 +6,18 @@ namespace smoother_take static inline void updateMatrixElement(const BatchedTridiagonalSolver& solver, int batch, int row, int column, double value) { - if (row == column) + if (row == column) { solver.set_main_diagonal(batch, row, value); - else if (row == column - 1) + std::cout << solver.main_diagonal(batch, row) << ", " << value << std::endl; + } + else if (row == column - 1) { solver.set_sub_diagonal(batch, row, value); - else if (row == 0 && column == solver.matrixDimension() - 1) + std::cout << solver.sub_diagonal(batch, row) << ", " << value << std::endl; + } + else if (row == 0 && column == solver.matrixDimension() - 1) { solver.set_cyclic_corner(batch, value); + std::cout << solver.cyclic_corner(batch) << ", " << value << std::endl; + } } // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) @@ -312,6 +318,10 @@ void SmootherTake::buildTridiagonalSolverMatrices() const LevelCacheType& level_cache = Smoother::level_cache_; const bool DirBC_Interior = Smoother::DirBC_Interior_; const int num_omp_threads = Smoother::num_omp_threads_; + const BatchedTridiagonalSolver& circle_tridiagonal_solver = + SmootherTake::circle_tridiagonal_solver_; + const BatchedTridiagonalSolver& radial_tridiagonal_solver = + SmootherTake::radial_tridiagonal_solver_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -334,8 +344,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_r, const int i_theta) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, - radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver, + radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); }); /* For loop matches radial access pattern */ @@ -347,8 +357,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_theta, const int i_r) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, - radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver, + radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); }); Kokkos::fence(); From cc7df4faf873d47edb646a027ba40e4a3402a96e Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 2 May 2026 00:22:43 +0200 Subject: [PATCH 05/20] swap in kokkos loop --- .../SmootherTake/buildInnerBoundaryAsc.inl | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index 15b98409..92d7d2a3 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -171,18 +171,13 @@ SmootherTake::buildInteriorBoundarySolverMatrix() ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); - // Kokkos::parallel_for( - // "Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) { - // nodeBuildInteriorBoundarySolverMatrix( - // i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); - // }); + Kokkos::parallel_for( + "Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) { + nodeBuildInteriorBoundarySolverMatrix( + i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + }); - // Kokkos::fence(); - - for (int i_theta = 0; i_theta < ntheta; ++i_theta) { - nodeBuildInteriorBoundarySolverMatrix( - i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); - } + Kokkos::fence(); return inner_boundary_solver_matrix; } From 5129054d43979bcf331e737aaa7c5bbc9ba876f5 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 2 May 2026 00:39:27 +0200 Subject: [PATCH 06/20] sanity --- .../SmootherTake/buildInnerBoundaryAsc.inl | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index 92d7d2a3..4de66dfe 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -9,9 +9,12 @@ namespace smoother_take static inline void update_CSR_COO_MatrixElement(const SparseMatrixCOO& matrix, const int ptr, const int offset, const int row, const int column, const double value) { + std::cout << "Updating COO matrix at row " << row << ", column " << column << " with value " << value << std::endl; matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column); matrix.set_value(ptr + offset, value); + std::cout << "Updated COO matrix at row " << matrix.row_index(ptr + offset) << ", column " + << matrix.col_index(ptr + offset) << " with value " << matrix.value(ptr + offset) << std::endl; } #else // When using the in-house solver, the matrix is stored in CSR format. @@ -23,12 +26,17 @@ static inline void update_CSR_COO_MatrixElement(const SparseMatrixCSR& m } #endif -// Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. -template -static inline void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, - const MatrixType& matrix, ConstVector& arr, - ConstVector& att, ConstVector& art, - ConstVector& detDF, ConstVector& coeff_beta) +#ifdef GMGPOLAR_USE_MUMPS +using InnerBoundaryMatrix = SparseMatrixCOO; +#else +using InnerBoundaryMatrix = SparseMatrixCSR; +#endif + +KOKKOS_INLINE_FUNCTION +static void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + const InnerBoundaryMatrix& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, + ConstVector& detDF, ConstVector& coeff_beta) { using smoother_take::getCircleAscIndex; using smoother_take::getStencil; @@ -173,8 +181,8 @@ SmootherTake::buildInteriorBoundarySolverMatrix() Kokkos::parallel_for( "Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) { - nodeBuildInteriorBoundarySolverMatrix( - i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + nodeBuildInteriorBoundarySolverMatrix(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, + art, detDF, coeff_beta); }); Kokkos::fence(); From e2805e223e26175abcdce720735d7e6482ef00a7 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Sat, 2 May 2026 00:51:01 +0200 Subject: [PATCH 07/20] Update coo_matrix.h --- include/LinearAlgebra/Matrix/coo_matrix.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/LinearAlgebra/Matrix/coo_matrix.h b/include/LinearAlgebra/Matrix/coo_matrix.h index ce4c3112..b893783e 100644 --- a/include/LinearAlgebra/Matrix/coo_matrix.h +++ b/include/LinearAlgebra/Matrix/coo_matrix.h @@ -62,9 +62,13 @@ class SparseMatrixCOO // -------------------------------------------------------------- // // Const setters — safe for Kokkos device lambdas (host + device) // // -------------------------------------------------------------- // + KOKKOS_INLINE_FUNCTION void set_row_index(int nz_index, int row) const; + KOKKOS_INLINE_FUNCTION void set_col_index(int nz_index, int column) const; + KOKKOS_INLINE_FUNCTION void set_value(int nz_index, T value) const; + KOKKOS_INLINE_FUNCTION void add_value(int nz_index, T value) const; bool is_symmetric() const; @@ -336,6 +340,7 @@ const T& SparseMatrixCOO::value(int nz_index) const } template +KOKKOS_INLINE_FUNCTION void SparseMatrixCOO::set_row_index(int nz_index, int row) const { assert(nz_index >= 0); @@ -344,6 +349,7 @@ void SparseMatrixCOO::set_row_index(int nz_index, int row) const } template +KOKKOS_INLINE_FUNCTION void SparseMatrixCOO::set_col_index(int nz_index, int column) const { assert(nz_index >= 0); @@ -352,6 +358,7 @@ void SparseMatrixCOO::set_col_index(int nz_index, int column) const } template +KOKKOS_INLINE_FUNCTION void SparseMatrixCOO::set_value(int nz_index, T value) const { assert(nz_index >= 0); @@ -360,6 +367,7 @@ void SparseMatrixCOO::set_value(int nz_index, T value) const } template +KOKKOS_INLINE_FUNCTION void SparseMatrixCOO::add_value(int nz_index, T value) const { assert(nz_index >= 0); From ea10d3f62f402f1ff7432099cc760e9f1d1f6300 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Sat, 2 May 2026 01:05:45 +0200 Subject: [PATCH 08/20] Change nodeBuildInteriorBoundarySolverMatrix to inline --- include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index 4de66dfe..dcc84f4d 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -33,10 +33,10 @@ using InnerBoundaryMatrix = SparseMatrixCSR; #endif KOKKOS_INLINE_FUNCTION -static void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, - const InnerBoundaryMatrix& matrix, ConstVector& arr, - ConstVector& att, ConstVector& art, - ConstVector& detDF, ConstVector& coeff_beta) +static inline void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + const InnerBoundaryMatrix& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, + ConstVector& detDF, ConstVector& coeff_beta) { using smoother_take::getCircleAscIndex; using smoother_take::getStencil; From 65281b7920155eca8aa07674d3e6e4fed020c107 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Sat, 2 May 2026 01:07:30 +0200 Subject: [PATCH 09/20] Remove KOKKOS_INLINE_FUNCTION from nodeBuildInteriorBoundarySolverMatrix --- include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl | 1 - 1 file changed, 1 deletion(-) diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index dcc84f4d..e5219012 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -32,7 +32,6 @@ using InnerBoundaryMatrix = SparseMatrixCOO; using InnerBoundaryMatrix = SparseMatrixCSR; #endif -KOKKOS_INLINE_FUNCTION static inline void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, const InnerBoundaryMatrix& matrix, ConstVector& arr, ConstVector& att, ConstVector& art, From 4068fa5d4d70fc2f15c15bdb854d5e26d1842209 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 2 May 2026 02:10:09 +0200 Subject: [PATCH 10/20] Fix --- include/LinearAlgebra/Matrix/coo_matrix.h | 8 ------ .../SmootherTake/buildInnerBoundaryAsc.inl | 19 +++++-------- .../SmootherTake/buildTridiagonalAsc.inl | 27 +++++++------------ .../Smoother/SmootherTake/matrixStencil.inl | 2 -- .../Solvers/coo_mumps_solver.cpp | 2 -- 5 files changed, 17 insertions(+), 41 deletions(-) diff --git a/include/LinearAlgebra/Matrix/coo_matrix.h b/include/LinearAlgebra/Matrix/coo_matrix.h index b893783e..ce4c3112 100644 --- a/include/LinearAlgebra/Matrix/coo_matrix.h +++ b/include/LinearAlgebra/Matrix/coo_matrix.h @@ -62,13 +62,9 @@ class SparseMatrixCOO // -------------------------------------------------------------- // // Const setters — safe for Kokkos device lambdas (host + device) // // -------------------------------------------------------------- // - KOKKOS_INLINE_FUNCTION void set_row_index(int nz_index, int row) const; - KOKKOS_INLINE_FUNCTION void set_col_index(int nz_index, int column) const; - KOKKOS_INLINE_FUNCTION void set_value(int nz_index, T value) const; - KOKKOS_INLINE_FUNCTION void add_value(int nz_index, T value) const; bool is_symmetric() const; @@ -340,7 +336,6 @@ const T& SparseMatrixCOO::value(int nz_index) const } template -KOKKOS_INLINE_FUNCTION void SparseMatrixCOO::set_row_index(int nz_index, int row) const { assert(nz_index >= 0); @@ -349,7 +344,6 @@ void SparseMatrixCOO::set_row_index(int nz_index, int row) const } template -KOKKOS_INLINE_FUNCTION void SparseMatrixCOO::set_col_index(int nz_index, int column) const { assert(nz_index >= 0); @@ -358,7 +352,6 @@ void SparseMatrixCOO::set_col_index(int nz_index, int column) const } template -KOKKOS_INLINE_FUNCTION void SparseMatrixCOO::set_value(int nz_index, T value) const { assert(nz_index >= 0); @@ -367,7 +360,6 @@ void SparseMatrixCOO::set_value(int nz_index, T value) const } template -KOKKOS_INLINE_FUNCTION void SparseMatrixCOO::add_value(int nz_index, T value) const { assert(nz_index >= 0); diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index e5219012..f48178ae 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -9,12 +9,9 @@ namespace smoother_take static inline void update_CSR_COO_MatrixElement(const SparseMatrixCOO& matrix, const int ptr, const int offset, const int row, const int column, const double value) { - std::cout << "Updating COO matrix at row " << row << ", column " << column << " with value " << value << std::endl; matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column); matrix.set_value(ptr + offset, value); - std::cout << "Updated COO matrix at row " << matrix.row_index(ptr + offset) << ", column " - << matrix.col_index(ptr + offset) << " with value " << matrix.value(ptr + offset) << std::endl; } #else // When using the in-house solver, the matrix is stored in CSR format. @@ -26,14 +23,9 @@ static inline void update_CSR_COO_MatrixElement(const SparseMatrixCSR& m } #endif -#ifdef GMGPOLAR_USE_MUMPS -using InnerBoundaryMatrix = SparseMatrixCOO; -#else -using InnerBoundaryMatrix = SparseMatrixCSR; -#endif - +template static inline void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, - const InnerBoundaryMatrix& matrix, ConstVector& arr, + const MatrixType& matrix, ConstVector& arr, ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) { @@ -178,10 +170,13 @@ SmootherTake::buildInteriorBoundarySolverMatrix() ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); + // Capture pointer to matrix for use in Kokkos parallel loop. + const InnerBoundaryMatrix* matrix_ptr = &inner_boundary_solver_matrix; + Kokkos::parallel_for( "Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) { - nodeBuildInteriorBoundarySolverMatrix(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att, - art, detDF, coeff_beta); + nodeBuildInteriorBoundarySolverMatrix(i_theta, grid, DirBC_Interior, *matrix_ptr, arr, att, art, detDF, + coeff_beta); }); Kokkos::fence(); diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl index f35db61a..2aec8326 100644 --- a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -6,18 +6,12 @@ namespace smoother_take static inline void updateMatrixElement(const BatchedTridiagonalSolver& solver, int batch, int row, int column, double value) { - if (row == column) { + if (row == column) solver.set_main_diagonal(batch, row, value); - std::cout << solver.main_diagonal(batch, row) << ", " << value << std::endl; - } - else if (row == column - 1) { + else if (row == column - 1) solver.set_sub_diagonal(batch, row, value); - std::cout << solver.sub_diagonal(batch, row) << ", " << value << std::endl; - } - else if (row == 0 && column == solver.matrixDimension() - 1) { + else if (row == 0 && column == solver.matrixDimension() - 1) solver.set_cyclic_corner(batch, value); - std::cout << solver.cyclic_corner(batch) << ", " << value << std::endl; - } } // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) @@ -318,10 +312,9 @@ void SmootherTake::buildTridiagonalSolverMatrices() const LevelCacheType& level_cache = Smoother::level_cache_; const bool DirBC_Interior = Smoother::DirBC_Interior_; const int num_omp_threads = Smoother::num_omp_threads_; - const BatchedTridiagonalSolver& circle_tridiagonal_solver = - SmootherTake::circle_tridiagonal_solver_; - const BatchedTridiagonalSolver& radial_tridiagonal_solver = - SmootherTake::radial_tridiagonal_solver_; + + const BatchedTridiagonalSolver* circle_tridiagonal_solver_ptr = &circle_tridiagonal_solver_; + const BatchedTridiagonalSolver* radial_tridiagonal_solver_ptr = &radial_tridiagonal_solver_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -344,8 +337,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_r, const int i_theta) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver, - radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, *circle_tridiagonal_solver_ptr, + *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); }); /* For loop matches radial access pattern */ @@ -357,8 +350,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_theta, const int i_r) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver, - radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, *circle_tridiagonal_solver_ptr, + *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); }); Kokkos::fence(); diff --git a/include/Smoother/SmootherTake/matrixStencil.inl b/include/Smoother/SmootherTake/matrixStencil.inl index 080e2597..bc11625f 100644 --- a/include/Smoother/SmootherTake/matrixStencil.inl +++ b/include/Smoother/SmootherTake/matrixStencil.inl @@ -1,5 +1,3 @@ -#pragma once - /* -------------- */ /* Stencil access */ /* -------------- */ diff --git a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp index 12c78cd4..96abc80b 100644 --- a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -9,8 +9,6 @@ using namespace gmgpolar; CooMumpsSolver::CooMumpsSolver(SparseMatrixCOO matrix) { - std::cout << matrix << std::endl; - if (matrix.is_symmetric()) { matrix_ = extractUpperTriangle(matrix); } From df84288a7d5d4b9cb96dd4bf11b7d80f618ecfd2 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 2 May 2026 02:11:52 +0200 Subject: [PATCH 11/20] Improve name --- include/LinearAlgebra/Matrix/csr_matrix.h | 12 ++++++------ .../Smoother/SmootherTake/buildInnerBoundaryAsc.inl | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/LinearAlgebra/Matrix/csr_matrix.h b/include/LinearAlgebra/Matrix/csr_matrix.h index 6ae267aa..a1ee4745 100644 --- a/include/LinearAlgebra/Matrix/csr_matrix.h +++ b/include/LinearAlgebra/Matrix/csr_matrix.h @@ -71,9 +71,9 @@ class SparseMatrixCSR // -------------------------------------------------------------- // // Const setters — safe for Kokkos device lambdas (host + device) // // -------------------------------------------------------------- // - void set_nz_index(int row, int nz_index, int column) const; - void set_nz_entry(int row, int nz_index, T value) const; - void add_nz_entry(int row, int nz_index, T value) const; + void set_row_nz_index(int row, int nz_index, int column) const; + void set_row_nz_entry(int row, int nz_index, T value) const; + void add_row_nz_entry(int row, int nz_index, T value) const; // --------------------------------------------------------------- // // Raw pointer access (host only, for external solvers e.g. MUMPS) // @@ -361,7 +361,7 @@ T& SparseMatrixCSR::row_nz_entry(int row, int nz_index) // ============================================================================ template -void SparseMatrixCSR::set_nz_index(int row, int nz_index, int column) const +void SparseMatrixCSR::set_row_nz_index(int row, int nz_index, int column) const { assert(row >= 0 && row < rows_); assert(nz_index >= 0 && nz_index < row_nz_size(row)); @@ -369,7 +369,7 @@ void SparseMatrixCSR::set_nz_index(int row, int nz_index, int column) const } template -void SparseMatrixCSR::set_nz_entry(int row, int nz_index, T value) const +void SparseMatrixCSR::set_row_nz_entry(int row, int nz_index, T value) const { assert(row >= 0 && row < rows_); assert(nz_index >= 0 && nz_index < row_nz_size(row)); @@ -377,7 +377,7 @@ void SparseMatrixCSR::set_nz_entry(int row, int nz_index, T value) const } template -void SparseMatrixCSR::add_nz_entry(int row, int nz_index, T value) const +void SparseMatrixCSR::add_row_nz_entry(int row, int nz_index, T value) const { assert(row >= 0 && row < rows_); assert(nz_index >= 0 && nz_index < row_nz_size(row)); diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index f48178ae..eb241e44 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -18,8 +18,8 @@ static inline void update_CSR_COO_MatrixElement(const SparseMatrixCOO& m static inline void update_CSR_COO_MatrixElement(const SparseMatrixCSR& matrix, const int ptr, const int offset, const int row, const int column, const double value) { - matrix.set_nz_index(row, offset, column); - matrix.set_nz_entry(row, offset, value); + matrix.set_row_nz_index(row, offset, column); + matrix.set_row_nz_entry(row, offset, value); } #endif From c3848fb0a3fec0252f57d9edb56c799c5816f099 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 2 May 2026 02:13:19 +0200 Subject: [PATCH 12/20] remove unnecessary assert --- include/LinearAlgebra/Matrix/coo_matrix.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/LinearAlgebra/Matrix/coo_matrix.h b/include/LinearAlgebra/Matrix/coo_matrix.h index ce4c3112..595b7448 100644 --- a/include/LinearAlgebra/Matrix/coo_matrix.h +++ b/include/LinearAlgebra/Matrix/coo_matrix.h @@ -286,7 +286,6 @@ template int SparseMatrixCOO::non_zero_size() const { assert(this->nnz_ >= 0); - assert(static_cast(this->nnz_) <= static_cast(this->rows_) * static_cast(this->columns_)); return this->nnz_; } From 87dc018419d7a68d6d3fdd18e38e0086fd4ffbe6 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 2 May 2026 23:58:00 +0200 Subject: [PATCH 13/20] Avoid PolarGrid copy --- include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl | 4 +++- include/Smoother/SmootherTake/buildTridiagonalAsc.inl | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index eb241e44..8d8ee436 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -170,12 +170,14 @@ SmootherTake::buildInteriorBoundarySolverMatrix() ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); + const PolarGrid* grid_ptr = &grid; + // Capture pointer to matrix for use in Kokkos parallel loop. const InnerBoundaryMatrix* matrix_ptr = &inner_boundary_solver_matrix; Kokkos::parallel_for( "Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) { - nodeBuildInteriorBoundarySolverMatrix(i_theta, grid, DirBC_Interior, *matrix_ptr, arr, att, art, detDF, + nodeBuildInteriorBoundarySolverMatrix(i_theta, *grid_ptr, DirBC_Interior, *matrix_ptr, arr, att, art, detDF, coeff_beta); }); diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl index 2aec8326..004c231d 100644 --- a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -325,6 +325,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); + const PolarGrid* grid_ptr = &grid; + /* We split the loops into two regions to better respect the */ /* access patterns of the smoother and improve cache locality. */ @@ -337,7 +339,7 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_r, const int i_theta) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, *circle_tridiagonal_solver_ptr, + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, *circle_tridiagonal_solver_ptr, *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); }); @@ -350,7 +352,7 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_theta, const int i_r) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, *circle_tridiagonal_solver_ptr, + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, *circle_tridiagonal_solver_ptr, *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); }); From 8efda86bb898fef7cb74cb5123e73e3adaad21c1 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Mon, 4 May 2026 15:29:03 +0200 Subject: [PATCH 14/20] Update buildTridiagonalAsc.inl --- include/Smoother/SmootherTake/buildTridiagonalAsc.inl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl index 004c231d..75ad11c4 100644 --- a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -313,9 +313,6 @@ void SmootherTake::buildTridiagonalSolverMatrices() const bool DirBC_Interior = Smoother::DirBC_Interior_; const int num_omp_threads = Smoother::num_omp_threads_; - const BatchedTridiagonalSolver* circle_tridiagonal_solver_ptr = &circle_tridiagonal_solver_; - const BatchedTridiagonalSolver* radial_tridiagonal_solver_ptr = &radial_tridiagonal_solver_; - assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -339,8 +336,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_r, const int i_theta) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, *circle_tridiagonal_solver_ptr, - *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver, + radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); }); /* For loop matches radial access pattern */ @@ -352,8 +349,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_theta, const int i_r) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, *circle_tridiagonal_solver_ptr, - *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver, + radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); }); Kokkos::fence(); From 2b5ce206c32dcd81014f7d407702db9ef0099fec Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Mon, 4 May 2026 23:41:48 +0200 Subject: [PATCH 15/20] Update buildTridiagonalAsc.inl --- include/Smoother/SmootherTake/buildTridiagonalAsc.inl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl index 75ad11c4..004c231d 100644 --- a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -313,6 +313,9 @@ void SmootherTake::buildTridiagonalSolverMatrices() const bool DirBC_Interior = Smoother::DirBC_Interior_; const int num_omp_threads = Smoother::num_omp_threads_; + const BatchedTridiagonalSolver* circle_tridiagonal_solver_ptr = &circle_tridiagonal_solver_; + const BatchedTridiagonalSolver* radial_tridiagonal_solver_ptr = &radial_tridiagonal_solver_; + assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -336,8 +339,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_r, const int i_theta) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver, - radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, *circle_tridiagonal_solver_ptr, + *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); }); /* For loop matches radial access pattern */ @@ -349,8 +352,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_theta, const int i_r) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver, - radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, *circle_tridiagonal_solver_ptr, + *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); }); Kokkos::fence(); From 7af0aab8da4a3622a41fa6c5badf471ee6cdd71d Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Tue, 5 May 2026 19:18:44 +0200 Subject: [PATCH 16/20] Update buildTridiagonalAsc.inl --- include/Smoother/SmootherTake/buildTridiagonalAsc.inl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl index 004c231d..e8228a82 100644 --- a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -339,8 +339,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_r, const int i_theta) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, *circle_tridiagonal_solver_ptr, - *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver, + radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); }); /* For loop matches radial access pattern */ @@ -352,8 +352,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_theta, const int i_r) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, *circle_tridiagonal_solver_ptr, - *radial_tridiagonal_solver_ptr, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver, + radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); }); Kokkos::fence(); From 81beaa98d6118a4eb748415ba3e2b40d2f615eac Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Tue, 5 May 2026 19:19:27 +0200 Subject: [PATCH 17/20] Test AllocatableVector tridiagonal_solver.h --- include/LinearAlgebra/Solvers/tridiagonal_solver.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/LinearAlgebra/Solvers/tridiagonal_solver.h b/include/LinearAlgebra/Solvers/tridiagonal_solver.h index 1aa72aa6..c3768148 100644 --- a/include/LinearAlgebra/Solvers/tridiagonal_solver.h +++ b/include/LinearAlgebra/Solvers/tridiagonal_solver.h @@ -328,10 +328,10 @@ class BatchedTridiagonalSolver int matrix_dimension_; int batch_count_; - Vector main_diagonal_; - Vector sub_diagonal_; - Vector buffer_; - Vector gamma_; + AllocatableVector main_diagonal_; + AllocatableVector sub_diagonal_; + AllocatableVector buffer_; + AllocatableVector gamma_; bool is_cyclic_; bool is_factorized_; From ae5c5606e0739688bbc4a63ea711cde9776db862 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Tue, 5 May 2026 19:25:49 +0200 Subject: [PATCH 18/20] Update buildTridiagonalAsc.inl --- include/Smoother/SmootherTake/buildTridiagonalAsc.inl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl index e8228a82..2b06f225 100644 --- a/include/Smoother/SmootherTake/buildTridiagonalAsc.inl +++ b/include/Smoother/SmootherTake/buildTridiagonalAsc.inl @@ -339,8 +339,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_r, const int i_theta) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver, - radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); }); /* For loop matches radial access pattern */ @@ -352,8 +352,8 @@ void SmootherTake::buildTridiagonalSolverMatrices() ), // Kokkos lambda function to execute for each point in the index space KOKKOS_LAMBDA(const int i_theta, const int i_r) { - nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver, - radial_tridiagonal_solver, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); }); Kokkos::fence(); From 8069581b8165fdb011116488512e915c33b46fb3 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Tue, 12 May 2026 11:36:22 +0200 Subject: [PATCH 19/20] Missing const --- .../Smoother/SmootherTake/buildInnerBoundaryAsc.inl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index 86e3ddeb..60fcf88b 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -6,17 +6,17 @@ namespace smoother_take #ifdef GMGPOLAR_USE_MUMPS // When using the MUMPS solver, the matrix is assembled in COO format. -static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, - int column, double value) +static inline void update_CSR_COO_MatrixElement(const SparseMatrixCOO& matrix, int ptr, + int offset, int row, int column, double value) { matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column); - matrix.set_value(ptr + offset , value); + matrix.set_value(ptr + offset, value); } #else // When using the in-house solver, the matrix is stored in CSR format. -static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, - int row, int column, double value) +static inline void update_CSR_COO_MatrixElement(const SparseMatrixCSR& matrix, int ptr, + int offset, int row, int column, double value) { matrix.set_row_nz_index(row, offset, column); matrix.set_row_nz_entry(row, offset, value); From c2fae2e355afde4953a2832711a6ce2cda4cddb2 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Tue, 12 May 2026 11:38:40 +0200 Subject: [PATCH 20/20] Clang formatting --- include/DirectSolver/DirectSolverGive/buildSolverMatrix.inl | 4 ++-- include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl | 4 ++-- .../ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl | 4 ++-- .../ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl | 4 ++-- include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/DirectSolver/DirectSolverGive/buildSolverMatrix.inl b/include/DirectSolver/DirectSolverGive/buildSolverMatrix.inl index 59637462..48b2877a 100644 --- a/include/DirectSolver/DirectSolverGive/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolverGive/buildSolverMatrix.inl @@ -5,8 +5,8 @@ namespace direct_solver_coo_mumps_give #ifdef GMGPOLAR_USE_MUMPS // When using the MUMPS solver, the matrix is assembled in COO format. -static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int column, - double value) +static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, + int column, double value) { matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column); diff --git a/include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl b/include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl index 4cdb4487..a144a9c6 100644 --- a/include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl @@ -5,8 +5,8 @@ namespace direct_solver_coo_mumps_take #ifdef GMGPOLAR_USE_MUMPS // When using the MUMPS solver, the matrix is assembled in COO format. -static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int column, - double value) +static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, + int column, double value) { matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column); diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl index 0ff8b1b2..ad6b3362 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl @@ -7,8 +7,8 @@ namespace extrapolated_smoother_give #ifdef GMGPOLAR_USE_MUMPS // When using the MUMPS solver, the matrix is assembled in COO format. -static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, - int column, double value) +static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, + int row, int column, double value) { matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column); diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl index aae19b48..ebf2eb4d 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl @@ -5,8 +5,8 @@ namespace extrapolated_smoother_take #ifdef GMGPOLAR_USE_MUMPS // When using the MUMPS solver, the matrix is assembled in COO format. -static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, - int column, double value) +static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, + int row, int column, double value) { matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column); diff --git a/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl index dd92f089..c59c0359 100644 --- a/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl @@ -5,8 +5,8 @@ namespace smoother_give #ifdef GMGPOLAR_USE_MUMPS // When using the MUMPS solver, the matrix is assembled in COO format. -static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, - int column, double value) +static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, + int row, int column, double value) { matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column);