-
Notifications
You must be signed in to change notification settings - Fork 4
Substitute COO_Mumps_Solver in ExtrapolatedSmoother #206
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a39112f
2a72cee
2e9f71d
2efa57f
fa3c7cd
90d3374
bee2de9
ce1ab16
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -798,80 +798,63 @@ SparseMatrixCSR<double> DirectSolver_CSR_LU_Give<LevelCacheType>::buildSolverMat | |
|
|
||
| SparseMatrixCSR<double> solver_matrix(n, n, nnz_per_row); | ||
|
|
||
| if (num_omp_threads == 1) { | ||
| /* Single-threaded execution */ | ||
| for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { | ||
| buildSolverMatrixCircleSection(i_r, solver_matrix); | ||
| } | ||
| for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } | ||
| } | ||
| else { | ||
| /* Multi-threaded execution: For Loops */ | ||
| const int num_circle_tasks = grid.numberSmootherCircles(); | ||
| const int additional_radial_tasks = grid.ntheta() % 3; | ||
| const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; | ||
|
|
||
| const int num_smoother_circles = grid.numberSmootherCircles(); | ||
| const int additional_radial_tasks = grid.ntheta() % 3; | ||
| const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; | ||
|
|
||
| /* ---------------- */ | ||
| /* Circular section */ | ||
| /* ---------------- */ | ||
| // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles. | ||
| #pragma omp parallel num_threads(num_omp_threads) | ||
| { | ||
| { | ||
| #pragma omp for | ||
| for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) { | ||
| int i_r = grid.numberSmootherCircles() - circle_task - 1; | ||
| buildSolverMatrixCircleSection(i_r, solver_matrix); | ||
| } | ||
| for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { | ||
| buildSolverMatrixCircleSection(i_r, solver_matrix); | ||
| } /* Implicit barrier */ | ||
| #pragma omp for | ||
| for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) { | ||
| int i_r = grid.numberSmootherCircles() - circle_task - 1; | ||
| buildSolverMatrixCircleSection(i_r, solver_matrix); | ||
| } | ||
| #pragma omp for nowait | ||
| for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) { | ||
| int i_r = grid.numberSmootherCircles() - circle_task - 1; | ||
| buildSolverMatrixCircleSection(i_r, solver_matrix); | ||
| } | ||
| for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { | ||
| buildSolverMatrixCircleSection(i_r, solver_matrix); | ||
| } /* Implicit barrier */ | ||
| #pragma omp for | ||
| for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { | ||
| buildSolverMatrixCircleSection(i_r, solver_matrix); | ||
| } /* Implicit barrier */ | ||
| } | ||
|
|
||
| /* ---------------- */ | ||
| /* Radial section */ | ||
| /* ---------------- */ | ||
| // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines. | ||
| // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks | ||
| // that are handled serially before the parallel loops. | ||
| if (additional_radial_tasks > 0) { | ||
| const int i_theta = 0; | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } | ||
|
|
||
| if (additional_radial_tasks > 1) { | ||
| const int i_theta = 1; | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } | ||
|
|
||
| #pragma omp parallel num_threads(num_omp_threads) | ||
| { | ||
| #pragma omp for | ||
| for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { | ||
| if (radial_task > 0) { | ||
| int i_theta = radial_task + additional_radial_tasks; | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } | ||
| else { | ||
| if (additional_radial_tasks == 0) { | ||
| buildSolverMatrixRadialSection(0, solver_matrix); | ||
| } | ||
| else if (additional_radial_tasks >= 1) { | ||
| buildSolverMatrixRadialSection(0, solver_matrix); | ||
| buildSolverMatrixRadialSection(1, solver_matrix); | ||
| } | ||
| } | ||
| } | ||
| for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { | ||
| const int i_theta = radial_task + additional_radial_tasks; | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } /* Implicit barrier */ | ||
| #pragma omp for | ||
| for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { | ||
| if (radial_task > 1) { | ||
| int i_theta = radial_task + additional_radial_tasks; | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } | ||
| else { | ||
| if (additional_radial_tasks == 0) { | ||
| buildSolverMatrixRadialSection(1, solver_matrix); | ||
| } | ||
| else if (additional_radial_tasks == 1) { | ||
| buildSolverMatrixRadialSection(2, solver_matrix); | ||
| } | ||
| else if (additional_radial_tasks == 2) { | ||
| buildSolverMatrixRadialSection(2, solver_matrix); | ||
| buildSolverMatrixRadialSection(3, solver_matrix); | ||
| } | ||
| } | ||
| } | ||
|
Comment on lines
+831
to
-868
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't quite line up. Before you had: if (additional_radial_tasks == 0) {
buildSolverMatrixRadialSection(1, solver_matrix);
}
else if (additional_radial_tasks == 1) {
buildSolverMatrixRadialSection(2, solver_matrix);
}
else if (additional_radial_tasks == 2) {
buildSolverMatrixRadialSection(2, solver_matrix);
buildSolverMatrixRadialSection(3, solver_matrix);
}
for (int radial_task = 4; radial_task < num_radial_tasks; radial_task += 3) {
int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
} but now you have: if (additional_radial_tasks > 0) {
const int i_theta = 0;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
if (additional_radial_tasks > 1) {
const int i_theta = 1;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
const int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
} /* Implicit barrier */So for
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Example with additional_radial_tasks = 2. Before: Step 1: {{0,1},{5},{8},...} BARRIER Step 2: {{2,3},{6},{9},...} BARRIER Step 3: {{4},{7},{10},...}. Now: Serial step 1: {0} BARRIER Serial step 2: {1} BARRIER Parallel step 1: {{2},{5},{8},...} BARRIER Parallel step 2: {{3},{6},{9},...} BARRIER Parallel step 3: {{4},{7},{10},...}. I think the new version is much nicer and easier to port to Kokkos. |
||
| for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { | ||
| const int i_theta = radial_task + additional_radial_tasks; | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } /* Implicit barrier */ | ||
| #pragma omp for | ||
| for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { | ||
| int i_theta = radial_task + additional_radial_tasks; | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } | ||
| } | ||
| for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { | ||
| const int i_theta = radial_task + additional_radial_tasks; | ||
| buildSolverMatrixRadialSection(i_theta, solver_matrix); | ||
| } /* Implicit barrier */ | ||
| } | ||
|
|
||
| return solver_matrix; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,13 +9,11 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getStencilSize(int global_index) c | |
| int i_r, i_theta; | ||
| grid.multiIndex(global_index, i_r, i_theta); | ||
|
|
||
| const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; | ||
| const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9; | ||
| const int size_stencil_interior = 9; | ||
| const int size_stencil_next_outer_boundary = 9; | ||
| const int size_stencil_outer_boundary = 1; | ||
| const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; | ||
| const int size_stencil_interior = 9; | ||
| const int size_stencil_outer_boundary = 1; | ||
|
|
||
| if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) { | ||
| if ((i_r > 0 && i_r < grid.nr() - 1)) { | ||
|
Comment on lines
-18
to
+16
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have you changed the indexing to be 0-based instead of 1-based? This would explain my previous question, except that here you are working on i_r not i_theta. Why do you no longer need the check for
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because in the Direct CSR Solver there is no symmetry shift: the stencil of the next-to-boundary nodes is the full 9-entry stencil. Currently, DirectSolver Mumps constructs its matrices symmetrically, while DirectSolver CSR does not.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| return size_stencil_interior; | ||
| } | ||
| else if (i_r == 0 && !DirBC_Interior) { | ||
|
|
@@ -24,12 +22,7 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getStencilSize(int global_index) c | |
| else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) { | ||
| return size_stencil_outer_boundary; | ||
| } | ||
| else if (i_r == 1 && DirBC_Interior) { | ||
| return size_stencil_next_inner_boundary; | ||
| } | ||
| else if (i_r == grid.nr() - 2) { | ||
| return size_stencil_next_outer_boundary; | ||
| } | ||
|
|
||
| throw std::out_of_range("Invalid index for stencil"); | ||
| } | ||
|
|
||
|
|
@@ -42,7 +35,7 @@ const Stencil& DirectSolver_CSR_LU_Give<LevelCacheType>::getStencil(int i_r) con | |
| assert(0 <= i_r && i_r < grid.nr()); | ||
| assert(grid.nr() >= 4); | ||
|
|
||
| if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) { | ||
| if ((i_r > 0 && i_r < grid.nr() - 1)) { | ||
| return stencil_interior_; | ||
| } | ||
| else if (i_r == 0 && !DirBC_Interior) { | ||
|
|
@@ -51,12 +44,7 @@ const Stencil& DirectSolver_CSR_LU_Give<LevelCacheType>::getStencil(int i_r) con | |
| else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) { | ||
| return stencil_DB_; | ||
| } | ||
| else if (i_r == 1 && DirBC_Interior) { | ||
| return stencil_next_inner_DB_; | ||
| } | ||
| else if (i_r == grid.nr() - 2) { | ||
| return stencil_next_outer_DB_; | ||
| } | ||
|
|
||
| throw std::out_of_range("Invalid index for stencil"); | ||
| } | ||
|
|
||
|
|
@@ -66,15 +54,10 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getNonZeroCountSolverMatrix() cons | |
| const PolarGrid& grid = DirectSolver<LevelCacheType>::grid_; | ||
| const bool DirBC_Interior = DirectSolver<LevelCacheType>::DirBC_Interior_; | ||
|
|
||
| const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; | ||
| const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9; | ||
| const int size_stencil_interior = 9; | ||
| const int size_stencil_next_outer_boundary = 9; | ||
| const int size_stencil_outer_boundary = 1; | ||
|
|
||
| assert(grid.nr() >= 4); | ||
| const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; | ||
| const int size_stencil_interior = 9; | ||
| const int size_stencil_outer_boundary = 1; | ||
|
|
||
| return grid.ntheta() * | ||
| (size_stencil_inner_boundary + size_stencil_next_inner_boundary + (grid.nr() - 4) * size_stencil_interior + | ||
| size_stencil_next_outer_boundary + size_stencil_outer_boundary); | ||
| (size_stencil_inner_boundary + (grid.nr() - 2) * size_stencil_interior + size_stencil_outer_boundary); | ||
| } | ||


Uh oh!
There was an error while loading. Please reload this page.