-
Notifications
You must be signed in to change notification settings - Fork 51
/
grid_parallel_example.cpp
255 lines (229 loc) · 10.6 KB
/
grid_parallel_example.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/****************************************************************************
* Copyright (c) 2018-2023 by the Cabana authors *
* All rights reserved. *
* *
* This file is part of the Cabana library. Cabana is distributed under a *
* BSD 3-clause license. For the licensing terms see the LICENSE file in *
* the top-level directory. *
* *
* SPDX-License-Identifier: BSD-3-Clause *
****************************************************************************/
#include <Cabana_Grid.hpp>
#include <Kokkos_Core.hpp>
#include <iostream>
//---------------------------------------------------------------------------//
// Grid parallel example.
//---------------------------------------------------------------------------//
void gridParallelExample()
{
/*
Just as the core library extends the Kokkos::parallel_for and reduce
concepts for the AoSoA and neighbor lists, Cabana::Grid provides
capabilities for parallel iteration over structured grids.
Most use cases will likely use the interface which use the local grids
directly, but there are additional options for using index spaces,
including multiple index spaces.
All data structures (global mesh/grid, local grid/mesh, and array data)
are created first.
*/
std::cout << "Cabana::Grid Grid Parallel Example\n" << std::endl;
using exec_space = Kokkos::DefaultHostExecutionSpace;
using device_type = exec_space::device_type;
// Let MPI compute the partitioning for this example.
Cabana::Grid::DimBlockPartitioner<3> partitioner;
// Create the global mesh.
double cell_size = 0.23;
std::array<int, 3> global_num_cell = { 39, 42, 55 };
std::array<bool, 3> is_dim_periodic = { false, true, true };
std::array<double, 3> global_low_corner = { 1.2, 3.3, -2.8 };
std::array<double, 3> global_high_corner = {
global_low_corner[0] + cell_size * global_num_cell[0],
global_low_corner[1] + cell_size * global_num_cell[1],
global_low_corner[2] + cell_size * global_num_cell[2] };
auto global_mesh = Cabana::Grid::createUniformGlobalMesh(
global_low_corner, global_high_corner, global_num_cell );
// Create the global grid.
auto global_grid = Cabana::Grid::createGlobalGrid(
MPI_COMM_WORLD, global_mesh, is_dim_periodic, partitioner );
// Create an array layout on the cells.
int halo_width = 2;
int dofs_per_cell = 4;
auto cell_layout = Cabana::Grid::createArrayLayout(
global_grid, halo_width, dofs_per_cell, Cabana::Grid::Cell() );
auto local_grid = cell_layout->localGrid();
// Create an array.
std::string label( "example_array" );
auto array =
Cabana::Grid::createArray<double, device_type>( label, cell_layout );
//-----------------------------------------------------------------------//
/*
The grid parallel functions first take a kernel label, then an execution
space rather than a direct Kokkos range policy. Internally, a
multidimensional range policy (Kokkos::MDRangePolicy) is created with the
execution space given. Next, the Own or Ghost tag is passed to define
whether the kernel should include ghosted entities or not (remember that
specifying Ghost includes both owned and ghosted entities). The mesh
entity is passed next and finally, the parallel kernel.
Here we use a lambda function for simplicity, but a functor with or
without tag arguments are also options. Note that the kernel signature
uses the three indices of the 3D grid. We set every value on the cells in
the grid to 1.
*/
auto array_view = array->view();
Cabana::Grid::grid_parallel_for(
"local_grid_for", exec_space(), *local_grid, Cabana::Grid::Ghost(),
Cabana::Grid::Cell(),
KOKKOS_LAMBDA( const int i, const int j, const int k ) {
for ( int l = 0; l < 4; ++l )
array_view( i, j, k, l ) = 1.0;
} );
/*
We can similarly do grid reductions with the same arguments as
parallel_for; the only change is the reduction variable in the kernel
signature and the final return value.
*/
double sum = 0.0;
Cabana::Grid::grid_parallel_reduce(
"local_grid_reduce", exec_space(), *local_grid, Cabana::Grid::Ghost(),
Cabana::Grid::Cell(),
KOKKOS_LAMBDA( const int i, const int j, const int k, double& result ) {
for ( int l = 0; l < 4; ++l )
result += array_view( i, j, k, l );
},
sum );
/*
Here, the total should match the total grid size (including ghosts) times
the four field values for each grid point.
*/
auto ghost_is = local_grid->indexSpace(
Cabana::Grid::Ghost(), Cabana::Grid::Cell(), Cabana::Grid::Local() );
std::cout << "Local grid sum: " << sum << " (should be "
<< 4 * ghost_is.size() << ")\n"
<< std::endl;
//-----------------------------------------------------------------------//
/*
Grid parallel operations are also possible by directly using an index
space instead of using the index space from within the local grid. Here we
subtract 2 from every entity.
*/
auto own_is = local_grid->indexSpace(
Cabana::Grid::Own(), Cabana::Grid::Cell(), Cabana::Grid::Local() );
Cabana::Grid::grid_parallel_for(
"index_space_for", exec_space(), own_is,
KOKKOS_LAMBDA( const int i, const int j, const int k ) {
for ( int l = 0; l < 4; ++l )
array_view( i, j, k, l ) -= 2.0;
} );
/*
Just as before with the local grid we can do a grid reduction with the
index space as well.
*/
double sum_is = 0.0;
Cabana::Grid::grid_parallel_reduce(
"index_space_reduce", exec_space(), own_is,
KOKKOS_LAMBDA( const int i, const int j, const int k, double& result ) {
for ( int l = 0; l < 4; ++l )
result += array_view( i, j, k, l );
},
sum_is );
/*
Now the total should be the negative of the total owned grid size, again
times the four field values for each grid point.
*/
std::cout << "Index space grid sum: " << sum_is << " (should be "
<< -4 * own_is.size() << ")\n"
<< std::endl;
//-----------------------------------------------------------------------//
/*
An additional option in using the index space interface is using the index
space returned by the array layout. Note that the functor signature
changes here since the field dimension is included in the index space
(rather than looping in serial over the last dimension as in the previous
cases).
For this kernel we divide every entity by the total number of cells.
*/
auto own_array_is =
cell_layout->indexSpace( Cabana::Grid::Own(), Cabana::Grid::Local() );
Cabana::Grid::grid_parallel_for(
"index_space_for", exec_space(), own_array_is,
KOKKOS_LAMBDA( const int i, const int j, const int k, const int l ) {
array_view( i, j, k, l ) /= sum_is;
} );
/*
Do another grid reduction to check the result.
*/
double sum_layout_is = 0.0;
Cabana::Grid::grid_parallel_reduce(
"index_space_reduce", exec_space(), own_array_is,
KOKKOS_LAMBDA( const int i, const int j, const int k, const int l,
double& result ) { result += array_view( i, j, k, l ); },
sum_layout_is );
/*
Now the total should be 1.
*/
std::cout << "Array layout index space grid sum: " << sum_layout_is
<< " (should be 1)\n"
<< std::endl;
//-----------------------------------------------------------------------//
/*
One potentially useful extension of directly using index spaces is that
they can be fused together into a single parallel kernel. Here we iterate
over both the local and boundary index spaces at the same time, combined
in a Kokkos::Array and differentiated with the index in the array in the
kernel.
*/
auto boundary_is = local_grid->boundaryIndexSpace(
Cabana::Grid::Own(), Cabana::Grid::Cell(), 1, 0, 0 );
Cabana::Grid::grid_parallel_for(
"multi_space_for", exec_space{},
Kokkos::Array<Cabana::Grid::IndexSpace<3>, 2>{ own_is, boundary_is },
// The first index in the functor signature is which index space is
// being used (from the array in the previous argument).
KOKKOS_LAMBDA( const int s, const int i, const int j, const int k ) {
for ( int l = 0; l < 4; ++l )
{
// Now we can differentiate updates in kernel based on which
// index space we're using. We set the value for the owned space
// and increment (afterwards) only on the boundary.
if ( 0 == s )
array_view( i, j, k, l ) = 0.5;
else if ( 1 == s )
array_view( i, j, k, l ) += 1.0;
}
} );
/*
Here we reduce only over each index space separately to check the value.
*/
double sum_bound = 0.0;
Cabana::Grid::grid_parallel_reduce(
"boundary_space_reduce", exec_space(), own_is,
KOKKOS_LAMBDA( const int i, const int j, const int k, double& result ) {
for ( int l = 0; l < 4; ++l )
result += array_view( i, j, k, l );
},
sum_bound );
/*
This total should be the size of the owned space, times the value, times
the four field values for each grid point plus the boundary space size.
*/
std::cout << "Multiple index space grid sum: " << sum_bound
<< " (should be "
<< 4 * 0.5 * own_is.size() + 4 * boundary_is.size() << ")\n"
<< std::endl;
}
//---------------------------------------------------------------------------//
// Main.
//---------------------------------------------------------------------------//
//---------------------------------------------------------------------------//
// Main.
//---------------------------------------------------------------------------//
int main( int argc, char* argv[] )
{
    // MPI is needed only to construct the grid/mesh; the example is not
    // intended to be run with multiple ranks.
    MPI_Init( &argc, &argv );
    {
        // The scope guard finalizes Kokkos when this inner scope exits,
        // guaranteeing it happens before MPI_Finalize below.
        Kokkos::ScopeGuard kokkos_guard( argc, argv );
        gridParallelExample();
    }
    MPI_Finalize();

    return 0;
}
//---------------------------------------------------------------------------//