// ospMPIDistribTutorialPartialRepl.cpp
// Copyright 2018 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
/* This larger example shows how to use the MPIDistributedDevice to write an
* interactive rendering application, which shows a UI on rank 0 and uses
* all ranks in the MPI world for data loading and rendering. This example
* also shows how to leverage the support for partially replicated data
* distributions in the MPIDistributedDevice, by sharing bricks of data,
* and thus rendering work, between processes. Each pair of ranks will
* generate the same data, thereby distributing the rendering workload
* for the brick between them.
*/
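// For example, with 4 ranks, ranks 0 and 1 both generate brick 0 while
// ranks 2 and 3 both generate brick 1 (see the mpiRank / 2 mapping in main).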
#include <imgui.h>
#include <mpi.h>
#include <cmath>
#include <iostream>
#include <iterator>
#include <memory>
#include <random>
#include <vector>
#include "GLFWDistribOSPRayWindow.h"
#include "ospray/ospray_cpp.h"
#include "ospray/ospray_cpp/ext/rkcommon.h"
#include "ospray/ospray_util.h"
#include "rkcommon/utility/getEnvVar.h"
using namespace ospray;
using namespace rkcommon;
using namespace rkcommon::math;
struct VolumeBrick
{
// the volume data itself
cpp::Volume brick;
cpp::VolumetricModel model;
cpp::Group group;
cpp::Instance instance;
// the bounds of the owned portion of data
box3f bounds;
// the full bounds of the owned portion + ghost voxels
box3f ghostBounds;
};
static box3f worldBounds;
// Generate the rank's local volume brick
VolumeBrick makeLocalVolume(const int mpiRank, const int mpiWorldSize);
int main(int argc, char **argv)
{
int mpiThreadCapability = 0;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &mpiThreadCapability);
if (mpiThreadCapability != MPI_THREAD_MULTIPLE
&& mpiThreadCapability != MPI_THREAD_SERIALIZED) {
fprintf(stderr,
"OSPRay requires the MPI runtime to support thread "
"multiple or thread serialized.\n");
return 1;
}
int mpiRank = 0;
int mpiWorldSize = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
std::cout << "OSPRay rank " << mpiRank << "/" << mpiWorldSize << "\n";
// load the MPI module, and select the MPI distributed device. Here we
// do not call ospInit, as we want to explicitly pick the distributed
// device
auto OSPRAY_MPI_DISTRIBUTED_GPU =
utility::getEnvVar<int>("OSPRAY_MPI_DISTRIBUTED_GPU").value_or(0);
if (OSPRAY_MPI_DISTRIBUTED_GPU) {
ospLoadModule("mpi_distributed_gpu");
} else {
ospLoadModule("mpi_distributed_cpu");
}
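// either module provides the "mpiDistributed" device created below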
{
cpp::Device mpiDevice("mpiDistributed");
mpiDevice.commit();
mpiDevice.setCurrent();
// set an error callback to catch any OSPRay errors and exit the application
ospDeviceSetErrorCallback(
mpiDevice.handle(),
[](void *, OSPError error, const char *errorDetails) {
std::cerr << "OSPRay error: " << errorDetails << std::endl;
exit(error);
},
nullptr);
// every two ranks will share a volume brick to render; if we have an
// odd number of ranks, the last one will have its own brick
const int sharedWorldSize = mpiWorldSize / 2 + mpiWorldSize % 2;
// all ranks specify the same rendering parameters, with the exception of
// the data to be rendered, which is distributed among the ranks
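// pairs of ranks (2i, 2i + 1) pass the same brick index, so both build
// an identical copy of the same brick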
VolumeBrick brick = makeLocalVolume(mpiRank / 2, sharedWorldSize);
// create the "world" model which will contain all of our geometries
cpp::World world;
world.setParam("instance", cpp::CopiedData(brick.instance));
world.setParam("region", cpp::CopiedData(brick.bounds));
world.commit();
// create OSPRay renderer
cpp::Renderer renderer("mpiRaycast");
// create and setup an ambient light
cpp::Light ambientLight("ambient");
ambientLight.commit();
renderer.setParam("light", cpp::CopiedData(ambientLight));
// create a GLFW OSPRay window: this object will create and manage the
// OSPRay frame buffer and camera directly
auto glfwOSPRayWindow =
std::unique_ptr<GLFWDistribOSPRayWindow>(new GLFWDistribOSPRayWindow(
vec2i{1024, 768}, worldBounds, world, renderer));
int spp = 1;
int currentSpp = 1;
if (mpiRank == 0) {
glfwOSPRayWindow->registerImGuiCallback(
[&]() { ImGui::SliderInt("pixelSamples", &spp, 1, 64); });
}
glfwOSPRayWindow->registerDisplayCallback(
[&](GLFWDistribOSPRayWindow *win) {
// Send the UI changes out to the other ranks so we can synchronize
// how many samples per-pixel we're taking
MPI_Bcast(&spp, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (spp != currentSpp) {
currentSpp = spp;
renderer.setParam("pixelSamples", spp);
win->addObjectToCommit(renderer.handle());
}
});
// start the GLFW main loop, which will continuously render
glfwOSPRayWindow->mainLoop();
}
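// the block scope above ensures all C++ wrapper objects release their
// OSPRay handles before ospShutdown is called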
// cleanly shut OSPRay down
ospShutdown();
MPI_Finalize();
return 0;
}
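// Find the smallest divisor of x in [2, sqrt(x)]; returns false if x is
// prime (or 1), leaving 'divisor' unchanged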
bool computeDivisor(int x, int &divisor)
{
const int upperBound = static_cast<int>(std::sqrt(x));
for (int i = 2; i <= upperBound; ++i) {
if (x % i == 0) {
divisor = i;
return true;
}
}
return false;
}
// Compute an X x Y x Z grid with 'num' grid cells by repeatedly
// splitting off the smallest prime factor of 'num'; this only gives a
// nicely balanced grid when 'num' factors into small primes
vec3i computeGrid(int num)
{
vec3i grid(1);
int axis = 0;
int divisor = 0;
while (computeDivisor(num, divisor)) {
grid[axis] *= divisor;
num /= divisor;
axis = (axis + 1) % 3;
}
if (num != 1) {
grid[axis] *= num;
}
return grid;
}
VolumeBrick makeLocalVolume(const int mpiRank, const int mpiWorldSize)
{
const vec3i grid = computeGrid(mpiWorldSize);
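// map this rank's linear brick index to 3D coordinates in the brick grid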
const vec3i brickId(mpiRank % grid.x,
(mpiRank / grid.x) % grid.y,
mpiRank / (grid.x * grid.y));
// The bricks are 32^3 + 1 layer of ghost voxels on each axis
const vec3i brickVolumeDims = vec3i(32);
const vec3i brickGhostDims = vec3i(brickVolumeDims + 2);
// The grid is over the [0, grid * brickVolumeDims] box
worldBounds = box3f(vec3f(0.f), vec3f(grid * brickVolumeDims));
const vec3f brickLower = brickId * brickVolumeDims;
const vec3f brickUpper = brickId * brickVolumeDims + brickVolumeDims;
VolumeBrick brick;
brick.bounds = box3f(brickLower, brickUpper);
// we just put ghost voxels on all sides here, but a real application
// would change which faces of each brick have ghost voxels dependent
// on the actual data
brick.ghostBounds = box3f(brickLower - vec3f(1.f), brickUpper + vec3f(1.f));
brick.brick = cpp::Volume("structuredRegular");
brick.brick.setParam("dimensions", brickGhostDims);
// we use the grid origin to place this brick in the right position inside
// the global volume
brick.brick.setParam("gridOrigin", brick.ghostBounds.lower);
// generate the volume data to just be filled with this rank's id
const size_t nVoxels = brickGhostDims.x * brickGhostDims.y * brickGhostDims.z;
std::vector<uint8_t> volumeData(nVoxels, static_cast<uint8_t>(mpiRank));
brick.brick.setParam("data",
cpp::CopiedData(static_cast<const uint8_t *>(volumeData.data()),
vec3ul(brickVolumeDims)));
brick.brick.commit();
brick.model = cpp::VolumetricModel(brick.brick);
cpp::TransferFunction tfn("piecewiseLinear");
std::vector<vec3f> colors = {vec3f(0.f, 0.f, 1.f), vec3f(1.f, 0.f, 0.f)};
std::vector<float> opacities = {0.05f, 1.f};
tfn.setParam("color", cpp::CopiedData(colors));
tfn.setParam("opacity", cpp::CopiedData(opacities));
// color the bricks by their rank; we pad the range out a bit to keep
// any brick from being completely transparent
range1f valueRange = range1f(0, mpiWorldSize);
tfn.setParam("value", valueRange);
tfn.commit();
brick.model.setParam("transferFunction", tfn);
brick.model.setParam("samplingRate", 0.5f);
brick.model.commit();
brick.group = cpp::Group();
brick.group.setParam("volume", cpp::CopiedData(brick.model));
brick.group.commit();
brick.instance = cpp::Instance(brick.group);
brick.instance.commit();
return brick;
}