-
Notifications
You must be signed in to change notification settings - Fork 0
/
openmpTests.C
105 lines (98 loc) · 2.95 KB
/
openmpTests.C
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#include "ListOps.H"
#include "catch2/catch_all.hpp"
#include "catch2/catch_message.hpp"
#include "catch2/catch_test_macros.hpp"
#include "vector.H"
#include "Field.H"
#include "Random.H"
#include <functional>
#include <ranges>
// This file benchmarks a plain serial loop over OpenFOAM fields against
// OpenMP CPU-threaded and OpenMP target-offloaded versions of the same loop
using namespace Foam;
// Number of elements in every field built by the loop variants below and in
// the input field created by the test case.
label ELEMENTS_COUNT = 500000;
// Serial reference mode of computation
//
// Builds a field of ELEMENTS_COUNT zero-initialised entries and fills it in a
// single plain loop with
//     mass*K(r)/density + (mass + density)*K(r)/mass,
// where K(x) = exp(-(1e-2*x)^2), mass is 0.5 and density is 1.0 everywhere.
//
// r : input values, indexed over the full length of the result field
// Returns the filled field by value.
template<class T>
Field<T> originalLoop(const scalarField& r) {
    Field<T> result(ELEMENTS_COUNT, zero());

    // Per-element coefficient fields, sized like the result
    scalarField density(result.size(), 1.0);
    scalarField mass(result.size(), 0.5);
    // Allocated and zero-filled, but never read in this function
    scalarField scratch(result.size(), 0.0);

    // Kernel evaluated on the scaled input value
    auto kernel = [&](const scalar x) {
        const auto scaled = 1e-2*x;
        return Foam::exp(-pow(scaled, 2));
    };

    forAll(result, elemI) {
        const auto firstTerm =
            mass[elemI] * kernel(r[elemI]) / density[elemI];
        const auto secondTerm =
            (mass[elemI] + density[elemI]) * kernel(r[elemI]) / mass[elemI];
        result[elemI] = firstTerm + secondTerm;
    }

    return result;
}
// NOTE(review): THREAD_NUM is not referenced anywhere in the visible part of
// this file; none of the OpenMP pragmas carries a num_threads clause, so the
// thread count presumably comes from the OpenMP runtime defaults - confirm
// whether this macro is still needed.
#define THREAD_NUM 8
// OpenMP multi-threaded (CPU) mode of computation
//
// Same element-wise formula as the serial loop,
//     mass*K(r)/density + (mass + density)*K(r)/mass,
// with K(x) = exp(-(1e-2*x)^2), mass = 0.5 and density = 1.0, but the loop
// iterations are shared between threads via "omp parallel for".
//
// r : input values, indexed over the full length of the result field
// Returns the filled field by value.
template<class T>
Field<T> openMPLoop(const scalarField& r) {
    Field<T> result(ELEMENTS_COUNT, zero());

    // Per-element coefficient fields, sized like the result
    scalarField density(result.size(), 1.0);
    scalarField mass(result.size(), 0.5);
    // Allocated and zero-filled, but never read in this function
    scalarField scratch(result.size(), 0.0);

    // Kernel evaluated on the scaled input value
    auto kernel = [&](const scalar x) {
        const auto scaled = 1e-2*x;
        return Foam::exp(-pow(scaled, 2));
    };

    // Each iteration writes only its own result entry; the locals declared
    // inside the loop body are private to the executing thread.
    #pragma omp parallel for
    forAll(result, elemI) {
        const auto firstTerm =
            mass[elemI] * kernel(r[elemI]) / density[elemI];
        const auto secondTerm =
            (mass[elemI] + density[elemI]) * kernel(r[elemI]) / mass[elemI];
        result[elemI] = firstTerm + secondTerm;
    }

    return result;
}
// OpenMP target-offloading (GPU) mode of computation
//
// Evaluates the same element-wise formula as the serial and CPU-threaded
// variants,
//     m*W(r)/rho + (m + rho)*W(r)/m,   W(x) = exp(-(1e-2*x)^2),
// inside an "omp target" region distributed over teams and threads.
//
// The fields are handed to the device as raw array sections with explicit
// map clauses. Referencing the Field/List objects directly inside the target
// region would only map the container objects themselves, not the storage
// they point to, so on a device with its own memory the kernel would
// dereference host addresses.
//
// r : input values, indexed over the full length of the result field
// Returns the filled field by value.
template<class T>
Field<T> openMPGPULoop(const scalarField& r) {
    Field<T> vf(ELEMENTS_COUNT, zero());

    // Captures nothing, so the closure carries no host references into the
    // target region.
    auto W = [](const scalar x) {
        return Foam::exp(-pow(1e-2*x,2));
    };

    scalarField rho(vf.size(), 1.0);
    scalarField m(vf.size(), 0.5);
    // NOTE(review): w is never read. It is kept because the sibling loop
    // variants allocate the same scratch field and the benchmark times the
    // whole call; drop it from all variants together.
    scalarField w(vf.size(), 0.0);

    // Raw views of the contiguous storage, usable in map array sections
    const label n = vf.size();
    T* vfPtr = vf.data();
    const scalar* rPtr = r.cdata();
    const scalar* mPtr = m.cdata();
    const scalar* rhoPtr = rho.cdata();

    // Inputs are copied to the device; the result is copied back only, since
    // every element is overwritten by the loop.
    #pragma omp target map(to: rPtr[0:n], mPtr[0:n], rhoPtr[0:n]) map(from: vfPtr[0:n])
    #pragma omp teams distribute parallel for
    for (label i = 0; i < n; ++i) {
        vfPtr[i] =
            mPtr[i] * W(rPtr[i]) / rhoPtr[i]
          + (mPtr[i] + rhoPtr[i]) * W(rPtr[i]) / mPtr[i];
    }

    return vf;
}
// Checks that the OpenMP CPU and OpenMP target variants reproduce the serial
// loop on a random input field, then benchmarks all three variants.
TEMPLATE_TEST_CASE
(
    "Execution time for OpenFOAM loops over a list of elements",
    "[cavity][serial][benchmark]",
    scalar
) {
    // _OPENMP is only defined when the compiler has OpenMP enabled; guard the
    // report so the file still compiles without it.
#ifdef _OPENMP
    Info << "Running with OpenMP " << _OPENMP << nl;
#else
    Info << "Running without OpenMP" << nl;
#endif

    // Use ONE generator for the whole field. Constructing a fresh Random()
    // per element restarts the default-seeded sequence every time, so every
    // entry would receive the same first sample.
    scalarField r(ELEMENTS_COUNT, 1.0);
    Random rng;
    for (auto& ri : r) {
        ri = rng.sample01<scalar>();
    }

    // Serial result is the reference the other variants are compared against
    auto ol = originalLoop<TestType>(r);
    auto omp = openMPLoop<TestType>(r);
    auto gpuomp = openMPGPULoop<TestType>(r);

    // Matcher: same size as the reference and element-wise approximately
    // equal (Catch::Approx with an absolute margin of 1e-6)
    auto testExp = Catch::Matchers::Predicate<Field<TestType>>
    (
        [&ol](const Field<TestType>& result) {
            if (result.size() != ol.size()) return false;
            // forAll indexes with the list's own label type, avoiding a
            // comparison between an int index and a label size
            forAll(result, i) {
                if (Catch::Approx(result[i]).margin(1e-6) != ol[i]) return false;
            }
            return true;
        },
        "Field elements must approximately match"
    );
    REQUIRE_THAT(omp, testExp);
    REQUIRE_THAT(gpuomp, testExp);

    // Returning the field keeps the computed result observable to the
    // benchmark harness
    BENCHMARK("Original looping") {
        return originalLoop<TestType>(r);
    };
    BENCHMARK("CPU openMP looping") {
        return openMPLoop<TestType>(r);
    };
    BENCHMARK("GPU OpenMP offloading") {
        return openMPGPULoop<TestType>(r);
    };
}