Skip to content

Commit 737291f

Browse files
committed
Add support for critical regions in device code.
Review: https://reviews.llvm.org/D145831
1 parent beafd23 commit 737291f

File tree

2 files changed

+67
-2
lines changed

2 files changed

+67
-2
lines changed

openmp/libomptarget/DeviceRTL/src/Synchronization.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ int testLock(omp_lock_t *);
130130
void initLock(omp_lock_t *);
131131
void destroyLock(omp_lock_t *);
132132
void setLock(omp_lock_t *);
133+
void unsetCriticalLock(omp_lock_t *);
134+
void setCriticalLock(omp_lock_t *);
133135

134136
/// AMDGCN Implementation
135137
///
@@ -269,6 +271,25 @@ void initLock(omp_lock_t *) { __builtin_trap(); }
269271
void destroyLock(omp_lock_t *) { __builtin_trap(); }
270272
void setLock(omp_lock_t *) { __builtin_trap(); }
271273

274+
constexpr uint32_t UNSET = 0;
275+
constexpr uint32_t SET = 1;
276+
277+
void unsetCriticalLock(omp_lock_t *Lock) {
278+
(void)atomicExchange((uint32_t *)Lock, UNSET, atomic::acq_rel);
279+
}
280+
281+
void setCriticalLock(omp_lock_t *Lock) {
282+
uint64_t LowestActiveThread = utils::ffs(mapping::activemask()) - 1;
283+
if (mapping::getThreadIdInWarp() == LowestActiveThread) {
284+
fenceKernel(atomic::release);
285+
while (!atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::relaxed,
286+
atomic::relaxed)) {
287+
__builtin_amdgcn_s_sleep(32);
288+
}
289+
fenceKernel(atomic::aquire);
290+
}
291+
}
292+
272293
#pragma omp end declare variant
273294
///}
274295

@@ -450,6 +471,14 @@ uint32_t atomic::inc(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering) {
450471
return impl::atomicInc(Addr, V, Ordering);
451472
}
452473

474+
void unsetCriticalLock(omp_lock_t *Lock) {
475+
impl::unsetLock(Lock);
476+
}
477+
478+
void setCriticalLock(omp_lock_t *Lock) {
479+
impl::setLock(Lock);
480+
}
481+
453482
extern "C" {
454483
void __kmpc_ordered(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
455484

@@ -518,12 +547,12 @@ void __kmpc_syncwarp(uint64_t Mask) {
518547

519548
void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) {
520549
FunctionTracingRAII();
521-
omp_set_lock(reinterpret_cast<omp_lock_t *>(Name));
550+
impl::setCriticalLock(reinterpret_cast<omp_lock_t *>(Name));
522551
}
523552

524553
void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) {
525554
FunctionTracingRAII();
526-
omp_unset_lock(reinterpret_cast<omp_lock_t *>(Name));
555+
impl::unsetCriticalLock(reinterpret_cast<omp_lock_t *>(Name));
527556
}
528557

529558
void omp_init_lock(omp_lock_t *Lock) { impl::initLock(Lock); }
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// RUN: %libomptarget-compilexx-run-and-check-generic
2+
3+
// UNSUPPORTED: nvptx64-nvidia-cuda
4+
// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
5+
// UNSUPPORTED: x86_64-pc-linux-gnu
6+
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
7+
8+
#include <omp.h>
9+
#include <stdio.h>
10+
11+
#define N 1000000
12+
13+
int A[N];
14+
int main() {
15+
for (int i = 0; i < N; i++)
16+
A[i] = 1;
17+
18+
int sum[1];
19+
sum[0] = 0;
20+
21+
#pragma omp target teams distribute parallel for num_teams(256) \
22+
schedule(static, 1) map(to \
23+
: A[:N]) map(tofrom \
24+
: sum[:1])
25+
{
26+
for (int i = 0; i < N; i++) {
27+
#pragma omp critical
28+
{ sum[0] += A[i]; }
29+
}
30+
}
31+
32+
// CHECK: SUM = 1000000
33+
printf("SUM = %d\n", sum[0]);
34+
35+
return 0;
36+
}

0 commit comments

Comments
 (0)