Skip to content

Commit 64549f0

Browse files
randreshgjosemonsalve2
authored andcommitted
[OpenMP][5.1] Fix parallel masked is ignored llvm#59939
Code generation support for 'parallel masked' directive. The `EmitOMPParallelMaskedDirective` was implemented. In addition, the appropiate device functions were added. Fix llvm#59939. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D143527
1 parent 475dd6f commit 64549f0

File tree

8 files changed

+263
-3
lines changed

8 files changed

+263
-3
lines changed

clang/lib/CodeGen/CGStmt.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
428428
llvm_unreachable("target parallel loop directive not supported yet.");
429429
break;
430430
case Stmt::OMPParallelMaskedDirectiveClass:
431-
llvm_unreachable("parallel masked directive not supported yet.");
431+
EmitOMPParallelMaskedDirective(cast<OMPParallelMaskedDirective>(*S));
432432
break;
433433
}
434434
}

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4489,6 +4489,33 @@ void CodeGenFunction::EmitOMPParallelMasterDirective(
44894489
checkForLastprivateConditionalUpdate(*this, S);
44904490
}
44914491

4492+
void CodeGenFunction::EmitOMPParallelMaskedDirective(
4493+
const OMPParallelMaskedDirective &S) {
4494+
// Emit directive as a combined directive that consists of two implicit
4495+
// directives: 'parallel' with 'masked' directive.
4496+
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4497+
Action.Enter(CGF);
4498+
OMPPrivateScope PrivateScope(CGF);
4499+
emitOMPCopyinClause(CGF, S);
4500+
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4501+
CGF.EmitOMPPrivateClause(S, PrivateScope);
4502+
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4503+
(void)PrivateScope.Privatize();
4504+
emitMasked(CGF, S);
4505+
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4506+
};
4507+
{
4508+
auto LPCRegion =
4509+
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4510+
emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
4511+
emitEmptyBoundParameters);
4512+
emitPostUpdateForReductionClause(*this, S,
4513+
[](CodeGenFunction &) { return nullptr; });
4514+
}
4515+
// Check for outer lastprivate conditional update.
4516+
checkForLastprivateConditionalUpdate(*this, S);
4517+
}
4518+
44924519
void CodeGenFunction::EmitOMPParallelSectionsDirective(
44934520
const OMPParallelSectionsDirective &S) {
44944521
// Emit directive as a combined directive that consists of two implicit

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3585,6 +3585,7 @@ class CodeGenFunction : public CodeGenTypeCache {
35853585
const OMPTargetTeamsDistributeSimdDirective &S);
35863586
void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S);
35873587
void EmitOMPInteropDirective(const OMPInteropDirective &S);
3588+
void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S);
35883589

35893590
/// Emit device code for the target directive.
35903591
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM,

clang/lib/Parse/ParseOpenMP.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2483,8 +2483,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
24832483
/// simd' | 'teams distribute parallel for simd' | 'teams distribute
24842484
/// parallel for' | 'target teams' | 'target teams distribute' | 'target
24852485
/// teams distribute parallel for' | 'target teams distribute parallel
2486-
/// for simd' | 'target teams distribute simd' | 'masked' {clause}
2487-
/// annot_pragma_openmp_end
2486+
/// for simd' | 'target teams distribute simd' | 'masked' |
2487+
/// 'parallel masked' {clause} annot_pragma_openmp_end
24882488
///
24892489
StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
24902490
ParsedStmtContext StmtCtx, bool ReadDirectiveWithinMetadirective) {

clang/test/OpenMP/parallel_masked.cpp

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
2+
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -x c -emit-llvm %s -o - | FileCheck %s
3+
// expected-no-diagnostics
4+
5+
void foo();
6+
7+
void masked() {
8+
#pragma omp parallel masked
9+
{
10+
foo();
11+
}
12+
}
13+
14+
void maskedFilter() {
15+
const int tid = 1;
16+
#pragma omp parallel masked filter(tid)
17+
{
18+
foo();
19+
}
20+
}
21+
22+
void master() {
23+
#pragma omp parallel master
24+
{
25+
foo();
26+
}
27+
}
28+
// CHECK-LABEL: define {{[^@]+}}@masked
29+
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
30+
// CHECK-NEXT: entry:
31+
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.)
32+
// CHECK-NEXT: ret void
33+
//
34+
//
35+
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
36+
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
37+
// CHECK-NEXT: entry:
38+
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
39+
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
40+
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
41+
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
42+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
43+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
44+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0)
45+
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
46+
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
47+
// CHECK: omp_if.then:
48+
// CHECK-NEXT: call void (...) @foo()
49+
// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
50+
// CHECK-NEXT: br label [[OMP_IF_END]]
51+
// CHECK: omp_if.end:
52+
// CHECK-NEXT: ret void
53+
//
54+
//
55+
// CHECK-LABEL: define {{[^@]+}}@maskedFilter
56+
// CHECK-SAME: () #[[ATTR0]] {
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4
59+
// CHECK-NEXT: store i32 1, ptr [[TID]], align 4
60+
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1)
61+
// CHECK-NEXT: ret void
62+
//
63+
//
64+
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
65+
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
66+
// CHECK-NEXT: entry:
67+
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
68+
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
69+
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
70+
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
71+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
72+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
73+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1)
74+
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
75+
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
76+
// CHECK: omp_if.then:
77+
// CHECK-NEXT: call void (...) @foo()
78+
// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
79+
// CHECK-NEXT: br label [[OMP_IF_END]]
80+
// CHECK: omp_if.end:
81+
// CHECK-NEXT: ret void
82+
//
83+
//
84+
// CHECK-LABEL: define {{[^@]+}}@master
85+
// CHECK-SAME: () #[[ATTR0]] {
86+
// CHECK-NEXT: entry:
87+
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2)
88+
// CHECK-NEXT: ret void
89+
//
90+
//
91+
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
92+
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
93+
// CHECK-NEXT: entry:
94+
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
95+
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
96+
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
97+
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
98+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
99+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
100+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
101+
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
102+
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
103+
// CHECK: omp_if.then:
104+
// CHECK-NEXT: call void (...) @foo()
105+
// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
106+
// CHECK-NEXT: br label [[OMP_IF_END]]
107+
// CHECK: omp_if.end:
108+
// CHECK-NEXT: ret void
109+
//
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
2+
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -fopenmp-targets=nvptx64 -offload-device-only -x c -emit-llvm %s -o - | FileCheck %s
3+
// expected-no-diagnostics
4+
5+
void foo();
6+
7+
void masked() {
8+
#pragma target
9+
#pragma omp parallel masked
10+
{
11+
foo();
12+
}
13+
}
14+
15+
void maskedFilter() {
16+
const int tid = 1;
17+
#pragma target
18+
#pragma omp parallel masked filter(tid)
19+
{
20+
foo();
21+
}
22+
}
23+
24+
void master() {
25+
#pragma target
26+
#pragma omp parallel master
27+
{
28+
foo();
29+
}
30+
}
31+
// CHECK-LABEL: define {{[^@]+}}@masked
32+
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
33+
// CHECK-NEXT: entry:
34+
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.)
35+
// CHECK-NEXT: ret void
36+
//
37+
//
38+
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
39+
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
40+
// CHECK-NEXT: entry:
41+
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
42+
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
43+
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
44+
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
45+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
46+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
47+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0)
48+
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
49+
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
50+
// CHECK: omp_if.then:
51+
// CHECK-NEXT: call void (...) @foo()
52+
// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
53+
// CHECK-NEXT: br label [[OMP_IF_END]]
54+
// CHECK: omp_if.end:
55+
// CHECK-NEXT: ret void
56+
//
57+
//
58+
// CHECK-LABEL: define {{[^@]+}}@maskedFilter
59+
// CHECK-SAME: () #[[ATTR0]] {
60+
// CHECK-NEXT: entry:
61+
// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4
62+
// CHECK-NEXT: store i32 1, ptr [[TID]], align 4
63+
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1)
64+
// CHECK-NEXT: ret void
65+
//
66+
//
67+
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
68+
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
69+
// CHECK-NEXT: entry:
70+
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
71+
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
72+
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
73+
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
74+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
75+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
76+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1)
77+
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
78+
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
79+
// CHECK: omp_if.then:
80+
// CHECK-NEXT: call void (...) @foo()
81+
// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
82+
// CHECK-NEXT: br label [[OMP_IF_END]]
83+
// CHECK: omp_if.end:
84+
// CHECK-NEXT: ret void
85+
//
86+
//
87+
// CHECK-LABEL: define {{[^@]+}}@master
88+
// CHECK-SAME: () #[[ATTR0]] {
89+
// CHECK-NEXT: entry:
90+
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2)
91+
// CHECK-NEXT: ret void
92+
//
93+
//
94+
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
95+
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
96+
// CHECK-NEXT: entry:
97+
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
98+
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
99+
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
100+
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
101+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
102+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
103+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
104+
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
105+
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
106+
// CHECK: omp_if.then:
107+
// CHECK-NEXT: call void (...) @foo()
108+
// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
109+
// CHECK-NEXT: br label [[OMP_IF_END]]
110+
// CHECK: omp_if.end:
111+
// CHECK-NEXT: ret void
112+
//

openmp/libomptarget/DeviceRTL/include/Interface.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,10 @@ int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
260260

261261
void __kmpc_end_master(IdentTy *Loc, int32_t TId);
262262

263+
int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);
264+
265+
void __kmpc_end_masked(IdentTy *Loc, int32_t TId);
266+
263267
int32_t __kmpc_single(IdentTy *Loc, int32_t TId);
264268

265269
void __kmpc_end_single(IdentTy *Loc, int32_t TId);

openmp/libomptarget/DeviceRTL/src/Synchronization.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,13 @@ int32_t __kmpc_master(IdentTy *Loc, int32_t TId) {
520520

521521
void __kmpc_end_master(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
522522

523+
int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter) {
524+
FunctionTracingRAII();
525+
return omp_get_thread_num() == Filter;
526+
}
527+
528+
void __kmpc_end_masked(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
529+
523530
int32_t __kmpc_single(IdentTy *Loc, int32_t TId) {
524531
FunctionTracingRAII();
525532
return __kmpc_master(Loc, TId);

0 commit comments

Comments
 (0)