Skip to content

Commit 7f7e174

Browse files
committed
[OpenMP] Be smarter about the insertion point for deduplication
We can use dominance to avoid the special handling of kernels and to prevent accidentally inserting code before allocas (as happened in the runtime test).
1 parent 5244617 commit 7f7e174

File tree

4 files changed

+21
-28
lines changed

4 files changed

+21
-28
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "llvm/IR/BasicBlock.h"
3737
#include "llvm/IR/Constants.h"
3838
#include "llvm/IR/DiagnosticInfo.h"
39+
#include "llvm/IR/Dominators.h"
3940
#include "llvm/IR/Function.h"
4041
#include "llvm/IR/GlobalValue.h"
4142
#include "llvm/IR/GlobalVariable.h"
@@ -1706,37 +1707,27 @@ struct OpenMPOpt {
17061707
};
17071708

17081709
if (!ReplVal) {
1709-
for (Use *U : *UV)
1710+
auto *DT =
1711+
OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F);
1712+
if (!DT)
1713+
return false;
1714+
Instruction *IP = nullptr;
1715+
for (Use *U : *UV) {
17101716
if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
1717+
if (IP)
1718+
IP = DT->findNearestCommonDominator(IP, CI);
1719+
else
1720+
IP = CI;
17111721
if (!CanBeMoved(*CI))
17121722
continue;
1713-
1714-
// If the function is a kernel, dedup will move
1715-
// the runtime call right after the kernel init callsite. Otherwise,
1716-
// it will move it to the beginning of the caller function.
1717-
if (isKernel(F)) {
1718-
auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
1719-
auto *KernelInitUV = KernelInitRFI.getUseVector(F);
1720-
1721-
if (KernelInitUV->empty())
1722-
continue;
1723-
1724-
assert(KernelInitUV->size() == 1 &&
1725-
"Expected a single __kmpc_target_init in kernel\n");
1726-
1727-
CallInst *KernelInitCI =
1728-
getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
1729-
assert(KernelInitCI &&
1730-
"Expected a call to __kmpc_target_init in kernel\n");
1731-
1732-
CI->moveAfter(KernelInitCI);
1733-
} else
1734-
CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
1735-
ReplVal = CI;
1736-
break;
1723+
if (!ReplVal)
1724+
ReplVal = CI;
17371725
}
1726+
}
17381727
if (!ReplVal)
17391728
return false;
1729+
assert(IP && "Expected insertion point!");
1730+
cast<Instruction>(ReplVal)->moveBefore(IP);
17401731
}
17411732

17421733
// If we use a call as a replacement value we need to make sure the ident is

llvm/test/Transforms/OpenMP/deduplication.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@ m:
102102
define void @local_and_global_gtid_calls() {
103103
; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() {
104104
; CHECK-NEXT: entry:
105-
; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
106105
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8
106+
; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
107107
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
108108
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
109109
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
@@ -132,10 +132,10 @@ entry:
132132
define void @local_gtid_calls_only() {
133133
; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() {
134134
; CHECK-NEXT: entry:
135-
; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
136135
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8
137136
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8
138137
; CHECK-NEXT: [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8
138+
; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
139139
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
140140
; CHECK-NEXT: call void @useI32(i32 [[TID5]])
141141
; CHECK-NEXT: call void @useI32(i32 [[TID5]])

llvm/test/Transforms/OpenMP/deduplication_target.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
1919
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
2020
; CHECK-NEXT: entry:
2121
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
22-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
2322
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
2423
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
2524
; CHECK: user_code.entry:
25+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
2626
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
2727
; CHECK-NEXT: ret void
2828
; CHECK: worker.exit:

openmp/libomptarget/test/offloading/atomic-compare-signedness.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
// RUN: %libomptarget-compile-generic -fopenmp-version=51
77
// RUN: %libomptarget-run-generic | %fcheck-generic
8+
// RUN: %libomptarget-compileopt-generic -fopenmp-version=51
9+
// RUN: %libomptarget-run-generic | %fcheck-generic
810

911
// High parallelism increases our chances of detecting a lack of atomicity.
1012
#define NUM_THREADS_TRY 256

0 commit comments

Comments
 (0)